From 5877771853e65df4ebc5213c5dd098d2e49bf95a Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 8 Jun 2004 04:30:48 +0000 Subject: [PATCH] expmed.c (add_cost, [...]): Make arrays indexed by machine mode. * expmed.c (add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap): Make arrays indexed by machine mode. Rename negate_cost to neg_cost. (init_expmed): Initialize these cost arrays as appropriate. (store_bit_field, extract_bit_field): Correct whitespace. (synth_mult, choose_mult_variant, expand_mult, expand_mult_highpart, expand_mult_highpart_optab, expand_divmod): Update uses of add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap to index with mode, word_mode or compute_mode as appropriate. From-SVN: r82743 --- gcc/ChangeLog | 11 ++++ gcc/expmed.c | 174 +++++++++++++++++++++++++++----------------------- 2 files changed, 106 insertions(+), 79 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0be308243f8..355a54b2f2e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2004-06-07 Roger Sayle + + * expmed.c (add_cost, neg_cost, sdiv_pow2_cheap, smod_pow2_cheap): + Make arrays indexed by machine mode. Rename negate_cost to neg_cost. + (init_expmed): Initialize these cost arrays as appropriate. + (store_bit_field, extract_bit_field): Correct whitespace. + (synth_mult, choose_mult_variant, expand_mult, expand_mult_highpart, + expand_mult_highpart_optab, expand_divmod): Update uses of add_cost, + neg_cost, sdiv_pow2_cheap, smod_pow2_cheap to index with mode, + word_mode or compute_mode as appropriate. + 2004-06-07 Eric Botcazou PR target/15783 diff --git a/gcc/expmed.c b/gcc/expmed.c index 9679b71bd71..b6012721479 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -57,7 +57,8 @@ static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); Usually, this will mean that the MD file will emit non-branch sequences. */ -static int sdiv_pow2_cheap, smod_pow2_cheap; +static int sdiv_pow2_cheap[NUM_MACHINE_MODES]; +static int smod_pow2_cheap[NUM_MACHINE_MODES]; #ifndef SLOW_UNALIGNED_ACCESS #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT @@ -90,7 +91,9 @@ static int sdiv_pow2_cheap, smod_pow2_cheap; /* Cost of various pieces of RTL. Note that some of these are indexed by shift count and some by mode. */ -static int add_cost, negate_cost, zero_cost; +static int zero_cost; +static int add_cost[NUM_MACHINE_MODES]; +static int neg_cost[NUM_MACHINE_MODES]; static int shift_cost[MAX_BITS_PER_WORD]; static int shiftadd_cost[MAX_BITS_PER_WORD]; static int shiftsub_cost[MAX_BITS_PER_WORD]; @@ -114,7 +117,6 @@ init_expmed (void) reg = gen_rtx_REG (word_mode, 10000); zero_cost = rtx_cost (const0_rtx, 0); - add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET); shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg, gen_rtx_ASHIFT (word_mode, reg, @@ -136,52 +138,33 @@ init_expmed (void) init_recog (); - shift_cost[0] = 0; - shiftadd_cost[0] = shiftsub_cost[0] = add_cost; - - for (m = 1; m < MAX_BITS_PER_WORD; m++) - { - rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m); - shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000; - - XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m); - if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0) - shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int; - if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0) - shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int; - if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0) - shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET); - } - - negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET); - - sdiv_pow2_cheap - = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); - smod_pow2_cheap - = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) { reg = gen_rtx_REG (mode, 10000); - div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); - mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + add_cost[mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET); + neg_cost[mode] = rtx_cost (gen_rtx_NEG (mode, reg), SET); + div_cost[mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); + mul_cost[mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + + sdiv_pow2_cheap[mode] + = (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + smod_pow2_cheap[mode] + = (rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + wider_mode = GET_MODE_WIDER_MODE (mode); if (wider_mode != VOIDmode) { - mul_widen_cost[(int) wider_mode] + mul_widen_cost[wider_mode] = rtx_cost (gen_rtx_MULT (wider_mode, gen_rtx_ZERO_EXTEND (wider_mode, reg), gen_rtx_ZERO_EXTEND (wider_mode, reg)), SET); - mul_highpart_cost[(int) mode] + mul_highpart_cost[mode] = rtx_cost (gen_rtx_TRUNCATE (mode, gen_rtx_LSHIFTRT (wider_mode, @@ -195,6 +178,27 @@ init_expmed (void) } } + shift_cost[0] = 0; + shiftadd_cost[0] = shiftsub_cost[0] = add_cost[word_mode]; + + for (m = 1; m < MAX_BITS_PER_WORD; m++) + { + rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m); + shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000; + + XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m); + if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0) + shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET); + + XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int; + if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0) + shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET); + + XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int; + if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0) + shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET); + } + end_sequence (); } @@ -317,7 +321,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, available. */ if (VECTOR_MODE_P (GET_MODE (op0)) && GET_CODE (op0) != MEM - && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code + && (vec_set_optab->handlers[GET_MODE (op0)].insn_code != CODE_FOR_nothing) && fieldmode == GET_MODE_INNER (GET_MODE (op0)) && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) @@ -325,7 +329,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, { enum machine_mode outermode = GET_MODE (op0); enum machine_mode innermode = GET_MODE_INNER (outermode); - int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code; + int icode = (int) vec_set_optab->handlers[outermode].insn_code; int pos = bitnum / GET_MODE_BITSIZE (innermode); rtx rtxpos = GEN_INT (pos); rtx src = value; @@ -451,10 +455,10 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, if (GET_CODE (op0) != MEM && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) && bitsize == GET_MODE_BITSIZE (fieldmode) - && (movstrict_optab->handlers[(int) fieldmode].insn_code + && (movstrict_optab->handlers[fieldmode].insn_code != CODE_FOR_nothing)) { - int icode = movstrict_optab->handlers[(int) fieldmode].insn_code; + int icode = movstrict_optab->handlers[fieldmode].insn_code; /* Get appropriate low part of the value being stored. */ if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG) @@ -1086,14 +1090,14 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, available. */ if (VECTOR_MODE_P (GET_MODE (op0)) && GET_CODE (op0) != MEM - && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code + && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code != CODE_FOR_nothing) && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) { enum machine_mode outermode = GET_MODE (op0); enum machine_mode innermode = GET_MODE_INNER (outermode); - int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code; + int icode = (int) vec_extract_optab->handlers[outermode].insn_code; int pos = bitnum / GET_MODE_BITSIZE (innermode); rtx rtxpos = GEN_INT (pos); rtx src = op0; @@ -2255,7 +2259,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - cost = add_cost; + cost = add_cost[word_mode]; synth_mult (alg_in, t + 1, cost_limit - cost); cost += alg_in->cost; @@ -2272,7 +2276,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ - cost = add_cost; + cost = add_cost[word_mode]; synth_mult (alg_in, t - 1, cost_limit - cost); cost += alg_in->cost; @@ -2304,7 +2308,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; if (t % d == 0 && t > d && m < BITS_PER_WORD) { - cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]); + cost = add_cost[word_mode] + shift_cost[m]; + if (shiftadd_cost[m] < cost) + cost = shiftadd_cost[m]; synth_mult (alg_in, t / d, cost_limit - cost); cost += alg_in->cost; @@ -2323,7 +2329,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; if (t % d == 0 && t > d && m < BITS_PER_WORD) { - cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]); + cost = add_cost[word_mode] + shift_cost[m]; + if (shiftsub_cost[m] < cost) + cost = shiftsub_cost[m]; synth_mult (alg_in, t / d, cost_limit - cost); cost += alg_in->cost; @@ -2427,15 +2435,17 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, `unsigned int' */ if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) { - synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost); - alg2.cost += negate_cost; + synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) + - neg_cost[mode]); + alg2.cost += neg_cost[mode]; if (alg2.cost < alg->cost) *alg = alg2, *variant = negate_variant; } /* This proves very useful for division-by-constant. */ - synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost); - alg2.cost += add_cost; + synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) + - add_cost[mode]); + alg2.cost += add_cost[mode]; if (alg2.cost < alg->cost) *alg = alg2, *variant = add_variant; @@ -2633,7 +2643,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, && (unsignedp || !flag_trapv)) { int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET); - mult_cost = MIN (12 * add_cost, mult_cost); + mult_cost = MIN (12 * add_cost[mode], mult_cost); if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant, mult_cost)) @@ -2887,7 +2897,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[(int) mode] < max_cost) + if (mul_highpart_cost[mode] < max_cost) { moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -2899,8 +2909,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost - < max_cost)) + && (mul_highpart_cost[mode] + 2 * shift_cost[size-1] + + 4 * add_cost[mode] < max_cost)) { moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; tem = expand_binop (mode, moptab, op0, narrow_op1, target, @@ -2913,8 +2923,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication. */ moptab = unsignedp ? umul_widen_optab : smul_widen_optab; - if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing - && mul_widen_cost[(int) wider_mode] < max_cost) + if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing + && mul_widen_cost[wider_mode] < max_cost) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, unsignedp, OPTAB_WIDEN); @@ -2924,9 +2934,9 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening the mode and perform a non-widening multiplication. */ moptab = smul_optab; - if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing + if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost) + && mul_cost[wider_mode] + shift_cost[size-1] < max_cost) { tem = expand_binop (wider_mode, moptab, op0, op1, 0, unsignedp, OPTAB_WIDEN); @@ -2936,10 +2946,10 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, /* Try widening multiplication of opposite signedness, and adjust. */ moptab = unsignedp ? smul_widen_optab : umul_widen_optab; - if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing + if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing && size - 1 < BITS_PER_WORD - && (mul_widen_cost[(int) wider_mode] - + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)) + && (mul_widen_cost[wider_mode] + 2 * shift_cost[size-1] + + 4 * add_cost[mode] < max_cost)) { tem = expand_binop (wider_mode, moptab, op0, narrow_op1, NULL_RTX, ! unsignedp, OPTAB_WIDEN); @@ -2998,7 +3008,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { sign_adjust = true; - extra_cost += add_cost; + extra_cost += add_cost[mode]; } /* See whether shift/add multiplication is cheap enough. */ @@ -3179,15 +3189,15 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing - || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing) + if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing + || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) break; if (compute_mode == VOIDmode) for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].libfunc - || optab2->handlers[(int) compute_mode].libfunc) + if (optab1->handlers[compute_mode].libfunc + || optab2->handlers[compute_mode].libfunc) break; /* If we still couldn't find a mode, use MODE, but we'll probably abort @@ -3211,10 +3221,11 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = div_cost[(int) compute_mode] + max_cost = div_cost[compute_mode] - (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const) - ? mul_cost[(int) compute_mode] + add_cost : 0); + ? mul_cost[compute_mode] + add_cost[compute_mode] + : 0); last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; @@ -3332,7 +3343,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost = (shift_cost[post_shift - 1] - + shift_cost[1] + 2 * add_cost); + + shift_cost[1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3415,15 +3427,16 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap) + && (rem_flag ? smod_pow2_cheap[compute_mode] + : sdiv_pow2_cheap[compute_mode]) /* ??? The cheap metric is computed only for word_mode. If this operation is wider, this may not be so. Assume true if the optab has an expander for this mode. */ && (((rem_flag ? smod_optab : sdiv_optab) - ->handlers[(int) compute_mode].insn_code + ->handlers[compute_mode].insn_code != CODE_FOR_nothing) - || (sdivmod_optab->handlers[(int) compute_mode] + || (sdivmod_optab->handlers[compute_mode] .insn_code != CODE_FOR_nothing))) ; else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) @@ -3497,7 +3510,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, goto fail1; extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + add_cost); + + shift_cost[size - 1] + + add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3528,7 +3542,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + + shift_cost[size - 1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3618,7 +3633,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + + shift_cost[size - 1] + + 2 * add_cost[compute_mode]); t3 = expand_mult_highpart (compute_mode, t2, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -4079,7 +4095,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = sign_expand_binop (compute_mode, umod_optab, smod_optab, op0, op1, target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); if (remainder == 0) @@ -4107,7 +4123,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, op0, op1, rem_flag ? NULL_RTX : target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); @@ -4751,9 +4767,9 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, that is compensated by the subsequent overflow when subtracting one / negating. */ - if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, abs_optab, op0, subtarget, 1); - else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { -- 2.30.2