From: Bill Schmidt Date: Mon, 6 Jun 2011 14:27:41 +0000 (+0000) Subject: re PR tree-optimization/46728 (GCC does not generate fmadd for pow (x, 0.75)+y on... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3906ea1b34ab9967430cd2b9c423815dc532acfe;p=gcc.git re PR tree-optimization/46728 (GCC does not generate fmadd for pow (x, 0.75)+y on powerpc) 2011-06-06 Bill Schmidt PR tree-optimization/46728 * builtins.c (powi_table): Remove. (powi_lookup_cost): Remove. (powi_cost): Remove. (expand_powi_1): Remove. (expand_powi): Remove. (expand_builtin_pow_root): Remove. (expand_builtin_pow): Remove. (expand_builtin_powi): Eliminate handling of constant exponent. (expand_builtin): Use expand_builtin_mathfn_2 for BUILT_IN_POW. From-SVN: r174701 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a06eb75fc0f..d52bde645e3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2011-06-06 Bill Schmidt + + PR tree-optimization/46728 + * builtins.c (powi_table): Remove. + (powi_lookup_cost): Remove. + (powi_cost): Remove. + (expand_powi_1): Remove. + (expand_powi): Remove. + (expand_builtin_pow_root): Remove. + (expand_builtin_pow): Remove. + (expand_builtin_powi): Eliminate handling of constant exponent. + (expand_builtin): Use expand_builtin_mathfn_2 for BUILT_IN_POW. + 2011-06-06 Alexandre Oliva * cprop.c (local_cprop_pass): Don't set changed for debug insns. diff --git a/gcc/builtins.c b/gcc/builtins.c index d37558c1526..7b24a0ce703 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -2823,451 +2823,6 @@ expand_builtin_int_roundingfn_2 (tree exp, rtx target) return target; } -/* To evaluate powi(x,n), the floating point value x raised to the - constant integer exponent n, we use a hybrid algorithm that - combines the "window method" with look-up tables. For an - introduction to exponentiation algorithms and "addition chains", - see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, - "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", - 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation - Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ - -/* Provide a default value for POWI_MAX_MULTS, the maximum number of - multiplications to inline before calling the system library's pow - function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, - so this default never requires calling pow, powf or powl. */ - -#ifndef POWI_MAX_MULTS -#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) -#endif - -/* The size of the "optimal power tree" lookup table. All - exponents less than this value are simply looked up in the - powi_table below. This threshold is also used to size the - cache of pseudo registers that hold intermediate results. */ -#define POWI_TABLE_SIZE 256 - -/* The size, in bits of the window, used in the "window method" - exponentiation algorithm. This is equivalent to a radix of - (1<= POWI_TABLE_SIZE) - { - if (val & 1) - { - digit = val & ((1 << POWI_WINDOW_SIZE) - 1); - result += powi_lookup_cost (digit, cache) - + POWI_WINDOW_SIZE + 1; - val >>= POWI_WINDOW_SIZE; - } - else - { - val >>= 1; - result++; - } - } - - return result + powi_lookup_cost (val, cache); -} - -/* Recursive subroutine of expand_powi. This function takes the array, - CACHE, of already calculated exponents and an exponent N and returns - an RTX that corresponds to CACHE[1]**N, as calculated in mode MODE. */ - -static rtx -expand_powi_1 (enum machine_mode mode, unsigned HOST_WIDE_INT n, rtx *cache) -{ - unsigned HOST_WIDE_INT digit; - rtx target, result; - rtx op0, op1; - - if (n < POWI_TABLE_SIZE) - { - if (cache[n]) - return cache[n]; - - target = gen_reg_rtx (mode); - cache[n] = target; - - op0 = expand_powi_1 (mode, n - powi_table[n], cache); - op1 = expand_powi_1 (mode, powi_table[n], cache); - } - else if (n & 1) - { - target = gen_reg_rtx (mode); - digit = n & ((1 << POWI_WINDOW_SIZE) - 1); - op0 = expand_powi_1 (mode, n - digit, cache); - op1 = expand_powi_1 (mode, digit, cache); - } - else - { - target = gen_reg_rtx (mode); - op0 = expand_powi_1 (mode, n >> 1, cache); - op1 = op0; - } - - result = expand_mult (mode, op0, op1, target, 0); - if (result != target) - emit_move_insn (target, result); - return target; -} - -/* Expand the RTL to evaluate powi(x,n) in mode MODE. X is the - floating point operand in mode MODE, and N is the exponent. This - function needs to be kept in sync with powi_cost above. */ - -static rtx -expand_powi (rtx x, enum machine_mode mode, HOST_WIDE_INT n) -{ - rtx cache[POWI_TABLE_SIZE]; - rtx result; - - if (n == 0) - return CONST1_RTX (mode); - - memset (cache, 0, sizeof (cache)); - cache[1] = x; - - result = expand_powi_1 (mode, (n < 0) ? -n : n, cache); - - /* If the original exponent was negative, reciprocate the result. */ - if (n < 0) - result = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - result, NULL_RTX, 0, OPTAB_LIB_WIDEN); - - return result; -} - -/* Fold a builtin function call to pow, powf, or powl into a series of sqrts or - cbrts. Return NULL_RTX if no simplification can be made or expand the tree - if we can simplify it. */ -static rtx -expand_builtin_pow_root (location_t loc, tree arg0, tree arg1, tree type, - rtx subtarget) -{ - if (TREE_CODE (arg1) == REAL_CST - && !TREE_OVERFLOW (arg1) - && flag_unsafe_math_optimizations) - { - enum machine_mode mode = TYPE_MODE (type); - tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); - tree cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); - REAL_VALUE_TYPE c = TREE_REAL_CST (arg1); - tree op = NULL_TREE; - - if (sqrtfn) - { - /* Optimize pow (x, 0.5) into sqrt. */ - if (REAL_VALUES_EQUAL (c, dconsthalf)) - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - - /* Don't do this optimization if we don't have a sqrt insn. */ - else if (optab_handler (sqrt_optab, mode) != CODE_FOR_nothing) - { - REAL_VALUE_TYPE dconst1_4 = dconst1; - REAL_VALUE_TYPE dconst3_4; - SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); - - real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0); - SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2); - - /* Optimize pow (x, 0.25) into sqrt (sqrt (x)). Assume on most - machines that a builtin sqrt instruction is smaller than a - call to pow with 0.25, so do this optimization even if - -Os. */ - if (REAL_VALUES_EQUAL (c, dconst1_4)) - { - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - op = build_call_nofold_loc (loc, sqrtfn, 1, op); - } - - /* Optimize pow (x, 0.75) = sqrt (x) * sqrt (sqrt (x)) unless we - are optimizing for space. */ - else if (optimize_insn_for_speed_p () - && !TREE_SIDE_EFFECTS (arg0) - && REAL_VALUES_EQUAL (c, dconst3_4)) - { - tree sqrt1 = build_call_expr_loc (loc, sqrtfn, 1, arg0); - tree sqrt2 = builtin_save_expr (sqrt1); - tree sqrt3 = build_call_expr_loc (loc, sqrtfn, 1, sqrt1); - op = fold_build2_loc (loc, MULT_EXPR, type, sqrt2, sqrt3); - } - } - } - - /* Check whether we can do cbrt insstead of pow (x, 1./3.) and - cbrt/sqrts instead of pow (x, 1./6.). */ - if (cbrtfn && ! op - && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))) - { - /* First try 1/3. */ - REAL_VALUE_TYPE dconst1_3 - = real_value_truncate (mode, dconst_third ()); - - if (REAL_VALUES_EQUAL (c, dconst1_3)) - op = build_call_nofold_loc (loc, cbrtfn, 1, arg0); - - /* Now try 1/6. */ - else if (optimize_insn_for_speed_p () - && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing) - { - REAL_VALUE_TYPE dconst1_6 = dconst1_3; - SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); - - if (REAL_VALUES_EQUAL (c, dconst1_6)) - { - op = build_call_nofold_loc (loc, sqrtfn, 1, arg0); - op = build_call_nofold_loc (loc, cbrtfn, 1, op); - } - } - } - - if (op) - return expand_expr (op, subtarget, mode, EXPAND_NORMAL); - } - - return NULL_RTX; -} - -/* Expand a call to the pow built-in mathematical function. Return NULL_RTX if - a normal call should be emitted rather than expanding the function - in-line. EXP is the expression that is a call to the builtin - function; if convenient, the result should be placed in TARGET. */ - -static rtx -expand_builtin_pow (tree exp, rtx target, rtx subtarget) -{ - tree arg0, arg1; - tree fn, narg0; - tree type = TREE_TYPE (exp); - REAL_VALUE_TYPE cint, c, c2; - HOST_WIDE_INT n; - rtx op, op2; - enum machine_mode mode = TYPE_MODE (type); - - if (! validate_arglist (exp, REAL_TYPE, REAL_TYPE, VOID_TYPE)) - return NULL_RTX; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - - if (TREE_CODE (arg1) != REAL_CST - || TREE_OVERFLOW (arg1)) - return expand_builtin_mathfn_2 (exp, target, subtarget); - - /* Handle constant exponents. */ - - /* For integer valued exponents we can expand to an optimal multiplication - sequence using expand_powi. */ - c = TREE_REAL_CST (arg1); - n = real_to_integer (&c); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - if (real_identical (&c, &cint) - && ((n >= -1 && n <= 2) - || (flag_unsafe_math_optimizations - && optimize_insn_for_speed_p () - && powi_cost (n) <= POWI_MAX_MULTS))) - { - op = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL); - if (n != 1) - { - op = force_reg (mode, op); - op = expand_powi (op, mode, n); - } - return op; - } - - narg0 = builtin_save_expr (arg0); - - /* If the exponent is not integer valued, check if it is half of an integer. - In this case we can expand to sqrt (x) * x**(n/2). */ - fn = mathfn_built_in (type, BUILT_IN_SQRT); - if (fn != NULL_TREE) - { - real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); - n = real_to_integer (&c2); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - if (real_identical (&c2, &cint) - && ((flag_unsafe_math_optimizations - && optimize_insn_for_speed_p () - && powi_cost (n/2) <= POWI_MAX_MULTS) - /* Even the c == 0.5 case cannot be done unconditionally - when we need to preserve signed zeros, as - pow (-0, 0.5) is +0, while sqrt(-0) is -0. */ - || (!HONOR_SIGNED_ZEROS (mode) && n == 1) - /* For c == 1.5 we can assume that x * sqrt (x) is always - smaller than pow (x, 1.5) if sqrt will not be expanded - as a call. */ - || (n == 3 - && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing))) - { - tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1, - narg0); - /* Use expand_expr in case the newly built call expression - was folded to a non-call. */ - op = expand_expr (call_expr, subtarget, mode, EXPAND_NORMAL); - if (n != 1) - { - op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL); - op2 = force_reg (mode, op2); - op2 = expand_powi (op2, mode, abs (n / 2)); - op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX, - 0, OPTAB_LIB_WIDEN); - /* If the original exponent was negative, reciprocate the - result. */ - if (n < 0) - op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - op, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - return op; - } - } - - /* Check whether we can do a series of sqrt or cbrt's instead of the pow - call. */ - op = expand_builtin_pow_root (EXPR_LOCATION (exp), arg0, arg1, type, - subtarget); - if (op) - return op; - - /* Try if the exponent is a third of an integer. In this case - we can expand to x**(n/3) * cbrt(x)**(n%3). As cbrt (x) is - different from pow (x, 1./3.) due to rounding and behavior - with negative x we need to constrain this transformation to - unsafe math and positive x or finite math. */ - fn = mathfn_built_in (type, BUILT_IN_CBRT); - if (fn != NULL_TREE - && flag_unsafe_math_optimizations - && (tree_expr_nonnegative_p (arg0) - || !HONOR_NANS (mode))) - { - REAL_VALUE_TYPE dconst3; - real_from_integer (&dconst3, VOIDmode, 3, 0, 0); - real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); - real_round (&c2, mode, &c2); - n = real_to_integer (&c2); - real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0); - real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); - real_convert (&c2, mode, &c2); - if (real_identical (&c2, &c) - && ((optimize_insn_for_speed_p () - && powi_cost (n/3) <= POWI_MAX_MULTS) - || n == 1)) - { - tree call_expr = build_call_nofold_loc (EXPR_LOCATION (exp), fn, 1, - narg0); - op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0); - if (abs (n) % 3 == 2) - op = expand_simple_binop (mode, MULT, op, op, op, - 0, OPTAB_LIB_WIDEN); - if (n != 1) - { - op2 = expand_expr (narg0, subtarget, VOIDmode, EXPAND_NORMAL); - op2 = force_reg (mode, op2); - op2 = expand_powi (op2, mode, abs (n / 3)); - op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX, - 0, OPTAB_LIB_WIDEN); - /* If the original exponent was negative, reciprocate the - result. */ - if (n < 0) - op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode), - op, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - return op; - } - } - - /* Fall back to optab expansion. */ - return expand_builtin_mathfn_2 (exp, target, subtarget); -} - /* Expand a call to the powi built-in mathematical function. Return NULL_RTX if a normal call should be emitted rather than expanding the function in-line. EXP is the expression that is a call to the builtin @@ -3288,27 +2843,6 @@ expand_builtin_powi (tree exp, rtx target) arg1 = CALL_EXPR_ARG (exp, 1); mode = TYPE_MODE (TREE_TYPE (exp)); - /* Handle constant power. */ - - if (TREE_CODE (arg1) == INTEGER_CST - && !TREE_OVERFLOW (arg1)) - { - HOST_WIDE_INT n = TREE_INT_CST_LOW (arg1); - - /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact. - Otherwise, check the number of multiplications required. */ - if ((TREE_INT_CST_HIGH (arg1) == 0 - || TREE_INT_CST_HIGH (arg1) == -1) - && ((n >= -1 && n <= 2) - || (optimize_insn_for_speed_p () - && powi_cost (n) <= POWI_MAX_MULTS))) - { - op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); - op0 = force_reg (mode, op0); - return expand_powi (op0, mode, n); - } - } - /* Emit a libcall to libgcc. */ /* Mode of the 2nd argument must match that of an int. */ @@ -5862,12 +5396,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, return target; break; - CASE_FLT_FN (BUILT_IN_POW): - target = expand_builtin_pow (exp, target, subtarget); - if (target) - return target; - break; - CASE_FLT_FN (BUILT_IN_POWI): target = expand_builtin_powi (exp, target); if (target) @@ -5885,6 +5413,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, CASE_FLT_FN (BUILT_IN_FMOD): CASE_FLT_FN (BUILT_IN_REMAINDER): CASE_FLT_FN (BUILT_IN_DREM): + CASE_FLT_FN (BUILT_IN_POW): target = expand_builtin_mathfn_2 (exp, target, subtarget); if (target) return target;