From bf510679bb3f9bfd6019666065016bb26a5b5466 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 6 Oct 2020 10:32:22 +0200 Subject: [PATCH] divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282] As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD ifn call for division + modulo by constant for the fear that during expansion we could generate better code for those cases. If the divisoris a power of two, that is certainly the case always, but otherwise expand_divmod can punt in many cases, e.g. if the division type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call choose_multiplier, because it works on HOST_WIDE_INTs (true, something we should fix eventually now that we have wide_ints), or if pre/post shift is larger than BITS_PER_WORD. So, the following patch recognizes DIVMOD with constant last argument even when it is unclear if expand_divmod will be able to optimize it, and then during DIVMOD expansion if the divisor is constant attempts to expand it as division + modulo and if they actually don't contain any libcalls or division/modulo, they are kept as is, otherwise that sequence is thrown away and divmod optab or libcall is used. 2020-10-06 Jakub Jelinek PR rtl-optimization/97282 * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for constant op2 if it is not a power of two and the type has precision larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD. * internal-fn.c (contains_call_div_mod): New function. (expand_DIVMOD): If last argument is a constant, try to expand it as TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use divmod optab or divmod libfunc. * gcc.target/i386/pr97282.c: New test. --- gcc/internal-fn.c | 67 +++++++++++++++++++++++-- gcc/testsuite/gcc.target/i386/pr97282.c | 25 +++++++++ gcc/tree-ssa-math-opts.c | 17 ++++++- 3 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr97282.c diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index c8970820026..92cb3cd845a 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-phinodes.h" #include "ssa-iterators.h" #include "explow.h" +#include "rtl-iter.h" /* The names of each internal function, indexed by function number. */ const char *const internal_fn_name_array[] = { @@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab) emit_move_insn (lhs_rtx, ops[0].value); } +/* Helper for expand_DIVMOD. Return true if the sequence starting with + INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */ + +static bool +contains_call_div_mod (rtx_insn *insn) +{ + subrtx_iterator::array_type array; + for (; insn; insn = NEXT_INSN (insn)) + if (CALL_P (insn)) + return true; + else if (INSN_P (insn)) + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + switch (GET_CODE (*iter)) + { + case CALL: + case DIV: + case UDIV: + case MOD: + case UMOD: + return true; + default: + break; + } + return false; + } + /* Expand DIVMOD() using: a) optab handler for udivmod/sdivmod if it is available. b) If optab_handler doesn't exist, generate call to @@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_stmt) rtx op1 = expand_normal (arg1); rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); - rtx quotient, remainder, libfunc; + rtx quotient = NULL_RTX, remainder = NULL_RTX; + rtx_insn *insns = NULL; + + if (TREE_CODE (arg1) == INTEGER_CST) + { + /* For DIVMOD by integral constants, there could be efficient code + expanded inline e.g. using shifts and plus/minus. Try to expand + the division and modulo and if it emits any library calls or any + {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or + divmod libcall. */ + struct separate_ops ops; + ops.code = TRUNC_DIV_EXPR; + ops.type = type; + ops.op0 = make_tree (ops.type, op0); + ops.op1 = arg1; + ops.op2 = NULL_TREE; + ops.location = gimple_location (call_stmt); + start_sequence (); + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); + if (contains_call_div_mod (get_insns ())) + quotient = NULL_RTX; + else + { + ops.code = TRUNC_MOD_EXPR; + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); + if (contains_call_div_mod (get_insns ())) + remainder = NULL_RTX; + } + if (remainder) + insns = get_insns (); + end_sequence (); + } + + if (remainder) + emit_insn (insns); /* Check if optab_handler exists for divmod_optab for given mode. */ - if (optab_handler (tab, mode) != CODE_FOR_nothing) + else if (optab_handler (tab, mode) != CODE_FOR_nothing) { quotient = gen_reg_rtx (mode); remainder = gen_reg_rtx (mode); @@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_stmt) } /* Generate call to divmod libfunc if it exists. */ - else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX) + else if (rtx libfunc = optab_libfunc (tab, mode)) targetm.expand_divmod_libfunc (libfunc, mode, op0, op1, "ient, &remainder); diff --git a/gcc/testsuite/gcc.target/i386/pr97282.c b/gcc/testsuite/gcc.target/i386/pr97282.c new file mode 100644 index 00000000000..6fb10c8bb82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97282.c @@ -0,0 +1,25 @@ +/* PR rtl-optimization/97282 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */ + +#ifdef __SIZEOF_INT128__ +typedef __uint128_t T; +#else +typedef unsigned long long T; +#endif + +unsigned long +foo (T x) +{ + if (x == 0) + return 0; + + unsigned long ret = 0; + while (x > 0) + { + ret = ret + x % 10; + x = x / 10; + } + return ret; +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index bdbb9d965f0..4927255d456 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt) /* Disable the transform if either is a constant, since division-by-constant may have specialized expansion. */ - if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) + if (CONSTANT_CLASS_P (op1)) return false; + if (CONSTANT_CLASS_P (op2)) + { + if (integer_pow2p (op2)) + return false; + + if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT + && TYPE_PRECISION (type) <= BITS_PER_WORD) + return false; + + /* If the divisor is not power of 2 and the precision wider than + HWI, expand_divmod punts on that, so in that case it is better + to use divmod optab or libfunc. Similarly if choose_multiplier + might need pre/post shifts of BITS_PER_WORD or more. */ + } + /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should expand using the [su]divv optabs. */ if (TYPE_OVERFLOW_TRAPS (type)) -- 2.30.2