From e72531b9cb3c214731f0e0f64f53033d3efb80ca Mon Sep 17 00:00:00 2001 From: Prathamesh Kulkarni Date: Fri, 28 Oct 2016 19:05:12 +0000 Subject: [PATCH] re PR tree-optimization/43721 (Failure to optimise (a/b) and (a%b) into single __aeabi_idivmod call) 2016-10-28 Prathamesh Kulkarni Kugan Vivekanandarajah Jim Wilson PR tree-optimization/43721 * target.def: New hook expand_divmod_libfunc. * doc/tm.texi.in: Add hook for TARGET_EXPAND_DIVMOD_LIBFUNC * doc/tm.texi: Regenerate. * internal-fn.def: Add new entry for DIVMOD ifn. * internal-fn.c (expand_DIVMOD): New. * tree-ssa-math-opts.c: Include optabs-libfuncs.h, tree-eh.h, targhooks.h. (widen_mul_stats): Add new field divmod_calls_inserted. (target_supports_divmod_p): New. (divmod_candidate_p): Likewise. (convert_to_divmod): Likewise. (pass_optimize_widening_mul::execute): Call calculate_dominance_info(), renumber_gimple_stmt_uids() at beginning of function. Call convert_to_divmod() and record stats for divmod. * config/arm/arm.c (arm_expand_divmod_libfunc): Override hook TARGET_EXPAND_DIVMOD_LIBFUNC. * doc/sourcebuild.texi: Add items for arm_divmod_simode, divmod, divmod_simode. testsuite/ * lib/target-supports.exp (check_effective_target_divmod): New. (check_effective_target_divmod_simode): Likewise. (check_effective_target_arm_divmod_simode): Likewise. * gcc.dg/divmod-1-simode.c: New test. * gcc.dg/divmod-1.c: Likewise. * gcc.dg/divmod-2-simode.c: Likewise. * gcc.dg/divmod-2.c: Likewise. * gcc.dg/divmod-3-simode.c: Likewise. * gcc.dg/divmod-3.c: Likewise. * gcc.dg/divmod-4-simode.c: Likewise. * gcc.dg/divmod-4.c: Likewise. * gcc.dg/divmod-5.c: Likewise. * gcc.dg/divmod-6-simode.c: Likewise. * gcc.dg/divmod-6.c: Likewise. * gcc.dg/divmod-7.c: Likewise. 
Co-Authored-By: Jim Wilson Co-Authored-By: Kugan Vivekanandarajah From-SVN: r241660 --- gcc/ChangeLog | 25 +++ gcc/config/arm/arm.c | 34 ++++ gcc/doc/sourcebuild.texi | 11 ++ gcc/doc/tm.texi | 5 + gcc/doc/tm.texi.in | 2 + gcc/internal-fn.c | 47 ++++++ gcc/internal-fn.def | 3 + gcc/target.def | 9 + gcc/testsuite/ChangeLog | 21 +++ gcc/testsuite/gcc.dg/divmod-1-simode.c | 25 +++ gcc/testsuite/gcc.dg/divmod-1.c | 32 ++++ gcc/testsuite/gcc.dg/divmod-2-simode.c | 25 +++ gcc/testsuite/gcc.dg/divmod-2.c | 32 ++++ gcc/testsuite/gcc.dg/divmod-3-simode.c | 23 +++ gcc/testsuite/gcc.dg/divmod-3.c | 30 ++++ gcc/testsuite/gcc.dg/divmod-4-simode.c | 23 +++ gcc/testsuite/gcc.dg/divmod-4.c | 30 ++++ gcc/testsuite/gcc.dg/divmod-5.c | 19 +++ gcc/testsuite/gcc.dg/divmod-6-simode.c | 26 +++ gcc/testsuite/gcc.dg/divmod-6.c | 33 ++++ gcc/testsuite/gcc.dg/divmod-7.c | 21 +++ gcc/testsuite/lib/target-supports.exp | 38 +++++ gcc/tree-ssa-math-opts.c | 221 +++++++++++++++++++++++++ 23 files changed, 735 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/divmod-1-simode.c create mode 100644 gcc/testsuite/gcc.dg/divmod-1.c create mode 100644 gcc/testsuite/gcc.dg/divmod-2-simode.c create mode 100644 gcc/testsuite/gcc.dg/divmod-2.c create mode 100644 gcc/testsuite/gcc.dg/divmod-3-simode.c create mode 100644 gcc/testsuite/gcc.dg/divmod-3.c create mode 100644 gcc/testsuite/gcc.dg/divmod-4-simode.c create mode 100644 gcc/testsuite/gcc.dg/divmod-4.c create mode 100644 gcc/testsuite/gcc.dg/divmod-5.c create mode 100644 gcc/testsuite/gcc.dg/divmod-6-simode.c create mode 100644 gcc/testsuite/gcc.dg/divmod-6.c create mode 100644 gcc/testsuite/gcc.dg/divmod-7.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a3b49ecec75..4302280b832 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2016-10-28 Prathamesh Kulkarni + Kugan Vivekanandarajah + Jim Wilson + + PR tree-optimization/43721 + * target.def: New hook expand_divmod_libfunc. 
+ * doc/tm.texi.in: Add hook for TARGET_EXPAND_DIVMOD_LIBFUNC + * doc/tm.texi: Regenerate. + * internal-fn.def: Add new entry for DIVMOD ifn. + * internal-fn.c (expand_DIVMOD): New. + * tree-ssa-math-opts.c: Include optabs-libfuncs.h, tree-eh.h, + targhooks.h. + (widen_mul_stats): Add new field divmod_calls_inserted. + (target_supports_divmod_p): New. + (divmod_candidate_p): Likewise. + (convert_to_divmod): Likewise. + (pass_optimize_widening_mul::execute): Call + calculate_dominance_info(), renumber_gimple_stmt_uids() at + beginning of function. Call convert_to_divmod() + and record stats for divmod. + * config/arm/arm.c (arm_expand_divmod_libfunc): Override hook + TARGET_EXPAND_DIVMOD_LIBFUNC. + * doc/sourcebuild.texi: Add items for arm_divmod_simode, divmod, + divmod_simode. + 2016-10-28 Eric Botcazou * dojump.c (do_jump_by_parts_greater_rtx): Invert probability when diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 3c4c7042d9c..f47edbab07e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -62,6 +62,7 @@ #include "builtins.h" #include "tm-constrs.h" #include "rtl-iter.h" +#include "optabs-libfuncs.h" /* This file should be included last. */ #include "target-def.h" @@ -304,6 +305,7 @@ static section *arm_function_section (tree, enum node_frequency, bool, bool); static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num); static unsigned int arm_elf_section_type_flags (tree decl, const char *name, int reloc); +static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *); /* Table of machine attributes. 
*/ static const struct attribute_spec arm_attribute_table[] = @@ -739,6 +741,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_SECTION_TYPE_FLAGS #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags +#undef TARGET_EXPAND_DIVMOD_LIBFUNC +#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -30845,4 +30850,33 @@ arm_elf_section_type_flags (tree decl, const char *name, int reloc) return flags; } +/* Generate call to __aeabi_[mode]divmod (op0, op1). */ + +static void +arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode, + rtx op0, rtx op1, + rtx *quot_p, rtx *rem_p) +{ + if (mode == SImode) + gcc_assert (!TARGET_IDIV); + + machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode), + MODE_INT); + + rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + libval_mode, 2, + op0, GET_MODE (op0), + op1, GET_MODE (op1)); + + rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0); + rtx remainder = simplify_gen_subreg (mode, libval, libval_mode, + GET_MODE_SIZE (mode)); + + gcc_assert (quotient); + gcc_assert (remainder); + + *quot_p = quotient; + *rem_p = remainder; +} + #include "gt-arm.h" diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 07c75e2847a..39de0ffb082 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1675,6 +1675,10 @@ and @code{MOVT} instructions available. ARM target generates Thumb-1 code for @code{-mthumb} with @code{CBZ} and @code{CBNZ} instructions available. +@item arm_divmod_simode +ARM target for which divmod transform is disabled, if it supports hardware +div instruction. + @end table @subsubsection AArch64-specific attributes @@ -1848,6 +1852,13 @@ Target requires a command line argument to enable a SIMD instruction set. @item pie_copyreloc The x86-64 target linker supports PIE with copy reloc. 
+ +@item divmod +Target supporting hardware divmod insn or divmod libcall. + +@item divmod_simode +Target supporting hardware divmod insn or divmod libcall for SImode. + @end table @subsubsection Environment attributes diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index cffcfe9cca9..d2bcdca6d8c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -7096,6 +7096,11 @@ This is firstly introduced on ARM/AArch64 targets, please refer to the hook implementation for how different fusion types are supported. @end deftypefn +@deftypefn {Target Hook} void TARGET_EXPAND_DIVMOD_LIBFUNC (rtx @var{libfunc}, machine_mode @var{mode}, rtx @var{op0}, rtx @var{op1}, rtx *@var{quot}, rtx *@var{rem}) +Define this hook for enabling divmod transform if the port does not have +hardware divmod insn but defines target-specific divmod libfuncs. +@end deftypefn + @node Sections @section Dividing the Output into Sections (Texts, Data, @dots{}) @c the above section title is WAY too long. maybe cut the part between diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index d2dd45fdea6..3399465fa05 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4892,6 +4892,8 @@ them: try the first ones in this list first. @hook TARGET_SCHED_FUSION_PRIORITY +@hook TARGET_EXPAND_DIVMOD_LIBFUNC + @node Sections @section Dividing the Output into Sections (Texts, Data, @dots{}) @c the above section title is WAY too long. maybe cut the part between diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 156ba31047c..1eeb15e4a50 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -2232,6 +2232,53 @@ expand_LAUNDER (internal_fn, gcall *call) expand_assignment (lhs, gimple_call_arg (call, 0), false); } +/* Expand DIVMOD() using: + a) optab handler for udivmod/sdivmod if it is available. + b) If optab_handler doesn't exist, generate call to + target-specific divmod libfunc. 
*/ + +static void +expand_DIVMOD (internal_fn, gcall *call_stmt) +{ + tree lhs = gimple_call_lhs (call_stmt); + tree arg0 = gimple_call_arg (call_stmt, 0); + tree arg1 = gimple_call_arg (call_stmt, 1); + + gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE); + tree type = TREE_TYPE (TREE_TYPE (lhs)); + machine_mode mode = TYPE_MODE (type); + bool unsignedp = TYPE_UNSIGNED (type); + optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab; + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + + rtx quotient, remainder, libfunc; + + /* Check if optab_handler exists for divmod_optab for given mode. */ + if (optab_handler (tab, mode) != CODE_FOR_nothing) + { + quotient = gen_reg_rtx (mode); + remainder = gen_reg_rtx (mode); + expand_twoval_binop (tab, op0, op1, quotient, remainder, unsignedp); + } + + /* Generate call to divmod libfunc if it exists. */ + else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX) + targetm.expand_divmod_libfunc (libfunc, mode, op0, op1, + &quotient, &remainder); + + else + gcc_unreachable (); + + /* Wrap the return value (quotient, remainder) within COMPLEX_EXPR. */ + expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs), + make_tree (TREE_TYPE (arg0), quotient), + make_tree (TREE_TYPE (arg1), remainder)), + target, VOIDmode, EXPAND_NORMAL); +} + /* Expand a call to FN using the operands in STMT. FN has a single output operand and NARGS input operands. */ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 28863dfe4b3..cf2c402f335 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -201,6 +201,9 @@ DEF_INTERNAL_FN (FALLTHROUGH, ECF_LEAF | ECF_NOTHROW, NULL) /* To implement __builtin_launder. */ DEF_INTERNAL_FN (LAUNDER, ECF_LEAF | ECF_NOTHROW | ECF_NOVOPS, NULL) +/* Divmod function. 
*/ +DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL) + #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_OPTAB_FN diff --git a/gcc/target.def b/gcc/target.def index 20def246043..ae0ea16e57f 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -5055,6 +5055,15 @@ Normally, this is not needed.", bool, (const_tree field, machine_mode mode), default_member_type_forces_blk) +/* See tree-ssa-math-opts.c:divmod_candidate_p for conditions + that gate the divmod transform. */ +DEFHOOK +(expand_divmod_libfunc, + "Define this hook for enabling divmod transform if the port does not have\n\ +hardware divmod insn but defines target-specific divmod libfuncs.", + void, (rtx libfunc, machine_mode mode, rtx op0, rtx op1, rtx *quot, rtx *rem), + NULL) + /* Return the class for a secondary reload, and fill in extra information. */ DEFHOOK (secondary_reload, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3a4041de42d..cd2b1eb073c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2016-10-28 Prathamesh Kulkarni + Kugan Vivekanandarajah + Jim Wilson + + PR tree-optimization/43721 + * lib/target-supports.exp (check_effective_target_divmod): New. + (check_effective_target_divmod_simode): Likewise. + (check_effective_target_arm_divmod_simode): Likewise. + * gcc.dg/divmod-1-simode.c: New test. + * gcc.dg/divmod-1.c: Likewise. + * gcc.dg/divmod-2-simode.c: Likewise. + * gcc.dg/divmod-2.c: Likewise. + * gcc.dg/divmod-3-simode.c: Likewise. + * gcc.dg/divmod-3.c: Likewise. + * gcc.dg/divmod-4-simode.c: Likewise. + * gcc.dg/divmod-4.c: Likewise. + * gcc.dg/divmod-5.c: Likewise. + * gcc.dg/divmod-6-simode.c: Likewise. + * gcc.dg/divmod-6.c: Likewise. + * gcc.dg/divmod-7.c: Likewise. 
+ 2016-10-28 Kyrylo Tkachov Jakub Jelinek Andrew Pinski diff --git a/gcc/testsuite/gcc.dg/divmod-1-simode.c b/gcc/testsuite/gcc.dg/divmod-1-simode.c new file mode 100644 index 00000000000..9e477997bcf --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-1-simode.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div dominates mod. */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + if (cond) \ + foo (); \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(SImode, SImode, 1) +FOO(SImode, USImode, 2) +FOO(USImode, USImode, 3) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-1.c b/gcc/testsuite/gcc.dg/divmod-1.c new file mode 100644 index 00000000000..edcc2a107da --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-1.c @@ -0,0 +1,32 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div dominates mod. 
*/ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +typedef int DImode __attribute__((mode(DI))); +typedef unsigned UDImode __attribute__((mode(DI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + if (cond) \ + foo (); \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(SImode, DImode, 1) +FOO(SImode, UDImode, 2) +FOO(USImode, DImode, 3) +FOO(USImode, UDImode, 4) +FOO(DImode, DImode, 5) +FOO(DImode, UDImode, 6) +FOO(UDImode, UDImode, 7) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-2-simode.c b/gcc/testsuite/gcc.dg/divmod-2-simode.c new file mode 100644 index 00000000000..fa28beb3cef --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-2-simode.c @@ -0,0 +1,25 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod dominates div. */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + if (cond) \ + foo (); \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(SImode, SImode, 1) +FOO(SImode, USImode, 2) +FOO(USImode, USImode, 3) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-2.c b/gcc/testsuite/gcc.dg/divmod-2.c new file mode 100644 index 00000000000..ded732e121d --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-2.c @@ -0,0 +1,32 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod dominates div. 
*/ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +typedef int DImode __attribute__((mode(DI))); +typedef unsigned UDImode __attribute__((mode(DI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + if (cond) \ + foo (); \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(SImode, DImode, 1) +FOO(SImode, UDImode, 2) +FOO(USImode, DImode, 3) +FOO(USImode, UDImode, 4) +FOO(DImode, DImode, 5) +FOO(DImode, UDImode, 6) +FOO(UDImode, UDImode, 7) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-3-simode.c b/gcc/testsuite/gcc.dg/divmod-3-simode.c new file mode 100644 index 00000000000..9dee5bf603b --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-3-simode.c @@ -0,0 +1,23 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div comes before mod in same bb. */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(SImode, SImode, 1) +FOO(SImode, USImode, 2) +FOO(USImode, USImode, 3) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-3.c b/gcc/testsuite/gcc.dg/divmod-3.c new file mode 100644 index 00000000000..02aa367ac6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-3.c @@ -0,0 +1,30 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div comes before mod in same bb. 
*/ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +typedef int DImode __attribute__((mode(DI))); +typedef unsigned UDImode __attribute__((mode(DI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(SImode, DImode, 1) +FOO(SImode, UDImode, 2) +FOO(USImode, DImode, 3) +FOO(USImode, UDImode, 4) +FOO(DImode, DImode, 5) +FOO(DImode, UDImode, 6) +FOO(UDImode, UDImode, 7) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-4-simode.c b/gcc/testsuite/gcc.dg/divmod-4-simode.c new file mode 100644 index 00000000000..dbe29cb761d --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-4-simode.c @@ -0,0 +1,23 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod comes before div in same bb. */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(SImode, SImode, 1) +FOO(SImode, USImode, 2) +FOO(USImode, USImode, 3) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-4.c b/gcc/testsuite/gcc.dg/divmod-4.c new file mode 100644 index 00000000000..861ecbdec4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-4.c @@ -0,0 +1,30 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod comes before div in same bb. 
*/ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +typedef int DImode __attribute__((mode(DI))); +typedef unsigned UDImode __attribute__((mode(DI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(SImode, DImode, 3) +FOO(SImode, UDImode, 4) +FOO(USImode, DImode, 6) +FOO(USImode, UDImode, 7) +FOO(DImode, DImode, 8) +FOO(DImode, UDImode, 9) +FOO(UDImode, UDImode, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-5.c b/gcc/testsuite/gcc.dg/divmod-5.c new file mode 100644 index 00000000000..8a8cee50ae2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-5.c @@ -0,0 +1,19 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div and mod are not in same bb and + bb's containing div and mod don't dominate each other. 
*/ + +int f(int x, int y) +{ + int q = 0; + int r = 0; + extern int cond; + + if (cond) + q = x / y; + + r = x % y; + return q + r; +} + +/* { dg-final { scan-tree-dump-times "DIVMOD" 0 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-6-simode.c b/gcc/testsuite/gcc.dg/divmod-6-simode.c new file mode 100644 index 00000000000..1107f760b42 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-6-simode.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r1 = 0, r2 = 0; \ + if (cond) \ + r1 = x % y; \ + else \ + r2 = x % y; \ + return q + r1 + r2; \ +} + +FOO(SImode, SImode, 1) +FOO(SImode, USImode, 2) +FOO(USImode, USImode, 3) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-6.c b/gcc/testsuite/gcc.dg/divmod-6.c new file mode 100644 index 00000000000..495ebaff805 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-6.c @@ -0,0 +1,33 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + +typedef int SImode __attribute__((mode(SI))); +typedef unsigned USImode __attribute__((mode(SI))); + +typedef int DImode __attribute__((mode(DI))); +typedef unsigned UDImode __attribute__((mode(DI))); + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r1 = 0, r2 = 0; \ + if (cond) \ + r1 = x % y; \ + else \ + r2 = x % y; \ + return q + r1 + r2; \ +} + +FOO(SImode, DImode, 3) +FOO(SImode, UDImode, 4) +FOO(USImode, DImode, 6) +FOO(USImode, UDImode, 7) +FOO(DImode, DImode, 8) +FOO(DImode, UDImode, 9) +FOO(UDImode, UDImode, 10) 
+ +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-7.c b/gcc/testsuite/gcc.dg/divmod-7.c new file mode 100644 index 00000000000..faa90b3ac8f --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-7.c @@ -0,0 +1,21 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + +int f(int x, int y) +{ + int q = 0, r1 = 0, r2 = 0; + extern int cond; + + if (cond) + q = x / y; + else + { + r1 = x % y; + return q + r1; + } + + r2 = x % y; + return q + r2; +} + +/* { dg-final { scan-tree-dump-times "DIVMOD" 1 "widening_mul" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index b5a9faab5a4..938f2c0a556 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8091,3 +8091,41 @@ proc check_effective_target_profile_update_atomic {} { int main (void) { return 0; } } "-fprofile-update=atomic -fprofile-generate"] } + +#For versions of ARM architectures that have hardware div insn, +#disable the divmod transform + +proc check_effective_target_arm_divmod_simode { } { + return [check_no_compiler_messages arm_divmod assembly { + #ifdef __ARM_ARCH_EXT_IDIV__ + #error has div insn + #endif + int i; + }] +} + +# Return 1 if target supports divmod hardware insn or divmod libcall. + +proc check_effective_target_divmod { } { + #TODO: Add checks for all targets that have either hardware divmod insn + # or define libfunc for divmod. + if { [istarget arm*-*-*] + || [istarget x86_64-*-*] } { + return 1 + } + return 0 +} + +# Return 1 if target supports divmod for SImode. The reason for +# separating this from check_effective_target_divmod is that +# some versions of ARM architecture define div instruction +# only for simode, and for these archs, we do not want to enable +# divmod transform for simode. 
+ +proc check_effective_target_divmod_simode { } { + if { [istarget arm*-*-*] } { + return [check_effective_target_arm_divmod_simode] + } + + return [check_effective_target_divmod] +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 0cea1a8472d..c315da88ce4 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -112,6 +112,9 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "internal-fn.h" #include "case-cfn-macros.h" +#include "optabs-libfuncs.h" +#include "tree-eh.h" +#include "targhooks.h" /* This structure represents one basic block that either computes a division, or is a common dominator for basic block that compute a @@ -184,6 +187,9 @@ static struct /* Number of fp fused multiply-add ops inserted. */ int fmas_inserted; + + /* Number of divmod calls inserted. */ + int divmod_calls_inserted; } widen_mul_stats; /* The instance of "struct occurrence" representing the highest @@ -3793,6 +3799,213 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, return true; } +/* Return true if target has support for divmod. */ + +static bool +target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) +{ + /* If target supports hardware divmod insn, use it for divmod. */ + if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) + return true; + + /* Check if libfunc for divmod is available. */ + rtx libfunc = optab_libfunc (divmod_optab, mode); + if (libfunc != NULL_RTX) + { + /* If optab_handler exists for div_optab, perhaps in a wider mode, + we don't want to use the libfunc even if it exists for given mode. */ + for (machine_mode div_mode = mode; + div_mode != VOIDmode; + div_mode = GET_MODE_WIDER_MODE (div_mode)) + if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) + return false; + + return targetm.expand_divmod_libfunc != NULL; + } + + return false; +} + +/* Check if stmt is candidate for divmod transform. 
*/ + +static bool +divmod_candidate_p (gassign *stmt) +{ + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); + enum machine_mode mode = TYPE_MODE (type); + optab divmod_optab, div_optab; + + if (TYPE_UNSIGNED (type)) + { + divmod_optab = udivmod_optab; + div_optab = udiv_optab; + } + else + { + divmod_optab = sdivmod_optab; + div_optab = sdiv_optab; + } + + tree op1 = gimple_assign_rhs1 (stmt); + tree op2 = gimple_assign_rhs2 (stmt); + + /* Disable the transform if either is a constant, since division-by-constant + may have specialized expansion. */ + if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) + return false; + + /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should + expand using the [su]divv optabs. */ + if (TYPE_OVERFLOW_TRAPS (type)) + return false; + + if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) + return false; + + return true; +} + +/* This function looks for: + t1 = a TRUNC_DIV_EXPR b; + t2 = a TRUNC_MOD_EXPR b; + and transforms it to the following sequence: + complex_tmp = DIVMOD (a, b); + t1 = REALPART_EXPR(complex_tmp); + t2 = IMAGPART_EXPR(complex_tmp); + For conditions enabling the transform see divmod_candidate_p(). + + The pass has three parts: + 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all + other trunc_div_expr and trunc_mod_expr stmts. + 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt + to stmts vector. + 3) Insert DIVMOD call just before top_stmt and update entries in + stmts vector to use return value of DIVMOD (REALPART_EXPR for div, + IMAGPART_EXPR for mod). 
*/ + +static bool +convert_to_divmod (gassign *stmt) +{ + if (stmt_can_throw_internal (stmt) + || !divmod_candidate_p (stmt)) + return false; + + tree op1 = gimple_assign_rhs1 (stmt); + tree op2 = gimple_assign_rhs2 (stmt); + + imm_use_iterator use_iter; + gimple *use_stmt; + auto_vec<gimple *> stmts; + + gimple *top_stmt = stmt; + basic_block top_bb = gimple_bb (stmt); + + /* Part 1: Try to set top_stmt to "topmost" stmt that dominates + at-least stmt and possibly other trunc_div/trunc_mod stmts + having same operands as stmt. */ + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) + { + if (is_gimple_assign (use_stmt) + && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR + || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) + && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) + && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) + { + if (stmt_can_throw_internal (use_stmt)) + continue; + + basic_block bb = gimple_bb (use_stmt); + + if (bb == top_bb) + { + if (gimple_uid (use_stmt) < gimple_uid (top_stmt)) + top_stmt = use_stmt; + } + else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) + { + top_bb = bb; + top_stmt = use_stmt; + } + } + } + + tree top_op1 = gimple_assign_rhs1 (top_stmt); + tree top_op2 = gimple_assign_rhs2 (top_stmt); + + stmts.safe_push (top_stmt); + bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR); + + /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb + to stmts vector. The 2nd loop will always add stmt to stmts vector, since + gimple_bb (top_stmt) dominates gimple_bb (stmt), so the + 2nd loop ends up adding at-least single trunc_mod_expr stmt. 
*/ + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) + { + if (is_gimple_assign (use_stmt) + && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR + || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) + && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) + && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) + { + if (use_stmt == top_stmt + || stmt_can_throw_internal (use_stmt) + || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) + continue; + + stmts.safe_push (use_stmt); + if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) + div_seen = true; + } + } + + if (!div_seen) + return false; + + /* Part 3: Create libcall to internal fn DIVMOD: + divmod_tmp = DIVMOD (op1, op2). */ + + gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); + tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)), + call_stmt, "divmod_tmp"); + gimple_call_set_lhs (call_stmt, res); + + /* Insert the call before top_stmt. */ + gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); + gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); + + widen_mul_stats.divmod_calls_inserted++; + + /* Update all statements in stmts vector: + lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR + lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR. 
*/ + + for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i) + { + tree new_rhs; + + switch (gimple_assign_rhs_code (use_stmt)) + { + case TRUNC_DIV_EXPR: + new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); + break; + + case TRUNC_MOD_EXPR: + new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); + break; + + default: + gcc_unreachable (); + } + + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + gimple_assign_set_rhs_from_tree (&gsi, new_rhs); + update_stmt (use_stmt); + } + + return true; +} /* Find integer multiplications where the operands are extended from smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR @@ -3837,6 +4050,8 @@ pass_optimize_widening_mul::execute (function *fun) bool cfg_changed = false; memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); + calculate_dominance_info (CDI_DOMINATORS); + renumber_gimple_stmt_uids (); FOR_EACH_BB_FN (bb, fun) { @@ -3870,6 +4085,10 @@ pass_optimize_widening_mul::execute (function *fun) match_uaddsub_overflow (&gsi, stmt, code); break; + case TRUNC_MOD_EXPR: + convert_to_divmod (as_a<gassign *> (stmt)); + break; + default:; } } @@ -3916,6 +4135,8 @@ pass_optimize_widening_mul::execute (function *fun) widen_mul_stats.maccs_inserted); statistics_counter_event (fun, "fused multiply-adds inserted", widen_mul_stats.fmas_inserted); + statistics_counter_event (fun, "divmod calls inserted", + widen_mul_stats.divmod_calls_inserted); return cfg_changed ? TODO_cleanup_cfg : 0; } -- 2.30.2