From: Jiufu Guo Date: Mon, 11 Nov 2019 06:30:38 +0000 (+0000) Subject: rs6000: Refine small loop unroll in loop_unroll_adjust hook X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=48f657953fe5f90218003c93feafdb3158e6d3d2;p=gcc.git rs6000: Refine small loop unroll in loop_unroll_adjust hook In this patch, a loop unroll adjust hook is introduced for powerpc. We can do target-related heuristic adjustments in this hook. In this patch, -funroll-loops is enabled for small loops at -O2 and above, with the option -munroll-only-small-loops guarding the small-loop unrolling, and it works fine with -flto. gcc/ 2019-11-11 Jiufu Guo PR tree-optimization/88760 * gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option. * gcc/common/config/rs6000/rs6000-common.c (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]: Turn on -funroll-loops and -munroll-only-small-loops. [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers. * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS. Turn off -munroll-only-small-loops for explicit -funroll-loops. (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook. (rs6000_loop_unroll_adjust): Define it. Use -munroll-only-small-loops. gcc/testsuite/ 2019-11-11 Jiufu Guo PR tree-optimization/88760 * gcc.dg/pr59643.c: Update back to r277550. From-SVN: r278034 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d34890b7b81..96efa4224bd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2019-11-11 Jiufu Guo + + PR tree-optimization/88760 + * gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option. + * gcc/common/config/rs6000/rs6000-common.c + (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]: + Turn on -funroll-loops and -munroll-only-small-loops. + [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers. 
+ * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove + set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS. + Turn off -munroll-only-small-loops for explicit -funroll-loops. + (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook. + (rs6000_loop_unroll_adjust): Define it. Use -munroll-only-small-loops. + 2019-11-11 Kewen Lin * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): diff --git a/gcc/common/config/rs6000/rs6000-common.c b/gcc/common/config/rs6000/rs6000-common.c index b9471964a66..9dc7ae87f43 100644 --- a/gcc/common/config/rs6000/rs6000-common.c +++ b/gcc/common/config/rs6000/rs6000-common.c @@ -35,7 +35,14 @@ static const struct default_options rs6000_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, /* Enable -fsched-pressure for first pass instruction scheduling. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 }, + /* Enable -munroll-only-small-loops with -funroll-loops to unroll small + loops at -O2 and above by default. */ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, + /* -fweb and -frename-registers are useless in general for rs6000, + turn them off. 
*/ + { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 }, + { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 }, { OPT_LEVELS_NONE, 0, NULL, 0 } }; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6e67db75590..5f776f87e49 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_VECTORIZE_DESTROY_COST_DATA #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins #undef TARGET_BUILTIN_DECL @@ -4540,25 +4543,12 @@ rs6000_option_override_internal (bool global_init_p) global_options.x_param_values, global_options_set.x_param_values); - /* unroll very small loops 2 time if no -funroll-loops. */ - if (!global_options_set.x_flag_unroll_loops - && !global_options_set.x_flag_unroll_all_loops) - { - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, - global_options.x_param_values, - global_options_set.x_param_values); - - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, - global_options.x_param_values, - global_options_set.x_param_values); - - /* If fweb or frename-registers are not specificed in command-line, - do not turn them on implicitly. */ - if (!global_options_set.x_flag_web) - global_options.x_flag_web = 0; - if (!global_options_set.x_flag_rename_registers) - global_options.x_flag_rename_registers = 0; - } + /* Explicit -funroll-loops turns -munroll-only-small-loops off. 
*/ + if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops) + || (global_options_set.x_flag_unroll_all_loops + && flag_unroll_all_loops)) + && !global_options_set.x_unroll_only_small_loops) + unroll_only_small_loops = 0; /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) can be optimized to @@ -5105,6 +5095,25 @@ rs6000_destroy_cost_data (void *data) free (data); } +/* Implement targetm.loop_unroll_adjust. */ + +static unsigned +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) +{ + if (unroll_only_small_loops) + { + /* TODO: This is hardcoded to 10 right now. It can be refined, for + example we may want to unroll very small loops more times (4 perhaps). + We also should use a PARAM for this. */ + if (loop->ninsns <= 10) + return MIN (2, nunroll); + else + return 0; + } + + return nunroll; +} + /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a library with vectorized intrinsics. */ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 1f37a927703..387d3cfd377 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -501,6 +501,10 @@ moptimize-swaps Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save Analyze and remove doubleword swaps from VSX computations. +munroll-only-small-loops +Target Undocumented Var(unroll_only_small_loops) Init(0) Save +; Use conservative small loop unrolling. + mpower9-misc Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags) Use certain scalar instructions added in ISA 3.0. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 34a31f087c6..cc60856a6a6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-11-11 Jiufu Guo + + PR tree-optimization/88760 + * gcc.dg/pr59643.c: Update back to r277550. 
+ 2019-11-10 Paul Thomas PR fortran/92123 diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c index 4446f6e6139..de78d604bb2 100644 --- a/gcc/testsuite/gcc.dg/pr59643.c +++ b/gcc/testsuite/gcc.dg/pr59643.c @@ -1,9 +1,6 @@ /* PR tree-optimization/59643 */ /* { dg-do compile } */ /* { dg-options "-O3 -fdump-tree-pcom-details" } */ -/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */ -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the - loop of this case. */ void foo (double *a, double *b, double *c, double d, double e, int n)