rs6000: Refine small loop unroll in loop_unroll_adjust hook
author Jiufu Guo <guojiufu@linux.ibm.com>
Mon, 11 Nov 2019 06:30:38 +0000 (06:30 +0000)
committer Jiufu Guo <guojiufu@gcc.gnu.org>
Mon, 11 Nov 2019 06:30:38 +0000 (06:30 +0000)
In this patch, a loop unroll adjust hook is introduced for powerpc.  We
can do target-related heuristic adjustment in this hook.  In this patch,
-funroll-loops is enabled for small loops at -O2 and above, with the option
-munroll-only-small-loops guarding the small-loop unrolling, and it works
fine with -flto.

gcc/
2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>

PR tree-optimization/88760
* config/rs6000/rs6000.opt (-munroll-only-small-loops): New option.
* common/config/rs6000/rs6000-common.c
(rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
Turn on -funroll-loops and -munroll-only-small-loops.
[OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
Turn off -munroll-only-small-loops for explicit -funroll-loops.
(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
(rs6000_loop_unroll_adjust): Define it.  Use -munroll-only-small-loops.

gcc/testsuite/
2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>

PR tree-optimization/88760
* gcc.dg/pr59643.c: Update back to r277550.

From-SVN: r278034

gcc/ChangeLog
gcc/common/config/rs6000/rs6000-common.c
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/rs6000.opt
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/pr59643.c

index d34890b7b81466533ad4a0801c75cdfeaaeb97ef..96efa4224bd741c53fe4fd446f662abeb14fafa6 100644 (file)
@@ -1,3 +1,17 @@
+2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>
+
+       PR tree-optimization/88760
+       * config/rs6000/rs6000.opt (-munroll-only-small-loops): New option.
+       * common/config/rs6000/rs6000-common.c
+       (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
+       Turn on -funroll-loops and -munroll-only-small-loops.
+       [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
+       * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
+       set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
+       Turn off -munroll-only-small-loops for explicit -funroll-loops.
+       (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
+       (rs6000_loop_unroll_adjust): Define it.  Use -munroll-only-small-loops.
+
 2019-11-11  Kewen Lin  <linkw@gcc.gnu.org>
 
        * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost):
index b9471964a66a8ca3bf0d2a5598c1b40f75995340..9dc7ae87f434fdfad5190bb423801420e3fa6b7f 100644 (file)
@@ -35,7 +35,14 @@ static const struct default_options rs6000_option_optimization_table[] =
     { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Enable -fsched-pressure for first pass instruction scheduling.  */
     { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
-    { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
+    /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
+       loops at -O2 and above by default.  */
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
+    /* -fweb and -frename-registers are useless in general for rs6000,
+       turn them off.  */
+    { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
+    { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
index 6e67db75590ababb6d759421441d6c5642c3bda0..5f776f87e4948fe9e366a880c5cfeeb3382c23a0 100644 (file)
@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,25 +4543,12 @@ rs6000_option_override_internal (bool global_init_p)
                             global_options.x_param_values,
                             global_options_set.x_param_values);
 
-      /* unroll very small loops 2 time if no -funroll-loops.  */
-      if (!global_options_set.x_flag_unroll_loops
-         && !global_options_set.x_flag_unroll_all_loops)
-       {
-         maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
-                                global_options.x_param_values,
-                                global_options_set.x_param_values);
-
-         maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
-                                global_options.x_param_values,
-                                global_options_set.x_param_values);
-
-         /* If fweb or frename-registers are not specificed in command-line,
-            do not turn them on implicitly.  */
-         if (!global_options_set.x_flag_web)
-           global_options.x_flag_web = 0;
-         if (!global_options_set.x_flag_rename_registers)
-           global_options.x_flag_rename_registers = 0;
-       }
+      /* Explicit -funroll-loops turns -munroll-only-small-loops off.  */
+      if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+          || (global_options_set.x_flag_unroll_all_loops
+              && flag_unroll_all_loops))
+         && !global_options_set.x_unroll_only_small_loops)
+       unroll_only_small_loops = 0;
 
       /* If using typedef char *va_list, signal that
         __builtin_va_start (&ap, 0) can be optimized to
@@ -5105,6 +5095,25 @@ rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/* Implement targetm.loop_unroll_adjust.  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+   if (unroll_only_small_loops)
+    {
+      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+        example we may want to unroll very small loops more times (4 perhaps).
+        We also should use a PARAM for this.  */
+      if (loop->ninsns <= 10)
+       return MIN (2, nunroll);
+      else
+       return 0;
+    }
+
+  return nunroll;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
index 1f37a927703cc0b35ef9967c4bcc534772ec161f..387d3cfd37714532eadd1abb747d008df300622a 100644 (file)
@@ -501,6 +501,10 @@ moptimize-swaps
 Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
 Analyze and remove doubleword swaps from VSX computations.
 
+munroll-only-small-loops
+Target Undocumented Var(unroll_only_small_loops) Init(0) Save
+; Use conservative small loop unrolling.
+
 mpower9-misc
 Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
 Use certain scalar instructions added in ISA 3.0.
index 34a31f087c65652efd47eb93f40b9fa0e85a10cd..cc60856a6a6377c1b861786da7129ba4569edbee 100644 (file)
@@ -1,3 +1,8 @@
+2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>
+
+       PR tree-optimization/88760
+       * gcc.dg/pr59643.c: Update back to r277550.
+
 2019-11-10  Paul Thomas  <pault@gcc.gnu.org>
 
        PR fortran/92123
index 4446f6e6139b2c3c240226073e551c360ed359e3..de78d604bb200d27844963106043990bb9b65e9b 100644 (file)
@@ -1,9 +1,6 @@
 /* PR tree-optimization/59643 */
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-tree-pcom-details" } */
-/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
-/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
-   loop of this case.  */
 
 void
 foo (double *a, double *b, double *c, double d, double e, int n)