Fix overflows in -fprofile-reorder-functions

author Jan Hubicka <hubicka@ucw.cz>

Sun, 8 Dec 2019 17:02:30 +0000 (18:02 +0100)

committer Jan Hubicka <hubicka@gcc.gnu.org>

Sun, 8 Dec 2019 17:02:30 +0000 (17:02 +0000)
author Jan Hubicka <hubicka@ucw.cz>
Sun, 8 Dec 2019 17:02:30 +0000 (18:02 +0100)
committer Jan Hubicka <hubicka@gcc.gnu.org>
Sun, 8 Dec 2019 17:02:30 +0000 (17:02 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 2b6558a12e6204ac46867dd49fde5bdedba5ae92..5bb4a5b7ad2b0ed13cc8c14bed5380699fa8abcf 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2019-12-07  Jan Hubicka  <hubicka@ucw.cz>
+
+       * cgraph.c (cgraph_node::dump): Make tp_first_run 64bit.
+       * cgraph.h (cgraph_node): Likewise.
+       (tp_first_run_node_cmp): Declare.
+       * cgraphunit.c (node_cmp): Rename to ...
+       (tp_first_run_node_cmp): ... this; export; watch for 64bit overflows;
+       clear tp_first_run for no_reorder and !flag_profile_reorder_functions.
+       (expand_all_functions): Collect tp_first_run and normal functions to
+       two vectors so the other functions remain sorted.  Do not check
+       flag_profile_reorder_functions; it is a function-local flag.
+       * profile.c (compute_value_histograms): Update tp_first_run printing.
+
  2019-12-07  Jan Hubicka  <hubicka@ucw.cz>
  
         * opts.c (common_handle_option): Do not clear ipa_reference for
diff --git a/gcc/cgraph.c b/gcc/cgraph.c

index 7288440708ee52472a9107bfead32bebc6fc83fe..5c72e832a232276b0807504f0336e9ab2aa376f2 100644 (file)
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1954,7 +1954,7 @@ cgraph_node::dump (FILE *f)
        count.dump (f);
      }
    if (tp_first_run > 0)
-    fprintf (f, " first_run:%i", tp_first_run);
+    fprintf (f, " first_run:%" PRId64, (int64_t) tp_first_run);
    if (origin)
      fprintf (f, " nested in:%s", origin->asm_name ());
    if (gimple_has_body_p (decl))
diff --git a/gcc/cgraph.h b/gcc/cgraph.h

index 9c086fedaef4f436eae002e50c073d2c3c6626dc..1b8a167fbdcfb258d53f998a33d66d08c4560792 100644 (file)
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1430,6 +1430,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
  
    /* Expected number of executions: calculated in profile.c.  */
    profile_count count;
+  /* Time profiler: first run of function.  */
+  gcov_type tp_first_run;
    /* How to scale counts at materialization time; used to merge
       LTO units with different number of profile runs.  */
    int count_materialization_scale;
@@ -1437,8 +1439,6 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
    unsigned int profile_id;
    /* ID of the translation unit.  */
    int unit_id;
-  /* Time profiler: first run of function.  */
-  int tp_first_run;
  
    /* Set when decl is an abstract function pointed to by the
       ABSTRACT_DECL_ORIGIN of a reachable function.  */
@@ -2463,6 +2463,7 @@ cgraph_inline_failed_type_t cgraph_inline_failed_type (cgraph_inline_failed_t);
  
  /* In cgraphunit.c  */
  void cgraphunit_c_finalize (void);
+int tp_first_run_node_cmp (const void *pa, const void *pb);
  
  /*  Initialize datastructures so DECL is a function in lowered gimple form.
      IN_SSA is true if the gimple is in SSA.  */
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c

index 1b3d28121528123f5516314fe9c7923bbc5f4e2c..0468ccfc483fd2f75e7ae49092eb58fa8b233310 100644 (file)
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2359,19 +2359,33 @@ cgraph_node::expand (void)
  /* Node comparator that is responsible for the order that corresponds
     to time when a function was launched for the first time.  */
  
-static int
-node_cmp (const void *pa, const void *pb)
+int
+tp_first_run_node_cmp (const void *pa, const void *pb)
  {
    const cgraph_node *a = *(const cgraph_node * const *) pa;
    const cgraph_node *b = *(const cgraph_node * const *) pb;
+  gcov_type tp_first_run_a = a->tp_first_run;
+  gcov_type tp_first_run_b = b->tp_first_run;
+
+  if (!opt_for_fn (a->decl, flag_profile_reorder_functions)
+      || a->no_reorder)
+    tp_first_run_a = 0;
+  if (!opt_for_fn (b->decl, flag_profile_reorder_functions)
+      || b->no_reorder)
+    tp_first_run_b = 0;
+
+  if (tp_first_run_a == tp_first_run_b)
+    return a->order - b->order;
  
    /* Functions with time profile must be before these without profile.  */
-  if (!a->tp_first_run || !b->tp_first_run)
-    return a->tp_first_run - b->tp_first_run;
+  if (!tp_first_run_a || !tp_first_run_b)
+    return tp_first_run_b ? 1 : -1;
  
-  return a->tp_first_run != b->tp_first_run
-        ? b->tp_first_run - a->tp_first_run
-        : b->order - a->order;
+  /* Watch for overflow - tp_first_run is 64bit.  */
+  if (tp_first_run_a > tp_first_run_b)
+    return 1;
+  else
+    return -1;
  }
  
  /* Expand all functions that must be output.
@@ -2390,8 +2404,10 @@ expand_all_functions (void)
    cgraph_node *node;
    cgraph_node **order = XCNEWVEC (cgraph_node *,
                                          symtab->cgraph_count);
+  cgraph_node **tp_first_run_order = XCNEWVEC (cgraph_node *,
+                                        symtab->cgraph_count);
    unsigned int expanded_func_count = 0, profiled_func_count = 0;
-  int order_pos, new_order_pos = 0;
+  int order_pos, tp_first_run_order_pos = 0, new_order_pos = 0;
    int i;
  
    order_pos = ipa_reverse_postorder (order);
@@ -2401,11 +2417,17 @@ expand_all_functions (void)
       optimization.  So we must be sure to not reference them.  */
    for (i = 0; i < order_pos; i++)
      if (order[i]->process)
-      order[new_order_pos++] = order[i];
-
-  if (flag_profile_reorder_functions)
-    qsort (order, new_order_pos, sizeof (cgraph_node *), node_cmp);
+      {
+       if (order[i]->tp_first_run
+           && opt_for_fn (order[i]->decl, flag_profile_reorder_functions))
+         tp_first_run_order[tp_first_run_order_pos++] = order[i];
+       else
+          order[new_order_pos++] = order[i];
+      }
  
+  /* Output functions in RPO so callers get optimized before callees.  This
+     lets ipa-ra and other propagators work.
+     FIXME: This is far from optimal code layout.  */
    for (i = new_order_pos - 1; i >= 0; i--)
      {
        node = order[i];
@@ -2413,13 +2435,25 @@ expand_all_functions (void)
        if (node->process)
         {
           expanded_func_count++;
-         if(node->tp_first_run)
-           profiled_func_count++;
+         node->process = 0;
+         node->expand ();
+       }
+    }
+  qsort (tp_first_run_order, tp_first_run_order_pos,
+        sizeof (cgraph_node *), tp_first_run_node_cmp);
+  for (i = 0; i < tp_first_run_order_pos; i++)
+    {
+      node = tp_first_run_order[i];
+
+      if (node->process)
+       {
+         expanded_func_count++;
+         profiled_func_count++;
  
           if (symtab->dump_file)
             fprintf (symtab->dump_file,
-                    "Time profile order in expand_all_functions:%s:%d\n",
-                    node->asm_name (), node->tp_first_run);
+                    "Time profile order in expand_all_functions:%s:%" PRId64
+                    "\n", node->asm_name (), (int64_t) node->tp_first_run);
           node->process = 0;
           node->expand ();
         }
@@ -2429,7 +2463,7 @@ expand_all_functions (void)
        fprintf (dump_file, "Expanded functions with time profile (%s):%u/%u\n",
                 main_input_filename, profiled_func_count, expanded_func_count);
  
-  if (symtab->dump_file && flag_profile_reorder_functions)
+  if (symtab->dump_file && tp_first_run_order_pos)
      fprintf (symtab->dump_file, "Expanded functions with time profile:%u/%u\n",
               profiled_func_count, expanded_func_count);
  
diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog

index df9ddfa34324f8122b80ec77257857a14c25eeab..d80b2bd61bf6fe8297eda040539458d6fa7946eb 100644 (file)
--- a/gcc/lto/ChangeLog
+++ b/gcc/lto/ChangeLog
@@ -1,5 +1,11 @@
-2019-11-25  Joseph Myers  <joseph@codesourcery.com>
+2019-12-07  Jan Hubicka  <hubicka@ucw.cz>
  
+       * lto-partition.c (node_cmp): Turn into simple order comparisons.
+       (varpool_node_cmp): Remove.
+       (add_sorted_nodes): Use node_cmp.
+       (lto_balanced_map): Use tp_first_run_node_cmp.
+
+2019-11-25  Joseph Myers  <joseph@codesourcery.com>
         PR c/91985
         * lto-lang.c (lto_type_for_mode): Handle decimal floating-point
         types being NULL_TREE.
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c

index 75cfdaabf42f7bcf1dc20f186c996eec264e4dd6..4da76509616b209c904bd9ad60ab9a2d5a27311f 100644 (file)
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -372,38 +372,9 @@ lto_max_map (void)
      new_partition ("empty");
  }
  
-/* Helper function for qsort; sort nodes by order. noreorder functions must have
-   been removed earlier.  */
-static int
-node_cmp (const void *pa, const void *pb)
-{
-  const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
-  const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
-
-  /* Profile reorder flag enables function reordering based on first execution
-     of a function. All functions with profile are placed in ascending
-     order at the beginning.  */
-
-  if (flag_profile_reorder_functions)
-  {
-    /* Functions with time profile are sorted in ascending order.  */
-    if (a->tp_first_run && b->tp_first_run)
-      return a->tp_first_run != b->tp_first_run
-       ? a->tp_first_run - b->tp_first_run
-        : a->order - b->order;
-
-    /* Functions with time profile are sorted before the functions
-       that do not have the profile.  */
-    if (a->tp_first_run || b->tp_first_run)
-      return b->tp_first_run - a->tp_first_run;
-  }
-
-  return b->order - a->order;
-}
-
  /* Helper function for qsort; sort nodes by order.  */
  static int
-varpool_node_cmp (const void *pa, const void *pb)
+node_cmp (const void *pa, const void *pb)
  {
    const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
    const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
@@ -418,7 +389,7 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
    unsigned i;
    symtab_node *node;
  
-  next_nodes.qsort (varpool_node_cmp);
+  next_nodes.qsort (node_cmp);
    FOR_EACH_VEC_ELT (next_nodes, i, node)
      if (!symbol_partitioned_p (node))
        add_symbol_to_partition (partition, node);
@@ -537,17 +508,17 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
       unit tends to import a lot of global trees defined there.  We should
       get better about minimizing the function bounday, but until that
       things works smoother if we order in source order.  */
-  order.qsort (node_cmp);
+  order.qsort (tp_first_run_node_cmp);
    noreorder.qsort (node_cmp);
  
    if (dump_file)
      {
        for (unsigned i = 0; i < order.length (); i++)
-       fprintf (dump_file, "Balanced map symbol order:%s:%u\n",
-                order[i]->name (), order[i]->tp_first_run);
+       fprintf (dump_file, "Balanced map symbol order:%s:%" PRId64 "\n",
+                order[i]->name (), (int64_t) order[i]->tp_first_run);
        for (unsigned i = 0; i < noreorder.length (); i++)
-       fprintf (dump_file, "Balanced map symbol no_reorder:%s:%u\n",
-                noreorder[i]->name (), noreorder[i]->tp_first_run);
+       fprintf (dump_file, "Balanced map symbol no_reorder:%s:%" PRId64 "\n",
+                noreorder[i]->name (), (int64_t) noreorder[i]->tp_first_run);
      }
  
    /* Collect all variables that should not be reordered.  */
@@ -556,7 +527,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
         && vnode->no_reorder)
        varpool_order.safe_push (vnode);
    n_varpool_nodes = varpool_order.length ();
-  varpool_order.qsort (varpool_node_cmp);
+  varpool_order.qsort (node_cmp);
  
    /* Compute partition size and create the first partition.  */
    if (param_min_partition_size > max_partition_size)
diff --git a/gcc/profile.c b/gcc/profile.c

index 7e2d7d3ca3eb68650e702b07e1b80a6ffec88a90..7d4121712228d3a87c7325cd03b2280428651793 100644 (file)
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -874,7 +874,8 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
           node->tp_first_run = hist->hvalue.counters[0];
  
            if (dump_file)
-            fprintf (dump_file, "Read tp_first_run: %d\n", node->tp_first_run);
+            fprintf (dump_file, "Read tp_first_run: %" PRId64 "\n",
+                    (int64_t) node->tp_first_run);
          }
      }
author	Jan Hubicka <hubicka@ucw.cz>
	Sun, 8 Dec 2019 17:02:30 +0000 (18:02 +0100)
committer	Jan Hubicka <hubicka@gcc.gnu.org>
	Sun, 8 Dec 2019 17:02:30 +0000 (17:02 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/cgraph.c		patch \| blob \| history
gcc/cgraph.h		patch \| blob \| history
gcc/cgraphunit.c		patch \| blob \| history
gcc/lto/ChangeLog		patch \| blob \| history
gcc/lto/lto-partition.c		patch \| blob \| history
gcc/profile.c		patch \| blob \| history