+2019-12-07 Jan Hubicka <hubicka@ucw.cz>
+
+ * cgraph.c (cgraph_node::dump): Make tp_first_run 64bit.
+ * cgraph.h (cgraph_node): Likewise.
+ (tp_first_run_node_cmp): Declare.
+ * cgraphunit.c (node_cmp): Rename to ...
+ (tp_first_run_node_cmp): ... this; export; watch for 64bit overflows;
+ clear tp_first_run for no_reorder and !flag_profile_reorder_functions.
+ (expand_all_functions): Collect tp_first_run and normal functions to
+ two vectors so the other functions remain sorted. Do not check for
+ flag_profile_reorder_functions since it is a function-local flag.
+ * profile.c (compute_value_histograms): Update tp_first_run printing.
+
2019-12-07 Jan Hubicka <hubicka@ucw.cz>
* opts.c (common_handle_option): Do not clear ipa_reference for
count.dump (f);
}
if (tp_first_run > 0)
- fprintf (f, " first_run:%i", tp_first_run);
+ fprintf (f, " first_run:%" PRId64, (int64_t) tp_first_run);
if (origin)
fprintf (f, " nested in:%s", origin->asm_name ());
if (gimple_has_body_p (decl))
/* Expected number of executions: calculated in profile.c. */
profile_count count;
+ /* Time profiler: first run of function. */
+ gcov_type tp_first_run;
/* How to scale counts at materialization time; used to merge
LTO units with different number of profile runs. */
int count_materialization_scale;
unsigned int profile_id;
/* ID of the translation unit. */
int unit_id;
- /* Time profiler: first run of function. */
- int tp_first_run;
/* Set when decl is an abstract function pointed to by the
ABSTRACT_DECL_ORIGIN of a reachable function. */
/* In cgraphunit.c */
void cgraphunit_c_finalize (void);
+int tp_first_run_node_cmp (const void *pa, const void *pb);
/* Initialize datastructures so DECL is a function in lowered gimple form.
IN_SSA is true if the gimple is in SSA. */
/* Node comparator that is responsible for the order that corresponds
to time when a function was launched for the first time. */
-static int
-node_cmp (const void *pa, const void *pb)
+int
+tp_first_run_node_cmp (const void *pa, const void *pb)
{
const cgraph_node *a = *(const cgraph_node * const *) pa;
const cgraph_node *b = *(const cgraph_node * const *) pb;
+ gcov_type tp_first_run_a = a->tp_first_run;
+ gcov_type tp_first_run_b = b->tp_first_run;
+
+ if (!opt_for_fn (a->decl, flag_profile_reorder_functions)
+ || a->no_reorder)
+ tp_first_run_a = 0;
+ if (!opt_for_fn (b->decl, flag_profile_reorder_functions)
+ || b->no_reorder)
+ tp_first_run_b = 0;
+
+ if (tp_first_run_a == tp_first_run_b)
+ return a->order - b->order;
/* Functions with time profile must be before these without profile. */
- if (!a->tp_first_run || !b->tp_first_run)
- return a->tp_first_run - b->tp_first_run;
+ if (!tp_first_run_a || !tp_first_run_b)
+ return tp_first_run_b ? 1 : -1;
- return a->tp_first_run != b->tp_first_run
- ? b->tp_first_run - a->tp_first_run
- : b->order - a->order;
+ /* Watch for overflow - tp_first_run is 64bit. */
+ if (tp_first_run_a > tp_first_run_b)
+ return 1;
+ else
+ return -1;
}
/* Expand all functions that must be output.
cgraph_node *node;
cgraph_node **order = XCNEWVEC (cgraph_node *,
symtab->cgraph_count);
+ cgraph_node **tp_first_run_order = XCNEWVEC (cgraph_node *,
+ symtab->cgraph_count);
unsigned int expanded_func_count = 0, profiled_func_count = 0;
- int order_pos, new_order_pos = 0;
+ int order_pos, tp_first_run_order_pos = 0, new_order_pos = 0;
int i;
order_pos = ipa_reverse_postorder (order);
optimization. So we must be sure to not reference them. */
for (i = 0; i < order_pos; i++)
if (order[i]->process)
- order[new_order_pos++] = order[i];
-
- if (flag_profile_reorder_functions)
- qsort (order, new_order_pos, sizeof (cgraph_node *), node_cmp);
+ {
+ if (order[i]->tp_first_run
+ && opt_for_fn (order[i]->decl, flag_profile_reorder_functions))
+ tp_first_run_order[tp_first_run_order_pos++] = order[i];
+ else
+ order[new_order_pos++] = order[i];
+ }
+ /* Output functions in RPO so callers get optimized before callees. This
+ makes ipa-ra and other propagators work.
+ FIXME: This is far from optimal code layout. */
for (i = new_order_pos - 1; i >= 0; i--)
{
node = order[i];
if (node->process)
{
expanded_func_count++;
- if(node->tp_first_run)
- profiled_func_count++;
+ node->process = 0;
+ node->expand ();
+ }
+ }
+ qsort (tp_first_run_order, tp_first_run_order_pos,
+ sizeof (cgraph_node *), tp_first_run_node_cmp);
+ for (i = 0; i < tp_first_run_order_pos; i++)
+ {
+ node = tp_first_run_order[i];
+
+ if (node->process)
+ {
+ expanded_func_count++;
+ profiled_func_count++;
if (symtab->dump_file)
fprintf (symtab->dump_file,
- "Time profile order in expand_all_functions:%s:%d\n",
- node->asm_name (), node->tp_first_run);
+ "Time profile order in expand_all_functions:%s:%" PRId64
+ "\n", node->asm_name (), (int64_t) node->tp_first_run);
node->process = 0;
node->expand ();
}
fprintf (dump_file, "Expanded functions with time profile (%s):%u/%u\n",
main_input_filename, profiled_func_count, expanded_func_count);
- if (symtab->dump_file && flag_profile_reorder_functions)
+ if (symtab->dump_file && tp_first_run_order_pos)
fprintf (symtab->dump_file, "Expanded functions with time profile:%u/%u\n",
profiled_func_count, expanded_func_count);
-2019-11-25 Joseph Myers <joseph@codesourcery.com>
+2019-12-07 Jan Hubicka <hubicka@ucw.cz>
+ * lto-partition.c (node_cmp): Turn into simple order comparisons.
+ (varpool_node_cmp): Remove.
+ (add_sorted_nodes): Use node_cmp.
+ (lto_balanced_map): Use tp_first_run_node_cmp.
+
+
PR c/91985
* lto-lang.c (lto_type_for_mode): Handle decimal floating-point
types being NULL_TREE.
new_partition ("empty");
}
-/* Helper function for qsort; sort nodes by order. noreorder functions must have
- been removed earlier. */
-static int
-node_cmp (const void *pa, const void *pb)
-{
- const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
- const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
-
- /* Profile reorder flag enables function reordering based on first execution
- of a function. All functions with profile are placed in ascending
- order at the beginning. */
-
- if (flag_profile_reorder_functions)
- {
- /* Functions with time profile are sorted in ascending order. */
- if (a->tp_first_run && b->tp_first_run)
- return a->tp_first_run != b->tp_first_run
- ? a->tp_first_run - b->tp_first_run
- : a->order - b->order;
-
- /* Functions with time profile are sorted before the functions
- that do not have the profile. */
- if (a->tp_first_run || b->tp_first_run)
- return b->tp_first_run - a->tp_first_run;
- }
-
- return b->order - a->order;
-}
-
/* Helper function for qsort; sort nodes by order. */
static int
-varpool_node_cmp (const void *pa, const void *pb)
+node_cmp (const void *pa, const void *pb)
{
const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
unsigned i;
symtab_node *node;
- next_nodes.qsort (varpool_node_cmp);
+ next_nodes.qsort (node_cmp);
FOR_EACH_VEC_ELT (next_nodes, i, node)
if (!symbol_partitioned_p (node))
add_symbol_to_partition (partition, node);
unit tends to import a lot of global trees defined there. We should
get better about minimizing the function bounday, but until that
things works smoother if we order in source order. */
- order.qsort (node_cmp);
+ order.qsort (tp_first_run_node_cmp);
noreorder.qsort (node_cmp);
if (dump_file)
{
for (unsigned i = 0; i < order.length (); i++)
- fprintf (dump_file, "Balanced map symbol order:%s:%u\n",
- order[i]->name (), order[i]->tp_first_run);
+ fprintf (dump_file, "Balanced map symbol order:%s:%" PRId64 "\n",
+ order[i]->name (), (int64_t) order[i]->tp_first_run);
for (unsigned i = 0; i < noreorder.length (); i++)
- fprintf (dump_file, "Balanced map symbol no_reorder:%s:%u\n",
- noreorder[i]->name (), noreorder[i]->tp_first_run);
+ fprintf (dump_file, "Balanced map symbol no_reorder:%s:%" PRId64 "\n",
+ noreorder[i]->name (), (int64_t) noreorder[i]->tp_first_run);
}
/* Collect all variables that should not be reordered. */
&& vnode->no_reorder)
varpool_order.safe_push (vnode);
n_varpool_nodes = varpool_order.length ();
- varpool_order.qsort (varpool_node_cmp);
+ varpool_order.qsort (node_cmp);
/* Compute partition size and create the first partition. */
if (param_min_partition_size > max_partition_size)
node->tp_first_run = hist->hvalue.counters[0];
if (dump_file)
- fprintf (dump_file, "Read tp_first_run: %d\n", node->tp_first_run);
+ fprintf (dump_file, "Read tp_first_run: %" PRId64 "\n",
+ (int64_t) node->tp_first_run);
}
}