* cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.
(cgraph_edge::clone): Clean up updating of profile.
* ipa-cp.c (update_profiling_info): Likewise.
* ipa-inline-transform.c (inline_transform): Likewise.
* ipa-inline.c (inline_small_functions): Add missing space to dump.
* ipa-split.c (execute_split_functions): Do not split when function
is cold.
* predict.c (estimate_bb_frequencies): Clean up updating of profile.
* profile-count.c (profile_count::dump): Add global0.
(profile_count::to_cgraph_frequency): Do not ICE when entry is
undefined.
(profile_count::to_sreal_scale): Likewise.
(profile_count::adjust_for_ipa_scaling): Fix typo in comment.
(profile_count::combine_with_ipa_count): New function.
* profile-count.h (profile_guessed_global0adjusted): New.
(profile_count::adjusted_zero): New.
(profile_count::global0adjusted): New.
(profile_count::combine_with_ipa_count): New.
* tree-inline.c (copy_edges_for_bb): Add NUM/DEN argument;
correct profile of return block of split functions.
(copy_cfg_body): Remove unused profile_count.
(copy_body): Likewise.
(expand_call_inline): Update.
(tree_function_versioning): Update.
From-SVN: r254919
+2017-11-18 Jan Hubicka <hubicka@ucw.cz>
+
+ * cgraphclones.c (cgraph_edge::clone): Rename gcov_count to prof_count.
+ (cgraph_edge::clone): Clean up updating of profile.
+ * ipa-cp.c (update_profiling_info): Likewise.
+ * ipa-inline-transform.c (inline_transform): Likewise.
+ * ipa-inline.c (inline_small_functions): Add missing space to dump.
+ * ipa-split.c (execute_split_functions): Do not split when function
+ is cold.
+ * predict.c (estimate_bb_frequencies): Clean up updating of profile.
+ * profile-count.c (profile_count::dump): Add global0.
+ (profile_count::to_cgraph_frequency): Do not ICE when entry is
+ undefined.
+ (profile_count::to_sreal_scale): Likewise.
+ (profile_count::adjust_for_ipa_scaling): Fix typo in comment.
+ (profile_count::combine_with_ipa_count): New function.
+ * profile-count.h (profile_guessed_global0adjusted): New.
+ (profile_count::adjusted_zero): New.
+ (profile_count::global0adjusted): New.
+ (profile_count::combine_with_ipa_count): New.
+ * tree-inline.c (copy_edges_for_bb): Add NUM/DEN argument;
+ correct profile of return block of split functions.
+ (copy_cfg_body): Remove unused profile_count.
+ (copy_body): Likewise.
+ (expand_call_inline): Update.
+ (tree_function_versioning): Update.
+
2017-11-18 Aldy Hernandez <aldyh@redhat.com>
* hash-set.h (hash_set::empty): New.
{
cgraph_edge *new_edge;
profile_count::adjust_for_ipa_scaling (&num, &den);
- profile_count gcov_count = count.apply_scale (num, den);
+ profile_count prof_count = count.apply_scale (num, den);
if (indirect_unknown_callee)
{
{
cgraph_node *callee = cgraph_node::get (decl);
gcc_checking_assert (callee);
- new_edge = n->create_edge (callee, call_stmt, gcov_count);
+ new_edge = n->create_edge (callee, call_stmt, prof_count);
}
else
{
new_edge = n->create_indirect_edge (call_stmt,
indirect_info->ecf_flags,
- gcov_count, false);
+ prof_count, false);
*new_edge->indirect_info = *indirect_info;
}
}
else
{
- new_edge = n->create_edge (callee, call_stmt, gcov_count);
+ new_edge = n->create_edge (callee, call_stmt, prof_count);
if (indirect_info)
{
new_edge->indirect_info
new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor;
/* Update IPA profile. Local profiles need no updating in original. */
- if (update_original
- && count.ipa () == count && new_edge->count.ipa () == new_edge->count)
- count -= new_edge->count;
- else if (caller->count.global0 () == caller->count
- && !(count == profile_count::zero ()))
- count = count.global0 ();
+ if (update_original)
+ count = count.combine_with_ipa_count (count.ipa ()
+ - new_edge->count.ipa ());
symtab->call_edge_duplication_hooks (this, new_edge);
return new_edge;
}
if (new_inlined_to)
dump_callgraph_transformation (this, new_inlined_to, "inlining to");
- if (prof_count == profile_count::zero ()
- && !(count == profile_count::zero ()))
- prof_count = count.global0 ();
-
+ prof_count = count.combine_with_ipa_count (prof_count);
new_node->count = prof_count;
/* Update IPA profile. Local profiles need no updating in original. */
- if (update_original && !(count == profile_count::zero ())
- && count.ipa () == count && prof_count.ipa () == prof_count)
- {
- if (count.nonzero_p ()
- && !(count - prof_count).nonzero_p ())
- count = count.global0 ();
- else
- count -= prof_count;
- }
+ if (update_original)
+ count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ());
new_node->decl = new_decl;
new_node->register_symbol ();
new_node->origin = origin;
}
}
- if (!new_sum.nonzero_p ())
- {
- new_sum = new_sum.global0 ();
- new_node->count = new_sum;
- remainder = orig_node->count;
- }
- else
- {
- remainder = orig_node_count - new_sum;
- if (!remainder.nonzero_p ())
- remainder = orig_node_count.global0 ();
- }
+ remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
+ - new_sum.ipa ());
+ new_sum = orig_node_count.combine_with_ipa_count (new_sum);
orig_node->count = remainder;
for (cs = new_node->callees; cs; cs = cs->next_callee)
{
profile_count num = node->count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
- bool scale = num.initialized_p () && den.ipa_p ()
- && (den.nonzero_p () || num == profile_count::zero ())
- && !(num == den.ipa ());
+ bool scale = num.initialized_p () && !(num == den);
if (scale)
{
+ profile_count::adjust_for_ipa_scaling (&num, &den);
if (dump_file)
{
fprintf (dump_file, "Applying count scale ");
}
basic_block bb;
+ cfun->cfg->count_max = profile_count::uninitialized ();
FOR_ALL_BB_FN (bb, cfun)
- if (num == profile_count::zero ())
- bb->count = bb->count.global0 ();
- else
+ {
bb->count = bb->count.apply_scale (num, den);
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
+ }
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
}
todo = optimize_inline_calls (current_function_decl);
/* Be sure that caches are maintained consistent.
This check is affected by scaling roundoff errors when compiling for
IPA, thus we skip it in that case. */
- if (!edge->callee->count.ipa_p ())
+ if (!edge->callee->count.ipa_p ()
+ && (!max_count.initialized_p () || !max_count.nonzero_p ()))
{
sreal cached_badness = edge_badness (edge, false);
{
fprintf (dump_file, " Called ");
edge->count.ipa ().dump (dump_file);
- fprintf (dump_file, "times\n");
+ fprintf (dump_file, " times\n");
}
if (dump_flags & TDF_DETAILS)
edge_badness (edge, true);
fprintf (dump_file, "Not splitting: main function.\n");
return 0;
}
+ if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not splitting: function is unlikely executed.\n");
+ return 0;
+ }
/* This can be relaxed; function might become inlinable after splitting
away the uninlinable part. */
if (ipa_fn_summaries
to outermost to examine frequencies for back edges. */
estimate_loops ();
- bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
- && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ();
-
freq_max = 0;
FOR_EACH_BB_FN (bb, cfun)
if (freq_max < BLOCK_INFO (bb)->frequency)
freq_max = real_bb_freq_max / freq_max;
if (freq_max < 16)
freq_max = 16;
+ profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{
/* If we have profile feedback in which this function was never
executed, then preserve this info. */
- if (global0)
- bb->count = count.global0 ();
- else if (!(bb->count == profile_count::zero ()))
- bb->count = count.guessed_local ();
+ if (!(bb->count == profile_count::zero ()))
+ bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
}
fprintf (f, " (estimated locally)");
else if (m_quality == profile_guessed_global0)
fprintf (f, " (estimated locally, globally 0)");
+ else if (m_quality == profile_guessed_global0adjusted)
+ fprintf (f, " (estimated locally, globally 0 adjusted)");
else if (m_quality == profile_adjusted)
fprintf (f, " (adjusted)");
else if (m_quality == profile_afdo)
int
profile_count::to_cgraph_frequency (profile_count entry_bb_count) const
{
- if (!initialized_p ())
+ if (!initialized_p () || !entry_bb_count.initialized_p ())
return CGRAPH_FREQ_BASE;
if (*this == profile_count::zero ())
return 0;
sreal
profile_count::to_sreal_scale (profile_count in, bool *known) const
{
- if (!initialized_p ())
+ if (!initialized_p () || !in.initialized_p ())
{
if (known)
*known = false;
*known = true;
if (*this == profile_count::zero ())
return 0;
- gcc_checking_assert (in.initialized_p ());
if (!in.m_val)
{
/* Scaling is no-op if NUM and DEN are the same. */
if (*num == *den)
return;
- /* Scaling to zero is always zeor. */
+ /* Scaling to zero is always zero. */
if (*num == profile_count::zero ())
return;
/* If den is non-zero we are safe. */
*den = den->force_nonzero ();
*num = num->force_nonzero ();
}
+
+/* THIS is a count of a bb which is known to be executed IPA times.
+ Combine this information into the bb counter. This means returning IPA
+ if it is nonzero, not changing anything if IPA is uninitialized,
+ and, if IPA is zero, turning THIS into the corresponding local profile
+ with global0. */
+profile_count
+profile_count::combine_with_ipa_count (profile_count ipa)
+{
+ ipa = ipa.ipa ();
+ if (ipa.nonzero_p ())
+ return ipa;
+ if (!ipa.initialized_p ())
+ return *this;
+ if (ipa == profile_count::zero ())
+ return this->global0 ();
+ return this->global0adjusted ();
+}
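
For reference, a minimal standalone sketch (not GCC code) of the decision
logic in profile_count::combine_with_ipa_count above; the toy_* names and the
simplified quality enum are assumptions made purely for illustration:

#include <cstdint>
#include <cstdio>

/* Simplified stand-in for the profile quality levels; illustrative only.  */
enum toy_quality
{
  toy_guessed_local,
  toy_guessed_global0,
  toy_guessed_global0adjusted,
  toy_adjusted,
  toy_precise,
  toy_uninitialized
};

/* Toy counter modelling just enough of profile_count for this sketch.  */
struct toy_count
{
  uint64_t val;
  toy_quality quality;

  bool initialized_p () const { return quality != toy_uninitialized; }
  bool nonzero_p () const { return initialized_p () && val != 0; }

  /* Keep the local shape but record that the global count is (adjusted) 0.  */
  toy_count global0 () const { return { val, toy_guessed_global0 }; }
  toy_count global0adjusted () const
  { return { val, toy_guessed_global0adjusted }; }

  /* Mirrors combine_with_ipa_count: trust a nonzero IPA count, keep THIS
     when the IPA count is unknown, otherwise drop to global0 for a precise
     zero or to global0adjusted for an adjusted zero.  */
  toy_count combine_with_ipa_count (toy_count ipa) const
  {
    if (ipa.nonzero_p ())
      return ipa;
    if (!ipa.initialized_p ())
      return *this;
    if (ipa.quality == toy_precise)
      return global0 ();
    return global0adjusted ();
  }
};

int
main ()
{
  toy_count local = { 100, toy_guessed_local };
  toy_count cases[] = { { 40, toy_precise },      /* nonzero IPA count wins  */
			{ 0, toy_uninitialized }, /* keep the local guess    */
			{ 0, toy_precise },       /* becomes global0         */
			{ 0, toy_adjusted } };    /* becomes global0adjusted */
  for (toy_count ipa : cases)
    {
      toy_count r = local.combine_with_ipa_count (ipa);
      printf ("val=%llu quality=%d\n", (unsigned long long) r.val,
	      (int) r.quality);
    }
  return 0;
}
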
Never used by probabilities. */
profile_guessed_global0 = 1,
+ /* Same as profile_guessed_global0 but the global count is an adjusted 0. */
+ profile_guessed_global0adjusted = 2,
/* Profile is based on static branch prediction heuristics. It may or may
not reflect the reality but it can be compared interprocedurally
(for example, we inlined function w/o profile feedback into function
with feedback and propagated from that).
Never used by probabilities. */
- profile_guessed = 2,
+ profile_guessed = 3,
/* Profile was determined by autofdo. */
- profile_afdo = 3,
+ profile_afdo = 4,
/* Profile was originally based on feedback but it was adjusted
by code duplicating optimization. It may not precisely reflect the
particular code path. */
- profile_adjusted = 4,
+ profile_adjusted = 5,
/* Profile was read from profile feedback or determined by accurate static
method. */
- profile_precise = 5
+ profile_precise = 7
};
/* The base value for branch probability notes and edge probabilities. */
{
return from_gcov_type (0);
}
+ static profile_count adjusted_zero ()
+ {
+ profile_count c;
+ c.m_val = 0;
+ c.m_quality = profile_adjusted;
+ return c;
+ }
static profile_count guessed_zero ()
{
profile_count c;
return ret;
}
- /* We know that profile is globally0 but keep local profile if present. */
+ /* We know that profile is globally 0 but keep local profile if present. */
profile_count global0 () const
{
profile_count ret = *this;
return ret;
}
+ /* We know that profile is globally adjusted 0 but keep local profile
+ if present. */
+ profile_count global0adjusted () const
+ {
+ profile_count ret = *this;
+ if (!initialized_p ())
+ return *this;
+ ret.m_quality = profile_guessed_global0adjusted;
+ return ret;
+ }
+
/* Return THIS with quality dropped to GUESSED. */
profile_count guessed () const
{
across functions. */
profile_count ipa () const
{
- if (m_quality > profile_guessed_global0)
+ if (m_quality > profile_guessed_global0adjusted)
return *this;
if (m_quality == profile_guessed_global0)
return profile_count::zero ();
+ if (m_quality == profile_guessed_global0adjusted)
+ return profile_count::adjusted_zero ();
return profile_count::uninitialized ();
}
kinds. */
static void adjust_for_ipa_scaling (profile_count *num, profile_count *den);
+ /* THIS is a count of a bb which is known to be executed IPA times.
+ Combine this information into the bb counter. This means returning IPA
+ if it is nonzero, not changing anything if IPA is uninitialized,
+ and, if IPA is zero, turning THIS into the corresponding local profile
+ with global0. */
+ profile_count combine_with_ipa_count (profile_count ipa);
+
/* LTO streaming support. */
static profile_count stream_in (struct lto_input_block *);
void stream_out (struct output_block *);
debug stmts are left after a statement that must end the basic block. */
static bool
-copy_edges_for_bb (basic_block bb,
+copy_edges_for_bb (basic_block bb, profile_count num, profile_count den,
basic_block ret_bb, basic_block abnormal_goto_dest)
{
basic_block new_bb = (basic_block) bb->aux;
if (bb->index == ENTRY_BLOCK || bb->index == EXIT_BLOCK)
return false;
+ /* When doing function splitting, we must decrease the count of the return
+ block that was previously reachable by a block we did not copy. */
+ if (single_succ_p (bb) && single_succ_edge (bb)->dest->index == EXIT_BLOCK)
+ FOR_EACH_EDGE (old_edge, ei, bb->preds)
+ if (old_edge->src->index != ENTRY_BLOCK
+ && !old_edge->src->aux)
+ new_bb->count -= old_edge->count ().apply_scale (num, den);
+
for (si = gsi_start_bb (new_bb); !gsi_end_p (si);)
{
gimple *copy_stmt;
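
For reference, a minimal numeric sketch (not GCC code) of the return-block
correction in the copy_edges_for_bb hunk above; plain integers stand in for
profile_count and all numbers and helper names are illustrative assumptions:

#include <cstdint>
#include <cstdio>

/* Integer counterpart of profile_count::apply_scale, with rounding.  */
static uint64_t
apply_scale (uint64_t count, uint64_t num, uint64_t den)
{
  return den ? (count * num + den / 2) / den : 0;
}

int
main ()
{
  /* The original return block is reached 100 times: 70 from blocks that are
     copied into the clone and 30 from a block that is not copied (it stays
     in the split-off part).  The clone is scaled by NUM/DEN = 60/100.  */
  uint64_t num = 60, den = 100;
  uint64_t new_return_count = apply_scale (100, num, den);	/* 60 */

  /* Edges whose source block was not copied (no ->aux in the hunk above)
     cannot reach the cloned return block, so their scaled counts are
     subtracted, just as the new loop over bb->preds does.  */
  uint64_t uncopied_pred_count = 30;
  new_return_count -= apply_scale (uncopied_pred_count, num, den); /* -18 */

  printf ("corrected return block count: %llu\n",
	  (unsigned long long) new_return_count);		/* 42 */
  return 0;
}
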
profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun);
- if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()
- && count.ipa ().initialized_p ())
- {
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
- EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
- EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
- ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
- }
- else
- {
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
- = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
- EXIT_BLOCK_PTR_FOR_FN (cfun)->count
- = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count;
- }
+ profile_count num = count;
+ profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
+ profile_count::adjust_for_ipa_scaling (&num, &den);
+
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
+ EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
+ ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
if (src_cfun->eh)
init_eh_for_function ();
another function. Walks FN via CFG, returns new fndecl. */
static tree
-copy_cfg_body (copy_body_data * id, profile_count,
+copy_cfg_body (copy_body_data * id,
basic_block entry_block_map, basic_block exit_block_map,
basic_block new_entry)
{
FOR_ALL_BB_FN (bb, cfun_to_copy)
if (!id->blocks_to_copy
|| (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
- need_debug_cleanup |= copy_edges_for_bb (bb, exit_block_map,
+ need_debug_cleanup |= copy_edges_for_bb (bb, num, den, exit_block_map,
abnormal_goto_dest);
if (new_entry)
{
- edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU);
+ edge e = make_edge (entry_block_map, (basic_block)new_entry->aux,
+ EDGE_FALLTHRU);
e->probability = profile_probability::always ();
}
another function. */
static tree
-copy_body (copy_body_data *id, profile_count count,
+copy_body (copy_body_data *id,
basic_block entry_block_map, basic_block exit_block_map,
basic_block new_entry)
{
/* If this body has a CFG, walk CFG and copy. */
gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl)));
- body = copy_cfg_body (id, count, entry_block_map, exit_block_map,
+ body = copy_cfg_body (id, entry_block_map, exit_block_map,
new_entry);
copy_debug_stmts (id);
function in any way before this point, as this CALL_EXPR may be
a self-referential call; if we're calling ourselves, we need to
duplicate our body before altering anything. */
- copy_body (id, cg_edge->callee->count,
- bb, return_block, NULL);
+ copy_body (id, bb, return_block, NULL);
reset_debug_bindings (id, stmt_gsi);
DECL_RESULT (new_decl) = DECL_RESULT (old_decl);
DECL_ARGUMENTS (new_decl) = DECL_ARGUMENTS (old_decl);
initialize_cfun (new_decl, old_decl,
- old_entry_block->count);
+ new_entry ? new_entry->count : old_entry_block->count);
if (DECL_STRUCT_FUNCTION (new_decl)->gimple_df)
DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta
= id.src_cfun->gimple_df->ipa_pta;
}
/* Copy the Function's body. */
- copy_body (&id, old_entry_block->count,
- ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun),
+ copy_body (&id, ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun),
new_entry);
/* Renumber the lexical scoping (non-code) blocks consecutively. */