/* Branch prediction routines for the GNU compiler.
- Copyright (C) 2000-2017 Free Software Foundation, Inc.
+ Copyright (C) 2000-2018 Free Software Foundation, Inc.
This file is part of GCC.
#include "ipa-utils.h"
#include "gimple-pretty-print.h"
#include "selftest.h"
+#include "cfgrtl.h"
+#include "stringpool.h"
+#include "attribs.h"
/* Enum with reasons why a predictor is ignored. */
};
#undef DEF_PREDICTOR
-/* Return TRUE if frequency FREQ is considered to be hot. */
-
-static inline bool
-maybe_hot_frequency_p (struct function *fun, int freq)
-{
- struct cgraph_node *node = cgraph_node::get (fun->decl);
- if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
- {
- if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
- return false;
- if (node->frequency == NODE_FREQUENCY_HOT)
- return true;
- }
- if (profile_status_for_fn (fun) == PROFILE_ABSENT)
- return true;
- if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
- && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
- return false;
- if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
- return false;
- if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
- < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
- return false;
- return true;
-}
-
static gcov_type min_count = -1;
/* Determine the threshold for hot BB counts. */
/* Return TRUE if frequency FREQ is considered to be hot. */
bool
-maybe_hot_count_p (struct function *, profile_count count)
+maybe_hot_count_p (struct function *fun, profile_count count)
{
if (!count.initialized_p ())
return true;
+ if (count.ipa () == profile_count::zero ())
+ return false;
+ if (!count.ipa_p ())
+ {
+ struct cgraph_node *node = cgraph_node::get (fun->decl);
+ if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
+ {
+ if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ return false;
+ if (node->frequency == NODE_FREQUENCY_HOT)
+ return true;
+ }
+ if (profile_status_for_fn (fun) == PROFILE_ABSENT)
+ return true;
+ if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
+ && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
+ return false;
+ if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
+ return false;
+ if (count.apply_scale (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION), 1)
+ < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
+ return false;
+ return true;
+ }
/* Code executed at most once is not hot. */
if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
return false;
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
gcc_checking_assert (fun);
- if (!maybe_hot_count_p (fun, bb->count))
- return false;
- return maybe_hot_frequency_p (fun, bb->frequency);
+ return maybe_hot_count_p (fun, bb->count);
}
/* Return true in case BB can be CPU intensive and should be optimized
bool
maybe_hot_edge_p (edge e)
{
- if (!maybe_hot_count_p (cfun, e->count))
- return false;
- return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
+ return maybe_hot_count_p (cfun, e->count ());
}
/* Return true if profile COUNT and FREQUENCY, or function FUN static
static bool
probably_never_executed (struct function *fun,
- profile_count count, int)
+ profile_count count)
{
gcc_checking_assert (fun);
- if (count == profile_count::zero ())
+ if (count.ipa () == profile_count::zero ())
return true;
- if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
+ /* Do not trust adjusted counts.  This will make us drop code with low
+ execution count into the cold section as a result of inlining.  These
+ low counts are not safe even with read profile and may lead us to
+ dropping into the cold section code which actually gets executed, which
+ is not desirable.  */
+ if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ)
{
int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
- return probably_never_executed (fun, bb->count, bb->frequency);
+ return probably_never_executed (fun, bb->count);
}
static bool
unlikely_executed_edge_p (edge e)
{
- return e->count == profile_count::zero ()
+ return (e->count () == profile_count::zero ()
+ || e->probability == profile_probability::never ())
|| (e->flags & (EDGE_EH | EDGE_FAKE));
}
bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
- if (e->count.initialized_p ())
- unlikely_executed_edge_p (e);
- return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
+ if (unlikely_executed_edge_p (e))
+ return true;
+ return probably_never_executed (fun, e->count ());
}
/* Return true when current function should always be optimized for size. */
bool
predictable_edge_p (edge e)
{
- if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
+ if (!e->probability.initialized_p ())
return false;
- if ((e->probability
+ if ((e->probability.to_reg_br_prob_base ()
<= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
- || (REG_BR_PROB_BASE - e->probability
+ || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
<= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
return true;
return false;
return false;
}
-/* Return true when the probability of edge is reliable.
-
- The profile guessing code is good at predicting branch outcome (ie.
- taken/not taken), that is predicted right slightly over 75% of time.
- It is however notoriously poor on predicting the probability itself.
- In general the profile appear a lot flatter (with probabilities closer
- to 50%) than the reality so it is bad idea to use it to drive optimization
- such as those disabling dynamic branch prediction for well predictable
- branches.
-
- There are two exceptions - edges leading to noreturn edges and edges
- predicted by number of iterations heuristics are predicted well. This macro
- should be able to distinguish those, but at the moment it simply check for
- noreturn heuristic that is only one giving probability over 99% or bellow
- 1%. In future we might want to propagate reliability information across the
- CFG if we find this information useful on multiple places. */
-static bool
-probability_reliable_p (int prob)
-{
- return (profile_status_for_fn (cfun) == PROFILE_READ
- || (profile_status_for_fn (cfun) == PROFILE_GUESSED
- && (prob <= HITRATE (1) || prob >= HITRATE (99))));
-}
-
/* Same predicate as above, working on edges. */
bool
edge_probability_reliable_p (const_edge e)
{
- return probability_reliable_p (e->probability);
+ return e->probability.probably_reliable_p ();
}
/* Same predicate as edge_probability_reliable_p, working on notes. */
br_prob_note_reliable_p (const_rtx note)
{
gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
- return probability_reliable_p (XINT (note, 0));
+ return profile_probability::from_reg_br_prob_note
+ (XINT (note, 0)).probably_reliable_p ();
}
static void
enum prediction taken)
{
int probability = predictor_info[(int) predictor].hitrate;
+ gcc_assert (probability != PROB_UNINITIALIZED);
if (taken != TAKEN)
probability = REG_BR_PROB_BASE - probability;
for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
if (REG_NOTE_KIND (note) == REG_BR_PROB)
- XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
+ XINT (note, 0) = profile_probability::from_reg_br_prob_note
+ (XINT (note, 0)).invert ().to_reg_br_prob_note ();
else if (REG_NOTE_KIND (note) == REG_BR_PRED)
XEXP (XEXP (note, 0), 1)
= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
if (e)
{
fprintf (file, " hit ");
- e->count.dump (file);
- fprintf (file, " (%.1f%%)", e->count.to_gcov_type() * 100.0
+ e->count ().dump (file);
+ fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0
/ bb->count.to_gcov_type ());
}
}
fprintf (file, "\n");
+
+ /* Print output that can be easily read by the analyze_brprob.py script.  We are
+ interested only in counts that are read from GCDA files. */
+ if (dump_file && (dump_flags & TDF_DETAILS)
+ && bb->count.precise_p ()
+ && reason == REASON_NONE)
+ {
+ gcc_assert (e->count ().precise_p ());
+ fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
+ predictor_info[predictor].name,
+ bb->count.to_gcov_type (), e->count ().to_gcov_type (),
+ probability * 100.0 / REG_BR_PROB_BASE);
+ }
}
/* Return true if STMT is known to be unlikely executed. */
{
if (!is_gimple_call (stmt))
return false;
- /* NORETURN attribute enough is not strong enough: exit() may be quite
+ /* NORETURN attribute alone is not strong enough: exit() may be quite
likely executed once during program run. */
if (gimple_call_fntype (stmt)
&& lookup_attribute ("cold",
cgraph_node *n = cgraph_node::get (decl);
if (!n)
return false;
- enum availability avail;
+
+ availability avail;
n = n->ultimate_alias_target (&avail);
if (avail < AVAIL_AVAILABLE)
- return NULL;
+ return false;
if (!n->analyzed
|| n->decl == current_function_decl)
return false;
set_even_probabilities (basic_block bb,
hash_set<edge> *unlikely_edges = NULL)
{
- unsigned nedges = 0;
+ unsigned nedges = 0, unlikely_count = 0;
edge e = NULL;
edge_iterator ei;
+ profile_probability all = profile_probability::always ();
FOR_EACH_EDGE (e, ei, bb->succs)
- if (!unlikely_executed_edge_p (e))
- nedges ++;
+ if (e->probability.initialized_p ())
+ all -= e->probability;
+ else if (!unlikely_executed_edge_p (e))
+ {
+ nedges ++;
+ if (unlikely_edges != NULL && unlikely_edges->contains (e))
+ {
+ all -= profile_probability::very_unlikely ();
+ unlikely_count++;
+ }
+ }
/* Make the distribution even if all edges are unlikely. */
- unsigned unlikely_count = unlikely_edges ? unlikely_edges->elements () : 0;
if (unlikely_count == nedges)
{
unlikely_edges = NULL;
unsigned c = nedges - unlikely_count;
FOR_EACH_EDGE (e, ei, bb->succs)
- if (!unlikely_executed_edge_p (e))
+ if (e->probability.initialized_p ())
+ ;
+ else if (!unlikely_executed_edge_p (e))
{
if (unlikely_edges != NULL && unlikely_edges->contains (e))
- e->probability = PROB_VERY_UNLIKELY;
+ e->probability = profile_probability::very_unlikely ();
else
- e->probability = (REG_BR_PROB_BASE + c / 2) / c;
+ e->probability = all.apply_scale (1, c).guessed ();
}
else
- e->probability = 0;
+ e->probability = profile_probability::never ();
+}
+
+/* Add REG_BR_PROB note to JUMP with PROB. */
+
+void
+add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
+{
+ gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
+ add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
}
/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
if (!prob_note)
{
- add_int_reg_note (insn, REG_BR_PROB, combined_probability);
+ profile_probability p
+ = profile_probability::from_reg_br_prob_base (combined_probability);
+ add_reg_br_prob_note (insn, p);
/* Save the prediction into CFG in case we are seeing non-degenerated
conditional jump. */
if (!single_succ_p (bb))
{
- BRANCH_EDGE (bb)->probability = combined_probability;
+ BRANCH_EDGE (bb)->probability = p;
FALLTHRU_EDGE (bb)->probability
- = REG_BR_PROB_BASE - combined_probability;
+ = BRANCH_EDGE (bb)->probability.invert ();
}
}
else if (!single_succ_p (bb))
{
- int prob = XINT (prob_note, 0);
+ profile_probability prob = profile_probability::from_reg_br_prob_note
+ (XINT (prob_note, 0));
BRANCH_EDGE (bb)->probability = prob;
- FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
+ FALLTHRU_EDGE (bb)->probability = prob.invert ();
}
else
- single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
+ single_succ_edge (bb)->probability = profile_probability::always ();
}
/* Edge prediction hash traits. */
int nedges = 0;
edge e, first = NULL, second = NULL;
edge_iterator ei;
+ int nzero = 0;
+ int nunknown = 0;
FOR_EACH_EDGE (e, ei, bb->succs)
- if (!unlikely_executed_edge_p (e))
- {
- nedges ++;
- if (first && !second)
- second = e;
- if (!first)
- first = e;
- }
+ {
+ if (!unlikely_executed_edge_p (e))
+ {
+ nedges ++;
+ if (first && !second)
+ second = e;
+ if (!first)
+ first = e;
+ }
+ else if (!e->probability.initialized_p ())
+ e->probability = profile_probability::never ();
+ if (!e->probability.initialized_p ())
+ nunknown++;
+ else if (e->probability == profile_probability::never ())
+ nzero++;
+ }
/* When there is no successor or only one choice, prediction is easy.
if (pred->ep_probability <= PROB_VERY_UNLIKELY)
unlikely_edges.add (pred->ep_edge);
- if (!bb->count.initialized_p () && !dry_run)
+ if (!dry_run)
set_even_probabilities (bb, &unlikely_edges);
clear_bb_predictions (bb);
if (dump_file)
nedges, bb->index);
FOR_EACH_EDGE (e, ei, bb->succs)
if (!unlikely_executed_edge_p (e))
- dump_prediction (dump_file, PRED_COMBINED, e->probability,
- bb, REASON_NONE, e);
+ dump_prediction (dump_file, PRED_COMBINED,
+ e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
}
}
return;
}
clear_bb_predictions (bb);
- if (!bb->count.initialized_p () && !dry_run)
+
+ /* If we have only one successor which is unknown, we can compute the missing
+ probability. */
+ if (nunknown == 1)
{
- first->probability = combined_probability;
- second->probability = REG_BR_PROB_BASE - combined_probability;
+ profile_probability prob = profile_probability::always ();
+ edge missing = NULL;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->probability.initialized_p ())
+ prob -= e->probability;
+ else if (missing == NULL)
+ missing = e;
+ else
+ gcc_unreachable ();
+ missing->probability = prob;
+ }
+ /* If nothing is unknown, we have nothing to update. */
+ else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs))
+ ;
+ else if (!dry_run)
+ {
+ first->probability
+ = profile_probability::from_reg_br_prob_base (combined_probability);
+ second->probability = first->probability.invert ();
}
}
return PRED_NO_PREDICTION;
}
+/* Return zero if phi result could have values other than -1, 0 or 1,
+ otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
+ values are used or likely. */
+
+static int
+zero_one_minusone (gphi *phi, int limit)
+{
+ int phi_num_args = gimple_phi_num_args (phi);
+ int ret = 0;
+ for (int i = 0; i < phi_num_args; i++)
+ {
+ tree t = PHI_ARG_DEF (phi, i);
+ if (TREE_CODE (t) != INTEGER_CST)
+ continue;
+ wide_int w = wi::to_wide (t);
+ if (w == -1)
+ ret |= 1;
+ else if (w == 0)
+ ret |= 2;
+ else if (w == 1)
+ ret |= 4;
+ else
+ return 0;
+ }
+ for (int i = 0; i < phi_num_args; i++)
+ {
+ tree t = PHI_ARG_DEF (phi, i);
+ if (TREE_CODE (t) == INTEGER_CST)
+ continue;
+ if (TREE_CODE (t) != SSA_NAME)
+ return 0;
+ gimple *g = SSA_NAME_DEF_STMT (t);
+ if (gimple_code (g) == GIMPLE_PHI && limit > 0)
+ if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
+ {
+ ret |= r;
+ continue;
+ }
+ if (!is_gimple_assign (g))
+ return 0;
+ if (gimple_assign_cast_p (g))
+ {
+ tree rhs1 = gimple_assign_rhs1 (g);
+ if (TREE_CODE (rhs1) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
+ || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+ return 0;
+ ret |= (2 | 4);
+ continue;
+ }
+ if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
+ return 0;
+ ret |= (2 | 4);
+ }
+ return ret;
+}
+
/* Find the basic block with return expression and look up for possible
return value trying to apply RETURN_PREDICTION heuristics. */
static void
phi_num_args = gimple_phi_num_args (phi);
pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
+ /* Avoid the case where the function returns -1, 0 and 1 values and
+ nothing else. Those could be qsort etc. comparison functions
+ where the negative return isn't less probable than positive.
+ For this require that the function returns at least -1 or 1
+ or -1 and a boolean value or comparison result, so that functions
+ returning just -1 and 0 are treated as if -1 represents error value. */
+ if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
+ && !TYPE_UNSIGNED (TREE_TYPE (return_val))
+ && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
+ if (int r = zero_one_minusone (phi, 3))
+ if ((r & (1 | 4)) == (1 | 4))
+ return;
+
/* Avoid the degenerate case where all return values form the function
belongs to same category (ie they are all positive constants)
so we can hardly say something about them. */
{
edge e;
edge_iterator ei;
- gimple *last;
FOR_EACH_EDGE (e, ei, bb->succs)
{
- /* Predict edges to user labels with attributes. */
- if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- gimple_stmt_iterator gi;
- for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
- {
- glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
- tree decl;
-
- if (!label_stmt)
- break;
- decl = gimple_label_label (label_stmt);
- if (DECL_ARTIFICIAL (decl))
- continue;
-
- /* Finally, we have a user-defined label. */
- if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
- predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
- else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
- predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
- }
- }
-
- /* Predict early returns to be probable, as we've already taken
- care for error returns and other cases are often used for
- fast paths through function.
-
- Since we've already removed the return statements, we are
- looking for CFG like:
-
- if (conditional)
- {
- ..
- goto return_block
- }
- some other blocks
- return_block:
- return_stmt. */
- if (e->dest != bb->next_bb
- && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
- && single_succ_p (e->dest)
- && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
- && (last = last_stmt (e->dest)) != NULL
- && gimple_code (last) == GIMPLE_RETURN)
- {
- edge e1;
- edge_iterator ei1;
-
- if (single_succ_p (bb))
- {
- FOR_EACH_EDGE (e1, ei1, bb->preds)
- if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
- && !predicted_by_p (e1->src, PRED_CONST_RETURN)
- && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
- predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
- }
- else
- if (!predicted_by_p (e->src, PRED_NULL_RETURN)
- && !predicted_by_p (e->src, PRED_CONST_RETURN)
- && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
- predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
- }
-
/* Look for block we are guarding (ie we dominate it,
but it doesn't postdominate us). */
if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
BLOCK_INFO (bb)->npredecessors = count;
/* When function never returns, we will never process exit block. */
if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- bb->count = profile_count::zero ();
- bb->frequency = 0;
- }
+ bb->count = profile_count::zero ();
}
BLOCK_INFO (head)->frequency = 1;
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE); */
- sreal tmp = e->probability;
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ sreal tmp = e->probability.initialized_p () ?
+ e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (e->src)->frequency;
tmp *= real_inv_br_prob_base;
frequency += tmp;
= ((e->probability * BLOCK_INFO (bb)->frequency)
/ REG_BR_PROB_BASE); */
- sreal tmp = e->probability;
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ sreal tmp = e->probability.initialized_p () ?
+ e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (bb)->frequency;
EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
}
node->dump_name ());
}
+ basic_block bb;
+ if (opt_for_fn (node->decl, flag_guess_branch_prob))
+ {
+ bool clear_zeros
+ = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p ();
+ FOR_ALL_BB_FN (bb, fn)
+ if (clear_zeros || !(bb->count == profile_count::zero ()))
+ bb->count = bb->count.guessed_local ();
+ fn->cfg->count_max = fn->cfg->count_max.guessed_local ();
+ }
+ else
+ {
+ FOR_ALL_BB_FN (bb, fn)
+ bb->count = profile_count::uninitialized ();
+ fn->cfg->count_max = profile_count::uninitialized ();
+ }
+
+ struct cgraph_edge *e;
+ for (e = node->callees; e; e = e->next_callee)
+ e->count = gimple_bb (e->call_stmt)->count;
+ for (e = node->indirect_calls; e; e = e->next_callee)
+ e->count = gimple_bb (e->call_stmt)->count;
+ node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count;
+
profile_status_for_fn (fn)
= (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
node->frequency
gcov_type max_tp_first_run = 0;
struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
- if (!(node->count == profile_count::zero ()))
+ if (node->count.ipa ().nonzero_p ())
continue;
for (e = node->callers; e; e = e->next_caller)
- {
- if (e->count.initialized_p () > 0)
+ if (e->count.ipa ().initialized_p () && e->count.ipa () > 0)
{
- call_count = call_count + e->count;
+ call_count = call_count + e->count.ipa ();
if (e->caller->tp_first_run > max_tp_first_run)
max_tp_first_run = e->caller->tp_first_run;
}
- }
/* If time profile is missing, let assign the maximum that comes from
caller functions. */
if (call_count > 0
&& fn && fn->cfg
- && (call_count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs))
+ && (call_count.apply_scale (unlikely_count_fraction, 1)
+ >= profile_info->runs))
{
drop_profile (node, call_count);
worklist.safe_push (node);
struct cgraph_node *callee = e->callee;
struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
- if (callee->count > 0)
+ if (!(e->count.ipa () == profile_count::zero ())
+ && callee->count.ipa ().nonzero_p ())
continue;
- if (DECL_COMDAT (callee->decl) && fn && fn->cfg
+ if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
+ && fn && fn->cfg
&& profile_status_for_fn (fn) == PROFILE_READ)
{
drop_profile (node, profile_count::zero ());
Return nonzero iff there was any nonzero execution count. */
bool
-counts_to_freqs (void)
+update_max_bb_count (void)
{
- gcov_type count_max;
- profile_count true_count_max = profile_count::zero ();
+ profile_count true_count_max = profile_count::uninitialized ();
basic_block bb;
- /* Don't overwrite the estimated frequencies when the profile for
- the function is missing. We may drop this function PROFILE_GUESSED
- later in drop_profile (). */
- if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
- || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
- return 0;
-
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (bb->count > true_count_max)
- true_count_max = bb->count;
-
- count_max = MAX (true_count_max.to_gcov_type (), 1);
+ true_count_max = true_count_max.max (bb->count);
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (bb->count.initialized_p ())
- bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max);
+ cfun->cfg->count_max = true_count_max;
- return !(true_count_max == profile_count::zero ());
+ return true_count_max.ipa ().nonzero_p ();
}
/* Return true if function is likely to be expensive, so there is no point to
bool
expensive_function_p (int threshold)
{
- unsigned int sum = 0;
basic_block bb;
- unsigned int limit;
- /* We can not compute accurately for large thresholds due to scaled
- frequencies. */
- gcc_assert (threshold <= BB_FREQ_MAX);
-
- /* Frequencies are out of range. This either means that function contains
- internal loop executing more than BB_FREQ_MAX times or profile feedback
- is available and function has not been executed at all. */
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
+ /* If the profile was scaled in a way that the entry block has count 0, then
+ the function is definitely taking a lot of time. */
+ if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
return true;
- /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
- limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
+ profile_count limit = ENTRY_BLOCK_PTR_FOR_FN
+ (cfun)->count.apply_scale (threshold, 1);
+ profile_count sum = profile_count::zero ();
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *insn;
+ if (!bb->count.initialized_p ())
+ {
+ if (dump_file)
+ fprintf (dump_file, "Function is considered expensive because"
+ " count of bb %i is not initialized\n", bb->index);
+ return true;
+ }
+
FOR_BB_INSNS (bb, insn)
if (active_insn_p (insn))
{
- sum += bb->frequency;
+ sum += bb->count;
if (sum > limit)
return true;
}
return false;
}
-/* Determine basic blocks/edges that are known to be unlikely executed and set
- their counters to zero.
- This is done with first identifying obviously unlikely BBs/edges and then
- propagating in both directions. */
+/* All basic blocks that are reachable only from unlikely basic blocks are
+ unlikely. */
-static void
-determine_unlikely_bbs ()
+void
+propagate_unlikely_bbs_forward (void)
{
- basic_block bb;
auto_vec<basic_block, 64> worklist;
+ basic_block bb;
edge_iterator ei;
edge e;
- FOR_EACH_BB_FN (bb, cfun)
- {
- if (!(bb->count == profile_count::zero ())
- && unlikely_executed_bb_p (bb))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Basic block %i is locally unlikely\n",
- bb->index);
- bb->count = profile_count::zero ();
- }
-
- if (bb->count == profile_count::zero ())
- {
- bb->frequency = 0;
- FOR_EACH_EDGE (e, ei, bb->preds)
- e->count = profile_count::zero ();
- }
-
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (!(e->count == profile_count::zero ())
- && unlikely_executed_edge_p (e))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
- bb->index, e->dest->index);
- e->count = profile_count::zero ();
- }
-
- gcc_checking_assert (!bb->aux);
- }
-
if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
{
ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
{
bb = worklist.pop ();
FOR_EACH_EDGE (e, ei, bb->succs)
- if (!(e->count == profile_count::zero ())
+ if (!(e->count () == profile_count::zero ())
&& !(e->dest->count == profile_count::zero ())
&& !e->dest->aux)
{
"Basic block %i is marked unlikely by forward prop\n",
bb->index);
bb->count = profile_count::zero ();
- bb->frequency = 0;
- FOR_EACH_EDGE (e, ei, bb->succs)
- e->count = profile_count::zero ();
}
else
bb->aux = NULL;
}
+}
+
+/* Determine basic blocks/edges that are known to be unlikely executed and set
+ their counters to zero.
+ This is done with first identifying obviously unlikely BBs/edges and then
+ propagating in both directions. */
+
+static void
+determine_unlikely_bbs ()
+{
+ basic_block bb;
+ auto_vec<basic_block, 64> worklist;
+ edge_iterator ei;
+ edge e;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (!(bb->count == profile_count::zero ())
+ && unlikely_executed_bb_p (bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Basic block %i is locally unlikely\n",
+ bb->index);
+ bb->count = profile_count::zero ();
+ }
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!(e->probability == profile_probability::never ())
+ && unlikely_executed_edge_p (e))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
+ bb->index, e->dest->index);
+ e->probability = profile_probability::never ();
+ }
+
+ gcc_checking_assert (!bb->aux);
+ }
+ propagate_unlikely_bbs_forward ();
auto_vec<int, 64> nsuccs;
nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun));
{
nsuccs[bb->index] = 0;
FOR_EACH_EDGE (e, ei, bb->succs)
- if (!(e->count == profile_count::zero ()))
+ if (!(e->probability == profile_probability::never ())
+ && !(e->dest->count == profile_count::zero ()))
nsuccs[bb->index]++;
if (!nsuccs[bb->index])
worklist.safe_push (bb);
while (worklist.length () > 0)
{
bb = worklist.pop ();
+ if (bb->count == profile_count::zero ())
+ continue;
if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
{
bool found = false;
if (found)
continue;
}
- if (!(bb->count == profile_count::zero ())
- && (dump_file && (dump_flags & TDF_DETAILS)))
+ if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"Basic block %i is marked unlikely by backward prop\n",
bb->index);
bb->count = profile_count::zero ();
- bb->frequency = 0;
FOR_EACH_EDGE (e, ei, bb->preds)
- if (!(e->count == profile_count::zero ()))
+ if (!(e->probability == profile_probability::never ()))
{
- e->count = profile_count::zero ();
if (!(e->src->count == profile_count::zero ()))
{
+ gcc_checking_assert (nsuccs[e->src->index] > 0);
nsuccs[e->src->index]--;
if (!nsuccs[e->src->index])
worklist.safe_push (e->src);
}
}
}
+ /* Finally all edges from non-0 regions to 0 are unlikely. */
+ FOR_ALL_BB_FN (bb, cfun)
+ if (!(bb->count == profile_count::zero ()))
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!(e->probability == profile_probability::never ())
+ && e->dest->count == profile_count::zero ())
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Edge %i->%i is unlikely because "
+ "it enters unlikely block\n",
+ bb->index, e->dest->index);
+ e->probability = profile_probability::never ();
+ }
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
+ cgraph_node::get (current_function_decl)->count = profile_count::zero ();
}
/* Estimate and propagate basic block frequencies using the given branch
determine_unlikely_bbs ();
if (force || profile_status_for_fn (cfun) != PROFILE_READ
- || !counts_to_freqs ())
+ || !update_max_bb_count ())
{
static int real_values_initialized = 0;
{
real_values_initialized = 1;
real_br_prob_base = REG_BR_PROB_BASE;
- real_bb_freq_max = BB_FREQ_MAX;
+ /* Scaling frequencies up to the maximal profile count may result in
+ frequent overflows, especially when inlining loops.
+ Small scaling results in unnecessary precision loss. Stay in
+ the half of the (exponential) range. */
+ real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
real_one_half = sreal (1, -1);
real_inv_br_prob_base = sreal (1) / real_br_prob_base;
real_almost_one = sreal (1) - real_inv_br_prob_base;
mark_dfs_back_edges ();
single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
- REG_BR_PROB_BASE;
+ profile_probability::always ();
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (block_info));
FOR_EACH_EDGE (e, ei, bb->succs)
{
- EDGE_INFO (e)->back_edge_prob = e->probability;
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ if (e->probability.initialized_p ())
+ EDGE_INFO (e)->back_edge_prob
+ = e->probability.to_reg_br_prob_base ();
+ else
+ EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
}
}
freq_max = BLOCK_INFO (bb)->frequency;
freq_max = real_bb_freq_max / freq_max;
+ if (freq_max < 16)
+ freq_max = 16;
+ profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
+ cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
- bb->frequency = tmp.to_int ();
+ profile_count count = profile_count::from_gcov_type (tmp.to_int ());
+
+ /* If we have profile feedback in which this function was never
+ executed, then preserve this info. */
+ if (!(bb->count == profile_count::zero ()))
+ bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
}
free_aux_for_blocks ();
if (profile_status_for_fn (cfun) != PROFILE_READ)
{
int flags = flags_from_decl_or_type (current_function_decl);
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()
+ if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ()
+ && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
|| lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
!= NULL)
- node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+ {
+ node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+ warn_function_cold (current_function_decl);
+ }
else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
!= NULL)
node->frequency = NODE_FREQUENCY_HOT;
return;
}
- /* Only first time try to drop function into unlikely executed.
- After inlining the roundoff errors may confuse us.
- Ipa-profile pass will drop functions only called from unlikely
- functions to unlikely and that is most of what we care about. */
- if (!cfun->after_inlining)
- node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+ node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
+ warn_function_cold (current_function_decl);
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
+ return;
FOR_EACH_BB_FN (bb, cfun)
{
if (maybe_hot_bb_p (cfun, bb))
{
struct loop *loop;
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
- if (loop->header->frequency)
+ if (loop->header->count.initialized_p ())
fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
loop->num,
(int)expected_loop_iterations_unbounded (loop));
which may also lead to frequencies incorrectly reduced to 0. There
is less precision in the probabilities, so we only do this for small
max counts. */
- profile_count count_max = profile_count::zero ();
+ cfun->cfg->count_max = profile_count::uninitialized ();
basic_block bb;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (bb->count > count_max)
- count_max = bb->count;
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
- if (profile_status_for_fn (cfun) == PROFILE_GUESSED
- || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
- && count_max < REG_BR_PROB_BASE / 10))
+ if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
{
loop_optimizer_init (0);
add_noreturn_fake_exit_edges ();
loop_optimizer_finalize ();
}
else if (profile_status_for_fn (cfun) == PROFILE_READ)
- counts_to_freqs ();
+ update_max_bb_count ();
else
gcc_unreachable ();
timevar_pop (TV_REBUILD_FREQUENCIES);
force_edge_cold (edge e, bool impossible)
{
profile_count count_sum = profile_count::zero ();
- int prob_sum = 0;
+ profile_probability prob_sum = profile_probability::never ();
edge_iterator ei;
edge e2;
- profile_count old_count = e->count;
- int old_probability = e->probability;
- int prob_scale = REG_BR_PROB_BASE;
+ bool uninitialized_exit = false;
+
+ /* When branch probability guesses are not known, do nothing. */
+ if (!impossible && !e->count ().initialized_p ())
+ return;
+
+ profile_probability goal = (impossible ? profile_probability::never ()
+ : profile_probability::very_unlikely ());
/* If edge is already improbably or cold, just return. */
- if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
- && (!impossible || e->count == profile_count::zero ()))
+ if (e->probability <= goal
+ && (!impossible || e->count () == profile_count::zero ()))
return;
FOR_EACH_EDGE (e2, ei, e->src->succs)
if (e2 != e)
{
- if (e2->count.initialized_p ())
- count_sum += e2->count;
- prob_sum += e2->probability;
+ if (e->flags & EDGE_FAKE)
+ continue;
+ if (e2->count ().initialized_p ())
+ count_sum += e2->count ();
+ if (e2->probability.initialized_p ())
+ prob_sum += e2->probability;
+ else
+ uninitialized_exit = true;
}
+ /* If we are not guessing profiles but have some other edges out,
+ just assume the control flow goes elsewhere. */
+ if (uninitialized_exit)
+ e->probability = goal;
/* If there are other edges out of e->src, redistribute probabilitity
there. */
- if (prob_sum)
- {
- e->probability
- = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
- if (impossible)
- e->count = profile_count::zero ();
- if (old_probability)
- e->count = e->count.apply_scale (e->probability, old_probability);
- else
- e->count = e->count.apply_scale (1, REG_BR_PROB_BASE);
+ else if (prob_sum > profile_probability::never ())
+ {
+ if (!(e->probability < goal))
+ e->probability = goal;
+
+ profile_probability prob_comp = prob_sum / e->probability.invert ();
- prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
- prob_sum);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Making edge %i->%i %s by redistributing "
"probability to other edges.\n",
e->src->index, e->dest->index,
impossible ? "impossible" : "cold");
- profile_count count_sum2 = count_sum + old_count - e->count;
FOR_EACH_EDGE (e2, ei, e->src->succs)
if (e2 != e)
{
- if (count_sum > 0)
- e2->count.apply_scale (count_sum2, count_sum);
- e2->probability = RDIV (e2->probability * prob_scale,
- REG_BR_PROB_BASE);
+ e2->probability /= prob_comp;
}
+ if (current_ir_type () != IR_GIMPLE
+ && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ update_br_prob_note (e->src);
}
/* If all edges out of e->src are unlikely, the basic block itself
is unlikely. */
else
{
- e->probability = REG_BR_PROB_BASE;
+ if (prob_sum == profile_probability::never ())
+ e->probability = profile_probability::always ();
+ else
+ {
+ if (impossible)
+ e->probability = profile_probability::never ();
+ /* If BB has some edges out that are not impossible, we cannot
+ assume that BB itself is. */
+ impossible = false;
+ }
+ if (current_ir_type () != IR_GIMPLE
+ && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ update_br_prob_note (e->src);
+ if (e->src->count == profile_count::zero ())
+ return;
+ if (count_sum == profile_count::zero () && impossible)
+ {
+ bool found = false;
+ if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ ;
+ else if (current_ir_type () == IR_GIMPLE)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
+ {
+ found = true;
+ break;
+ }
+ }
+ /* FIXME: Implement RTL path. */
+ else
+ found = true;
+ if (!found)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Making bb %i impossible and dropping count to 0.\n",
+ e->src->index);
+ e->src->count = profile_count::zero ();
+ FOR_EACH_EDGE (e2, ei, e->src->preds)
+ force_edge_cold (e2, impossible);
+ return;
+ }
+ }
/* If we did not adjusting, the source basic block has no likely edeges
leaving other direction. In that case force that bb cold, too.
This in general is difficult task to do, but handle special case when
BB has only one predecestor. This is common case when we are updating
after loop transforms. */
- if (!prob_sum && count_sum == profile_count::zero ()
- && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1))
+ if (!(prob_sum > profile_probability::never ())
+ && count_sum == profile_count::zero ()
+ && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
+ > (impossible ? 0 : 1))
{
- int old_frequency = e->src->frequency;
+ int old_frequency = e->src->count.to_frequency (cfun);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
impossible ? "impossible" : "cold");
- e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
+ int new_frequency = MIN (e->src->count.to_frequency (cfun),
+ impossible ? 0 : 1);
if (impossible)
- e->src->count = e->count = profile_count::zero ();
+ e->src->count = profile_count::zero ();
else
- e->src->count = e->count = e->count.apply_scale (e->src->frequency,
- old_frequency);
+ e->src->count = e->count ().apply_scale (new_frequency,
+ old_frequency);
force_edge_cold (single_pred_edge (e->src), impossible);
}
else if (dump_file && (dump_flags & TDF_DETAILS)
struct branch_predictor
{
const char *name;
- unsigned probability;
+ int probability;
};
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
{
branch_predictor predictors[] = {
#include "predict.def"
- {NULL, -1}
+ {NULL, -1U}
};
for (unsigned i = 0; predictors[i].name != NULL; i++)
{
+ if (predictors[i].probability == PROB_UNINITIALIZED)
+ continue;
+
unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
- ASSERT_TRUE (p > 50 && p <= 100);
+ ASSERT_TRUE (p >= 50 && p <= 100);
}
}