+2017-11-03 Jan Hubicka <hubicka@ucw.cz>
+
+ * asan.c (create_cond_insert_point): Maintain profile.
+ * ipa-utils.c (ipa_merge_profiles): Be sure only IPA profiles are
+ merged.
+ * basic-block.h (struct basic_block_def): Remove frequency.
+	(EDGE_FREQUENCY): Use to_frequency.
+ * bb-reorder.c (push_to_next_round_p): Use only IPA counts for global
+ heuristics.
+ (find_traces): Update to use to_frequency.
+ (find_traces_1_round): Likewise; use only IPA counts.
+ (bb_to_key): Likewise.
+ (connect_traces): Use IPA counts only.
+ (copy_bb_p): Update to use to_frequency.
+ (fix_up_crossing_landing_pad): Likewise.
+ (sanitize_hot_paths): Likewise.
+ * bt-load.c (basic_block_freq): Likewise.
+ * cfg.c (init_flow): Set count_max to uninitialized.
+ (check_bb_profile): Remove frequencies; check counts.
+ (dump_bb_info): Do not dump frequencies.
+ (update_bb_profile_for_threading): Update counts only.
+ (scale_bbs_frequencies_int): Likewise.
+ (MAX_SAFE_MULTIPLIER): Remove.
+ (scale_bbs_frequencies_gcov_type): Update counts only.
+ (scale_bbs_frequencies_profile_count): Update counts only.
+ (scale_bbs_frequencies): Update counts only.
+	* cfg.h (struct control_flow_graph): Add count_max.
+ (update_bb_profile_for_threading): Update prototype.
+ * cfgbuild.c (find_bb_boundaries): Do not update frequencies.
+ (find_many_sub_basic_blocks): Likewise.
+ * cfgcleanup.c (try_forward_edges): Likewise.
+ (try_crossjump_to_edge): Likewise.
+ * cfgexpand.c (expand_gimple_cond): Likewise.
+ (expand_gimple_tailcall): Likewise.
+ (construct_init_block): Likewise.
+ (construct_exit_block): Likewise.
+ * cfghooks.c (verify_flow_info): Check consistency of counts.
+ (dump_bb_for_graph): Do not dump frequencies.
+ (split_block_1): Do not update frequencies.
+ (split_edge): Do not update frequencies.
+ (make_forwarder_block): Do not update frequencies.
+ (duplicate_block): Do not update frequencies.
+ (account_profile_record): Do not update frequencies.
+ * cfgloop.c (find_subloop_latch_edge_by_profile): Use IPA counts
+ for global heuristics.
+ * cfgloopanal.c (average_num_loop_insns): Update to use to_frequency.
+ (expected_loop_iterations_unbounded): Use counts only.
+ * cfgloopmanip.c (scale_loop_profile): Simplify.
+	(create_empty_loop_on_edge): Simplify.
+	(loopify): Simplify.
+	(duplicate_loop_to_header_edge): Simplify.
+ * cfgrtl.c (force_nonfallthru_and_redirect): Update profile.
+ (update_br_prob_note): Take care of removing note when profile
+ becomes undefined.
+ (relink_block_chain): Do not dump frequency.
+ (rtl_account_profile_record): Use to_frequency.
+ * cgraph.c (symbol_table::create_edge): Convert count to ipa count.
+	(cgraph_edge::redirect_call_stmt_to_callee): Convert count to ipa
+	count.
+ (cgraph_update_edges_for_call_stmt_node): Likewise.
+ (cgraph_edge::verify_count_and_frequency): Update.
+ (cgraph_node::verify_node): Temporarily disable frequency verification.
+ * cgraphbuild.c (compute_call_stmt_bb_frequency): Use
+ to_cgraph_frequency.
+ (cgraph_edge::rebuild_edges): Convert to ipa counts.
+ * cgraphunit.c (init_lowered_empty_function): Do not initialize
+ frequencies.
+ (cgraph_node::expand_thunk): Update profile.
+ * except.c (dw2_build_landing_pads): Do not update frequency.
+ * final.c (compute_alignments): Use to_frequency.
+ (dump_basic_block_info): Do not dump frequency.
+ * gimple-pretty-print.c (dump_profile): Do not dump frequency.
+ (dump_gimple_bb_header): Do not dump frequency.
+ * gimple-ssa-isolate-paths.c (isolate_path): Do not update frequency;
+ do update count.
+ * gimple-streamer-in.c (input_bb): Do not stream frequency.
+ * gimple-streamer-out.c (output_bb): Do not stream frequency.
+	* haifa-sched.c (sched_pressure_start_bb): Use to_frequency.
+ (init_before_recovery): Do not update frequency.
+ (sched_create_recovery_edges): Do not update frequency.
+ * hsa-gen.c (convert_switch_statements): Do not update frequency.
+ * ipa-cp.c (ipcp_propagate_stage): Update search for max_count.
+ (ipa_cp_c_finalize): Set max_count to uninitialized.
+ * ipa-fnsummary.c (get_minimal_bb): Use counts.
+ (param_change_prob): Use counts.
+ * ipa-profile.c (ipa_profile_generate_summary): Do not summarize
+ local profiles.
+ * ipa-split.c (consider_split): Use to_frequency.
+ (split_function): Use to_frequency.
+ * ira-build.c (loop_compare_func): Likewise.
+ (mark_loops_for_removal): Likewise.
+ (mark_all_loops_for_removal): Likewise.
+ * loop-doloop.c (doloop_modify): Do not update frequency.
+ * loop-unroll.c (unroll_loop_runtime_iterations): Do not update
+ frequency.
+ * lto-streamer-in.c (input_function): Update count_max.
+ * omp-expand.c (expand_omp_taskreg): Update count_max.
+ * omp-simd-clone.c (simd_clone_adjust): Update profile.
+ * predict.c (maybe_hot_frequency_p): Use to_frequency.
+ (maybe_hot_count_p): Use ipa counts only.
+ (maybe_hot_bb_p): Simplify.
+ (maybe_hot_edge_p): Simplify.
+ (probably_never_executed): Do not take frequency argument.
+ (probably_never_executed_bb_p): Do not pass frequency.
+ (probably_never_executed_edge_p): Likewise.
+ (combine_predictions_for_bb): Check that profile is nonzero.
+ (propagate_freq): Do not set frequency.
+ (drop_profile): Simplify.
+ (counts_to_freqs): Simplify.
+ (expensive_function_p): Use to_frequency.
+ (propagate_unlikely_bbs_forward): Simplify.
+ (determine_unlikely_bbs): Simplify.
+ (estimate_bb_frequencies): Add hack to silence graphite issues.
+ (compute_function_frequency): Use ipa counts.
+ (pass_profile::execute): Update.
+ (rebuild_frequencies): Use counts only.
+ (force_edge_cold): Use counts only.
+ * profile-count.c (profile_count::dump): Dump new count types.
+	(profile_count::differs_from_p): Check compatibility.
+ (profile_count::to_frequency): New function.
+ (profile_count::to_cgraph_frequency): New function.
+ * profile-count.h (struct function): Declare.
+ (enum profile_quality): Add profile_guessed_local and
+ profile_guessed_global0.
+	(class profile_probability): Decrease number of bits to 29;
+	update from_reg_br_prob_note and to_reg_br_prob_note.
+	(class profile_count): Update comment; decrease number of bits
+	to 61.  Check compatibility.
+ (profile_count::compatible_p): New private member function.
+ (profile_count::ipa_p): New member function.
+ (profile_count::operator<): Handle global zero correctly.
+ (profile_count::operator>): Handle global zero correctly.
+ (profile_count::operator<=): Handle global zero correctly.
+ (profile_count::operator>=): Handle global zero correctly.
+ (profile_count::nonzero_p): New member function.
+ (profile_count::force_nonzero): New member function.
+ (profile_count::max): New member function.
+	(profile_count::apply_scale): Handle IPA scaling.
+ (profile_count::guessed_local): New member function.
+ (profile_count::global0): New member function.
+ (profile_count::ipa): New member function.
+ (profile_count::to_frequency): Declare.
+ (profile_count::to_cgraph_frequency): Declare.
+ * profile.c (OVERLAP_BASE): Delete.
+ (compute_frequency_overlap): Delete.
+ (compute_branch_probabilities): Do not use compute_frequency_overlap.
+ * regs.h (REG_FREQ_FROM_BB): Use to_frequency.
+ * sched-ebb.c (rank): Use counts only.
+ * shrink-wrap.c (handle_simple_exit): Use counts only.
+ (try_shrink_wrapping): Use counts only.
+ (place_prologue_for_one_component): Use counts only.
+ * tracer.c (find_best_predecessor): Use to_frequency.
+ (find_trace): Use to_frequency.
+ (tail_duplicate): Use to_frequency.
+ * trans-mem.c (expand_transaction): Do not update frequency.
+ * tree-call-cdce.c: Do not update frequency.
+ * tree-cfg.c (gimple_find_sub_bbs): Likewise.
+ (gimple_merge_blocks): Likewise.
+ (gimple_split_edge): Likewise.
+ (gimple_duplicate_sese_region): Likewise.
+ (gimple_duplicate_sese_tail): Likewise.
+ (move_sese_region_to_fn): Likewise.
+ (gimple_account_profile_record): Likewise.
+ (insert_cond_bb): Likewise.
+ * tree-complex.c (expand_complex_div_wide): Likewise.
+ * tree-eh.c (lower_resx): Update profile.
+ * tree-inline.c (copy_bb): Simplify count scaling; do not scale
+ frequencies.
+	(initialize_cfun): Do not initialize frequencies.
+ (freqs_to_counts): Delete.
+ (copy_cfg_body): Ignore count parameter.
+ (copy_body): Update.
+ (expand_call_inline): Update count_max.
+ (optimize_inline_calls): Update count_max.
+ (tree_function_versioning): Update count_max.
+ * tree-ssa-coalesce.c (coalesce_cost_bb): Use to_frequency.
+ * tree-ssa-ifcombine.c (update_profile_after_ifcombine): Do not update
+ frequency.
+ * tree-ssa-loop-im.c (execute_sm_if_changed): Use counts only.
+	* tree-ssa-loop-ivcanon.c (unloop_loops): Do not update frequency.
+ (try_peel_loop): Likewise.
+ * tree-ssa-loop-ivopts.c (get_scaled_computation_cost_at): Use
+ to_frequency.
+ * tree-ssa-loop-manip.c (niter_for_unrolled_loop): Pass -1.
+	(tree_transform_and_unroll_loop): Do not use frequencies.
+ * tree-ssa-loop-niter.c (estimate_numbers_of_iterations):
+ Use reliable prediction only.
+ * tree-ssa-loop-unswitch.c (hoist_guard): Do not use frequencies.
+ * tree-ssa-sink.c (select_best_block): Use to_frequency.
+ * tree-ssa-tail-merge.c (replace_block_by): Temporarily disable
+ probability scaling.
+	* tree-ssa-threadupdate.c (create_block_for_threading): Do
+	not update frequency.
+ (any_remaining_duplicated_blocks): Likewise.
+ (update_profile): Likewise.
+ (estimated_freqs_path): Delete.
+ (freqs_to_counts_path): Delete.
+ (clear_counts_path): Delete.
+ (ssa_fix_duplicate_block_edges): Likewise.
+ (duplicate_thread_path): Likewise.
+ * tree-switch-conversion.c (gen_inbound_check): Use counts.
+ * tree-tailcall.c (decrease_profile): Do not update frequency.
+ (eliminate_tail_call): Likewise.
+ * tree-vect-loop-manip.c (vect_do_peeling): Likewise.
+ * tree-vect-loop.c (scale_profile_for_vect_loop): Likewise.
+ (optimize_mask_stores): Likewise.
+ * tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise.
+ * ubsan.c (ubsan_expand_null_ifn): Update profile.
+ (ubsan_expand_ptr_ifn): Update profile.
+ * value-prof.c (gimple_ic): Simplify.
+ * value-prof.h (gimple_ic): Update prototype.
+	* ipa-inline-transform.c (inline_transform): Fix scaling conditions.
+ * ipa-inline.c (compute_uninlined_call_time): Be sure that
+ counts are nonzero.
+ (want_inline_self_recursive_call_p): Likewise.
+	(resolve_noninline_speculation): Only accumulate defined counts.
+ (inline_small_functions): Use nonzero_p.
+ (ipa_inline): Do not access freed node.
+
2017-11-03 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_override_options_internal):
? profile_probability::very_unlikely ()
: profile_probability::very_likely ();
e->probability = fallthrough_probability.invert ();
+ then_bb->count = e->count ();
if (create_then_fallthru_edge)
make_single_succ_edge (then_bb, fallthru_bb, EDGE_FALLTHRU);
/* Expected number of executions: calculated in profile.c. */
profile_count count;
- /* Expected frequency. Normalized to be in range 0 to BB_FREQ_MAX. */
- int frequency;
-
/* The discriminator for this block. The discriminator distinguishes
among several basic blocks that share a common locus, allowing for
more accurate sample-based profiling. */
? EDGE_SUCC ((bb), 1) : EDGE_SUCC ((bb), 0))
/* Return expected execution frequency of the edge E. */
-#define EDGE_FREQUENCY(e) e->probability.apply (e->src->frequency)
+#define EDGE_FREQUENCY(e) e->count ().to_frequency (cfun)
/* Compute a scale factor (or probability) suitable for scaling of
gcov_type values via apply_probability() and apply_scale(). */
there_exists_another_round = round < number_of_rounds - 1;
- block_not_hot_enough = (bb->frequency < exec_th
- || bb->count < count_th
+ block_not_hot_enough = (bb->count.to_frequency (cfun) < exec_th
+ || bb->count.ipa () < count_th
|| probably_never_executed_bb_p (cfun, bb));
if (there_exists_another_round
{
bbd[e->dest->index].heap = heap;
bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
- if (e->dest->frequency > max_entry_frequency)
- max_entry_frequency = e->dest->frequency;
- if (e->dest->count.initialized_p () && e->dest->count > max_entry_count)
+ if (e->dest->count.to_frequency (cfun) > max_entry_frequency)
+ max_entry_frequency = e->dest->count.to_frequency (cfun);
+ if (e->dest->count.ipa_p () && e->dest->count > max_entry_count)
max_entry_count = e->dest->count;
}
for (bb = traces[i].first;
bb != traces[i].last;
bb = (basic_block) bb->aux)
- fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
- fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
+ fprintf (dump_file, "%d [%d] ", bb->index,
+ bb->count.to_frequency (cfun));
+ fprintf (dump_file, "%d [%d]\n", bb->index,
+ bb->count.to_frequency (cfun));
}
fflush (dump_file);
}
continue;
prob = e->probability;
- freq = e->dest->frequency;
+ freq = e->dest->count.to_frequency (cfun);
/* The only sensible preference for a call instruction is the
fallthru edge. Don't bother selecting anything else. */
|| !prob.initialized_p ()
|| ((prob.to_reg_br_prob_base () < branch_th
|| EDGE_FREQUENCY (e) < exec_th
- || e->count () < count_th) && (!for_size)))
+ || e->count ().ipa () < count_th) && (!for_size)))
continue;
if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
|| !prob.initialized_p ()
|| prob.to_reg_br_prob_base () < branch_th
|| freq < exec_th
- || e->count () < count_th)
+ || e->count ().ipa () < count_th)
{
/* When partitioning hot/cold basic blocks, make sure
the cold blocks (and only the cold blocks) all get
if (best_edge->dest != bb)
{
if (EDGE_FREQUENCY (best_edge)
- > 4 * best_edge->dest->frequency / 5)
+ > 4 * best_edge->dest->count.to_frequency (cfun) / 5)
{
/* The loop has at least 4 iterations. If the loop
header is not the first block of the function
& EDGE_CAN_FALLTHRU)
&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
&& single_succ (e->dest) == best_edge->dest
- && (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
- || for_size))
+ && (2 * e->dest->count.to_frequency (cfun)
+ >= EDGE_FREQUENCY (best_edge) || for_size))
{
best_edge = e;
if (dump_file)
if (priority)
/* The block with priority should have significantly lower key. */
- return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
+    return -(100 * BB_FREQ_MAX + 100 * priority
+	     + bb->count.to_frequency (cfun));
- return -bb->frequency;
+ return -bb->count.to_frequency (cfun);
}
/* Return true when the edge E from basic block BB is better than the temporary
&& !connected[bbd[di].start_of_trace]
&& BB_PARTITION (e2->dest) == current_partition
&& EDGE_FREQUENCY (e2) >= freq_threshold
- && e2->count () >= count_threshold
+ && e2->count ().ipa () >= count_threshold
&& (!best2
|| e2->probability > best2->probability
|| (e2->probability == best2->probability
optimize_edge_for_speed_p (best)
&& EDGE_FREQUENCY (best) >= freq_threshold
&& (!best->count ().initialized_p ()
- || best->count () >= count_threshold)))
+ || best->count ().ipa () >= count_threshold)))
{
basic_block new_bb;
int max_size = uncond_jump_length;
rtx_insn *insn;
- if (!bb->frequency)
+ if (!bb->count.to_frequency (cfun))
return false;
if (EDGE_COUNT (bb->preds) < 2)
return false;
last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
new_bb = create_basic_block (new_label, jump, last_bb);
new_bb->aux = last_bb->aux;
- new_bb->frequency = post_bb->frequency;
new_bb->count = post_bb->count;
last_bb->aux = new_bb;
edge_iterator ei;
profile_probability highest_probability
= profile_probability::uninitialized ();
- int highest_freq = 0;
profile_count highest_count = profile_count::uninitialized ();
bool found = false;
-  /* The following loop will look for the hottest edge via
-     the edge count, if it is non-zero, then fallback to the edge
-     frequency and finally the edge probability.  */
+  /* The following loop looks for the hottest edge via the edge count;
+     when counts are uninitialized, it falls back to the edge
+     probability.  */
- if (!highest_count.initialized_p () || e->count () > highest_count)
+ if (!(e->count () > highest_count))
highest_count = e->count ();
- int edge_freq = EDGE_FREQUENCY (e);
- if (edge_freq > highest_freq)
- highest_freq = edge_freq;
if (!highest_probability.initialized_p ()
|| e->probability > highest_probability)
highest_probability = e->probability;
-  /* Select the hottest edge using the edge count, if it is non-zero,
-     then fallback to the edge frequency and finally the edge
-     probability.  */
+  /* Select the hottest edge using the edge count; when counts are
+     uninitialized, fall back to the edge probability.  */
- if (highest_count > 0)
+ if (highest_count.initialized_p ())
{
- if (e->count () < highest_count)
+ if (!(e->count () >= highest_count))
continue;
}
- else if (highest_freq)
- {
- if (EDGE_FREQUENCY (e) < highest_freq)
- continue;
- }
- else if (e->probability < highest_probability)
+ else if (!(e->probability >= highest_probability))
continue;
basic_block reach_bb = walk_up ? e->src : e->dest;
static int
basic_block_freq (const_basic_block bb)
{
- return bb->frequency;
+ return bb->count.to_frequency (cfun);
}
/* If the rtx at *XP references (sets or reads) any branch target
if (!the_fun->cfg)
the_fun->cfg = ggc_cleared_alloc<control_flow_graph> ();
n_edges_for_fn (the_fun) = 0;
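+  /* No profile has been computed yet, so the maximal count is unknown.  */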
+ the_fun->cfg->count_max = profile_count::uninitialized ();
ENTRY_BLOCK_PTR_FOR_FN (the_fun)
= alloc_block ();
ENTRY_BLOCK_PTR_FOR_FN (the_fun)->index = ENTRY_BLOCK;
}
if (bb != ENTRY_BLOCK_PTR_FOR_FN (fun))
{
- int sum = 0;
+ profile_count sum = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->preds)
- sum += EDGE_FREQUENCY (e);
- if (abs (sum - bb->frequency) > 100)
- fprintf (file,
- ";; %sInvalid sum of incoming frequencies %i, should be %i\n",
- s_indent, sum, bb->frequency);
+ sum += e->count ();
+ if (sum.differs_from_p (bb->count))
+ {
+ fprintf (file, ";; %sInvalid sum of incoming counts ",
+ s_indent);
+ sum.dump (file);
+ fprintf (file, ", should be ");
+ bb->count.dump (file);
+ fprintf (file, "\n");
+ }
}
if (BB_PARTITION (bb) == BB_COLD_PARTITION)
{
fputs (", count ", outf);
bb->count.dump (outf);
}
- fprintf (outf, ", freq %i", bb->frequency);
if (maybe_hot_bb_p (fun, bb))
fputs (", maybe hot", outf);
if (probably_never_executed_bb_p (fun, bb))
}
}
-/* An edge originally destinating BB of FREQUENCY and COUNT has been proved to
+/* An edge, with count COUNT and originally leading to BB, has been proved to
leave the block by TAKEN_EDGE. Update profile of BB such that edge E can be
redirected to destination of TAKEN_EDGE.
This function may leave the profile inconsistent in the case TAKEN_EDGE
-   frequency or count is believed to be lower than FREQUENCY or COUNT
-   respectively.  */
+   count is believed to be lower than COUNT.  */
void
-update_bb_profile_for_threading (basic_block bb, int edge_frequency,
+update_bb_profile_for_threading (basic_block bb,
profile_count count, edge taken_edge)
{
edge c;
}
bb->count -= count;
- bb->frequency -= edge_frequency;
- if (bb->frequency < 0)
- bb->frequency = 0;
-
/* Compute the probability of TAKEN_EDGE being reached via threaded edge.
Watch for overflows. */
- if (bb->frequency)
- /* FIXME: We should get edge frequency as count. */
- prob = profile_probability::probability_in_gcov_type
- (edge_frequency, bb->frequency);
+ if (bb->count.nonzero_p ())
+ prob = count.probability_in (bb->count);
else
prob = profile_probability::never ();
if (prob > taken_edge->probability)
if (prob == profile_probability::never ())
{
if (dump_file)
- fprintf (dump_file, "Edge frequencies of bb %i has been reset, "
- "frequency of block should end up being 0, it is %i\n",
- bb->index, bb->frequency);
+	fprintf (dump_file, "Edge probabilities of bb %i have been reset, "
+ "count of block should end up being 0, it is non-zero\n",
+ bb->index);
EDGE_SUCC (bb, 0)->probability = profile_probability::guessed_always ();
ei = ei_start (bb->succs);
ei_next (&ei);
for (i = 0; i < nbbs; i++)
{
- bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den);
- /* Make sure the frequencies do not grow over BB_FREQ_MAX. */
- if (bbs[i]->frequency > BB_FREQ_MAX)
- bbs[i]->frequency = BB_FREQ_MAX;
bbs[i]->count = bbs[i]->count.apply_scale (num, den);
}
}
-/* numbers smaller than this value are safe to multiply without getting
- 64bit overflow. */
-#define MAX_SAFE_MULTIPLIER (1 << (sizeof (int64_t) * 4 - 1))
-
/* Multiply all frequencies of basic blocks in array BBS of length NBBS
by NUM/DEN, in gcov_type arithmetic. More accurate than previous
function but considerably slower. */
gcov_type den)
{
int i;
- gcov_type fraction = RDIV (num * 65536, den);
-
- gcc_assert (fraction >= 0);
- if (num < MAX_SAFE_MULTIPLIER)
- for (i = 0; i < nbbs; i++)
- {
- bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den);
- if (bbs[i]->count <= MAX_SAFE_MULTIPLIER)
- bbs[i]->count = bbs[i]->count.apply_scale (num, den);
- else
- bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536);
- }
- else
- for (i = 0; i < nbbs; i++)
- {
- if (sizeof (gcov_type) > sizeof (int))
- bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den);
- else
- bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536);
- bbs[i]->count = bbs[i]->count.apply_scale (fraction, 65536);
- }
+ for (i = 0; i < nbbs; i++)
+ bbs[i]->count = bbs[i]->count.apply_scale (num, den);
}
/* Multiply all frequencies of basic blocks in array BBS of length NBBS
profile_count num, profile_count den)
{
int i;
-
- for (i = 0; i < nbbs; i++)
- {
- bbs[i]->frequency = RDIV (bbs[i]->frequency * num.to_gcov_type (),
- den.to_gcov_type ());
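+  /* Scaling is well defined only when DEN is nonzero or NUM is zero
+     (the result is then zero no matter what DEN is).  */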
+ if (num == profile_count::zero () || den.nonzero_p ())
+ for (i = 0; i < nbbs; i++)
bbs[i]->count = bbs[i]->count.apply_scale (num, den);
- }
}
/* Multiply all frequencies of basic blocks in array BBS of length NBBS
int i;
for (i = 0; i < nbbs; i++)
- {
- bbs[i]->frequency = p.apply (bbs[i]->frequency);
- bbs[i]->count = bbs[i]->count.apply_probability (p);
- }
+ bbs[i]->count = bbs[i]->count.apply_probability (p);
}
/* Helper types for hash tables. */
/* Maximal number of entities in the single jumptable. Used to estimate
final flowgraph size. */
int max_jumptable_ents;
+
+  /* Maximal count of any basic block in the function.  */
+ profile_count count_max;
};
extern basic_block debug_bb_n (int);
extern void dump_bb_info (FILE *, basic_block, int, dump_flags_t, bool, bool);
extern void brief_dump_cfg (FILE *, dump_flags_t);
-extern void update_bb_profile_for_threading (basic_block, int, profile_count, edge);
+extern void update_bb_profile_for_threading (basic_block, profile_count, edge);
extern void scale_bbs_frequencies_int (basic_block *, int, int, int);
extern void scale_bbs_frequencies_gcov_type (basic_block *, int, gcov_type,
gcov_type);
remove_edge (fallthru);
/* BB is unreachable at this point - we need to determine its profile
once edges are built. */
- bb->frequency = 0;
bb->count = profile_count::uninitialized ();
flow_transfer_insn = NULL;
debug_insn = NULL;
{
bool initialized_src = false, uninitialized_src = false;
bb->count = profile_count::zero ();
- bb->frequency = 0;
FOR_EACH_EDGE (e, ei, bb->preds)
{
if (e->count ().initialized_p ())
}
else
uninitialized_src = true;
- if (e->probability.initialized_p ())
- bb->frequency += EDGE_FREQUENCY (e);
}
/* When some edges are missing with read profile, this is
most likely because RTL expansion introduced loop.
precisely once. */
if (!initialized_src
|| (uninitialized_src
- && profile_status_for_fn (cfun) != PROFILE_READ))
+ && profile_status_for_fn (cfun) < PROFILE_GUESSED))
bb->count = profile_count::uninitialized ();
}
/* If nothing changed, there is no need to create new BBs. */
{
/* Save the values now, as the edge may get removed. */
profile_count edge_count = e->count ();
- profile_probability edge_probability = e->probability;
- int edge_frequency;
int n = 0;
e->goto_locus = goto_locus;
/* We successfully forwarded the edge. Now update profile
data: for each edge we traversed in the chain, remove
the original edge's execution count. */
- edge_frequency = edge_probability.apply (b->frequency);
-
do
{
edge t;
gcc_assert (n < nthreaded_edges);
t = threaded_edges [n++];
gcc_assert (t->src == first);
- update_bb_profile_for_threading (first, edge_frequency,
- edge_count, t);
+ update_bb_profile_for_threading (first, edge_count, t);
update_br_prob_note (first);
}
else
{
first->count -= edge_count;
- first->frequency -= edge_frequency;
- if (first->frequency < 0)
- first->frequency = 0;
/* It is possible that as the result of
threading we've removed edge as it is
threaded to the fallthru edge. Avoid
else
redirect_edges_to = osrc2;
- /* Recompute the frequencies and counts of outgoing edges. */
+ /* Recompute the counts of destinations of outgoing edges. */
FOR_EACH_EDGE (s, ei, redirect_edges_to->succs)
{
edge s2;
that there is no more than one in the chain, so we can't run
into infinite loop. */
if (FORWARDER_BLOCK_P (s->dest))
- {
- s->dest->frequency += EDGE_FREQUENCY (s);
- }
+ s->dest->count += s->count ();
if (FORWARDER_BLOCK_P (s2->dest))
- {
- s2->dest->frequency -= EDGE_FREQUENCY (s);
- if (s2->dest->frequency < 0)
- s2->dest->frequency = 0;
- }
+ s2->dest->count -= s->count ();
- if (!redirect_edges_to->frequency && !src1->frequency)
+	  /* FIXME: Is this correct?  Should be rewritten to use the
+	     count API.  */
+ if (redirect_edges_to->count.nonzero_p () && src1->count.nonzero_p ())
s->probability = s->probability.combine_with_freq
- (redirect_edges_to->frequency,
- s2->probability, src1->frequency);
+ (redirect_edges_to->count.to_frequency (cfun),
+ s2->probability, src1->count.to_frequency (cfun));
}
- /* Adjust count and frequency for the block. An earlier jump
+ /* Adjust count for the block. An earlier jump
threading pass may have left the profile in an inconsistent
state (see update_bb_profile_for_threading) so we must be
prepared for overflows. */
do
{
tmp->count += src1->count;
- tmp->frequency += src1->frequency;
- if (tmp->frequency > BB_FREQ_MAX)
- tmp->frequency = BB_FREQ_MAX;
if (tmp == redirect_edges_to)
break;
tmp = find_fallthru_edge (tmp->succs)->dest;
redirect_edge_succ (false_edge, new_bb);
false_edge->flags |= EDGE_FALLTHRU;
new_bb->count = false_edge->count ();
- new_bb->frequency = EDGE_FREQUENCY (false_edge);
loop_p loop = find_common_loop (bb->loop_father, dest->loop_father);
add_bb_to_loop (new_bb, loop);
if (loop->latch == bb
if (!(e->flags & (EDGE_ABNORMAL | EDGE_EH)))
{
if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- e->dest->frequency -= EDGE_FREQUENCY (e);
- if (e->dest->frequency < 0)
- e->dest->frequency = 0;
- }
+ e->dest->count -= e->count ();
probability += e->probability;
remove_edge (e);
}
init_block = create_basic_block (NEXT_INSN (get_insns ()),
get_last_insn (),
ENTRY_BLOCK_PTR_FOR_FN (cfun));
- init_block->frequency = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
init_block->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
add_bb_to_loop (init_block, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
if (e)
while (NEXT_INSN (head) && NOTE_P (NEXT_INSN (head)))
head = NEXT_INSN (head);
/* But make sure exit_block starts with RETURN_LABEL, otherwise the
- bb frequency counting will be confused. Any instructions before that
+     bb count bookkeeping will be confused.  Any instructions before that
label are emitted for the case where PREV_BB falls through into the
exit block, so append those instructions to prev_bb in that case. */
if (NEXT_INSN (head) != return_label)
}
}
exit_block = create_basic_block (NEXT_INSN (head), end, prev_bb);
- exit_block->frequency = EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency;
exit_block->count = EXIT_BLOCK_PTR_FOR_FN (cfun)->count;
add_bb_to_loop (exit_block, EXIT_BLOCK_PTR_FOR_FN (cfun)->loop_father);
if (e2 != e)
{
exit_block->count -= e2->count ();
- exit_block->frequency -= EDGE_FREQUENCY (e2);
}
- if (exit_block->frequency < 0)
- exit_block->frequency = 0;
update_bb_for_insn (exit_block);
}
error ("verify_flow_info: Wrong count of block %i", bb->index);
err = 1;
}
- if (bb->frequency < 0)
+      /* FIXME: Graphite and SLJL and target code still tend to produce
+	 blocks with no count.  */
+ if (profile_status_for_fn (cfun) >= PROFILE_GUESSED
+ && !bb->count.initialized_p () && !flag_graphite && 0)
{
- error ("verify_flow_info: Wrong frequency of block %i %i",
- bb->index, bb->frequency);
+ error ("verify_flow_info: Missing count of block %i", bb->index);
err = 1;
}
-      /* FIXME: Graphite and SLJL and target code still tends to produce
-	 edges with no probablity.  */
+      /* FIXME: Graphite and SLJL and target code still tend to produce
+	 edges with no probability.  */
if (profile_status_for_fn (cfun) >= PROFILE_GUESSED
- && !e->probability.initialized_p () && 0)
+ && !e->probability.initialized_p () && !flag_graphite && 0)
{
error ("Uninitialized probability of edge %i->%i", e->src->index,
e->dest->index);
/* TODO: Add pretty printer for counter. */
if (bb->count.initialized_p ())
pp_printf (pp, "COUNT:" "%" PRId64, bb->count.to_gcov_type ());
- pp_printf (pp, " FREQ:%i |", bb->frequency);
pp_write_text_to_stream (pp);
if (!(dump_flags & TDF_SLIM))
cfg_hooks->dump_bb_for_graph (pp, bb);
return NULL;
new_bb->count = bb->count;
- new_bb->frequency = bb->frequency;
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
{
basic_block ret;
profile_count count = e->count ();
- int freq = EDGE_FREQUENCY (e);
edge f;
bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
struct loop *loop;
ret = cfg_hooks->split_edge (e);
ret->count = count;
- ret->frequency = freq;
single_succ_edge (ret)->probability = profile_probability::always ();
if (irr)
fallthru = split_block_after_labels (bb);
dummy = fallthru->src;
dummy->count = profile_count::zero ();
- dummy->frequency = 0;
bb = fallthru->dest;
/* Redirect back edges we want to keep. */
if (redirect_edge_p (e))
{
- dummy->frequency += EDGE_FREQUENCY (e);
- if (dummy->frequency > BB_FREQ_MAX)
- dummy->frequency = BB_FREQ_MAX;
-
dummy->count += e->count ();
ei_next (&ei);
continue;
new_bb->count = new_count;
bb->count -= new_count;
- new_bb->frequency = EDGE_FREQUENCY (e);
- bb->frequency -= EDGE_FREQUENCY (e);
-
redirect_edge_and_branch_force (e, new_bb);
-
- if (bb->frequency < 0)
- bb->frequency = 0;
}
else
- {
- new_bb->count = bb->count;
- new_bb->frequency = bb->frequency;
- }
+ new_bb->count = bb->count;
set_bb_original (new_bb, bb);
set_bb_copy (bb, new_bb);
if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
&& profile_status_for_fn (cfun) != PROFILE_ABSENT)
{
- int sum = 0;
- FOR_EACH_EDGE (e, ei, bb->preds)
- sum += EDGE_FREQUENCY (e);
- if (abs (sum - bb->frequency) > 100
- || (MAX (sum, bb->frequency) > 10
- && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
- record->num_mismatched_freq_in[after_pass]++;
profile_count lsum = profile_count::zero ();
FOR_EACH_EDGE (e, ei, bb->preds)
lsum += e->count ();
tcount += e->count();
}
- if (!tcount.initialized_p () || tcount < HEAVY_EDGE_MIN_SAMPLES
+ if (!tcount.initialized_p () || !(tcount.ipa () > HEAVY_EDGE_MIN_SAMPLES)
|| (tcount - mcount).apply_scale (HEAVY_EDGE_RATIO, 1) > tcount)
return NULL;
if (NONDEBUG_INSN_P (insn))
binsns++;
- ratio = loop->header->frequency == 0
+ ratio = loop->header->count.to_frequency (cfun) == 0
? BB_FREQ_MAX
- : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
+ : (bb->count.to_frequency (cfun) * BB_FREQ_MAX)
+ / loop->header->count.to_frequency (cfun);
ninsns += binsns * ratio;
}
free (bbs);
/* If we have no profile at all, use AVG_LOOP_NITER. */
if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
- else if (loop->latch && (loop->latch->count.reliable_p ()
- || loop->header->count.reliable_p ()))
+ else if (loop->latch && (loop->latch->count.initialized_p ()
+ || loop->header->count.initialized_p ()))
{
profile_count count_in = profile_count::zero (),
count_latch = profile_count::zero ();
count_in += e->count ();
if (!count_latch.initialized_p ())
- ;
- else if (!(count_in > profile_count::zero ()))
+ expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
+ else if (!count_in.nonzero_p ())
expected = count_latch.to_gcov_type () * 2;
else
{
expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
- 1) / count_in.to_gcov_type ();
- if (read_profile_p)
+ if (read_profile_p
+ && count_latch.reliable_p () && count_in.reliable_p ())
*read_profile_p = true;
}
}
- if (expected == -1)
- {
- int freq_in, freq_latch;
-
- freq_in = 0;
- freq_latch = 0;
-
- FOR_EACH_EDGE (e, ei, loop->header->preds)
- if (flow_bb_inside_loop_p (loop, e->src))
- freq_latch += EDGE_FREQUENCY (e);
- else
- freq_in += EDGE_FREQUENCY (e);
-
- if (freq_in == 0)
- {
- /* If we have no profile at all, use AVG_LOOP_NITER iterations. */
- if (!freq_latch)
- expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
- else
- expected = freq_latch * 2;
- }
- else
- expected = (freq_latch + freq_in - 1) / freq_in;
- }
+ else
+ expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
if (max != -1 && max < expected)
return max;
+
return expected;
}
if (e)
{
edge other_e;
- int freq_delta;
profile_count count_delta;
FOR_EACH_EDGE (other_e, ei, e->src->succs)
break;
/* Probability of exit must be 1/iterations. */
- freq_delta = EDGE_FREQUENCY (e);
count_delta = e->count ();
e->probability = profile_probability::always ()
.apply_scale (1, iteration_bound);
other_e->probability = e->probability.invert ();
- freq_delta -= EDGE_FREQUENCY (e);
count_delta -= e->count ();
- /* If latch exists, change its frequency and count, since we changed
+ /* If latch exists, change its count, since we changed
probability of exit. Theoretically we should update everything from
source of exit edge to latch, but for vectorizer this is enough. */
if (loop->latch
&& loop->latch != e->src)
{
- loop->latch->frequency += freq_delta;
- if (loop->latch->frequency < 0)
- loop->latch->frequency = 0;
loop->latch->count += count_delta;
}
}
we look at the actual profile, if it is available. */
p = p.apply_scale (iteration_bound, iterations);
- bool determined = false;
if (loop->header->count.initialized_p ())
{
profile_count count_in = profile_count::zero ();
{
p = count_in.probability_in (loop->header->count.apply_scale
(iteration_bound, 1));
- determined = true;
}
}
- if (!determined && loop->header->frequency)
- {
- int freq_in = 0;
-
- FOR_EACH_EDGE (e, ei, loop->header->preds)
- if (e->src != loop->latch)
- freq_in += EDGE_FREQUENCY (e);
-
- if (freq_in != 0)
- p = profile_probability::probability_in_gcov_type
- (freq_in * iteration_bound, loop->header->frequency);
- }
if (!(p > profile_probability::never ()))
p = profile_probability::very_unlikely ();
}
loop->latch = loop_latch;
add_loop (loop, outer);
- /* TODO: Fix frequencies and counts. */
+ /* TODO: Fix counts. */
scale_loop_frequencies (loop, profile_probability::even ());
/* Update dominators. */
basic_block pred_bb = header_edge->src;
struct loop *loop = alloc_loop ();
struct loop *outer = loop_outer (succ_bb->loop_father);
- int freq;
profile_count cnt;
loop->header = header_edge->dest;
loop->latch = latch_edge->src;
- freq = EDGE_FREQUENCY (header_edge);
cnt = header_edge->count ();
/* Redirect edges. */
remove_bb_from_loops (switch_bb);
add_bb_to_loop (switch_bb, outer);
- /* Fix frequencies. */
+ /* Fix counts. */
if (redirect_all_edges)
{
- switch_bb->frequency = freq;
switch_bb->count = cnt;
}
scale_loop_frequencies (loop, false_scale);
{
/* Calculate coefficients by that we have to scale frequencies
of duplicated loop bodies. */
- freq_in = header->frequency;
+ freq_in = header->count.to_frequency (cfun);
freq_le = EDGE_FREQUENCY (latch_edge);
if (freq_in == 0)
freq_in = 1;
basic_block bb = create_basic_block (BB_HEAD (e->dest), NULL,
ENTRY_BLOCK_PTR_FOR_FN (cfun));
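+  /* The new block is placed on the lone edge leaving the entry block, so it
+     inherits the entry block's count.  */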
+ bb->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
/* Change the existing edge's source to be the new block, and add
a new edge from the entry block to the new block. */
jump_block = create_basic_block (new_head, NULL, e->src);
jump_block->count = count;
- jump_block->frequency = EDGE_FREQUENCY (e);
/* Make sure new block ends up in correct hot/cold section. */
{
new_edge->probability = new_edge->probability.apply_scale (1, 2);
jump_block->count = jump_block->count.apply_scale (1, 2);
- jump_block->frequency /= 2;
edge new_edge2 = make_edge (new_edge->src, target,
e->flags & ~EDGE_FALLTHRU);
new_edge2->probability = probability - new_edge->probability;
update_br_prob_note (basic_block bb)
{
rtx note;
- if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ())
- return;
note = find_reg_note (BB_END (bb), REG_BR_PROB, NULL_RTX);
+ if (!JUMP_P (BB_END (bb)) || !BRANCH_EDGE (bb)->probability.initialized_p ())
+ {
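+      /* The branch probability is no longer known; any existing REG_BR_PROB
+	 note is stale, so unlink it from the insn's note list.  */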
+ if (note)
+ {
+ rtx *note_link, this_rtx;
+
+	  note_link = &REG_NOTES (BB_END (bb));
+ for (this_rtx = *note_link; this_rtx; this_rtx = XEXP (this_rtx, 1))
+ if (this_rtx == note)
+ {
+ *note_link = XEXP (this_rtx, 1);
+ break;
+ }
+ }
+ return;
+ }
if (!note
|| XINT (note, 0) == BRANCH_EDGE (bb)->probability.to_reg_br_prob_note ())
return;
fprintf (dump_file, "compensation ");
else
fprintf (dump_file, "bb %i ", bb->index);
- fprintf (dump_file, " [%i]\n", bb->frequency);
}
}
+= insn_cost (insn, true) * bb->count.to_gcov_type ();
else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
record->time[after_pass]
- += insn_cost (insn, true) * bb->frequency;
+ += insn_cost (insn, true) * bb->count.to_frequency (cfun);
}
}
edge->next_callee = NULL;
edge->lto_stmt_uid = 0;
- edge->count = count;
+ edge->count = count.ipa ();
edge->frequency = freq;
gcc_checking_assert (freq >= 0);
gcc_checking_assert (freq <= CGRAPH_FREQ_MAX);
/* We are producing the final function body and will throw away the
callgraph edges really soon. Reset the counts/frequencies to
keep verifier happy in the case of roundoff errors. */
- e->count = gimple_bb (e->call_stmt)->count;
+ e->count = gimple_bb (e->call_stmt)->count.ipa ();
e->frequency = compute_call_stmt_bb_frequency
(e->caller->decl, gimple_bb (e->call_stmt));
}
prob = profile_probability::even ();
new_stmt = gimple_ic (e->call_stmt,
dyn_cast<cgraph_node *> (ref->referred),
- prob, e->count, e->count + e2->count);
+ prob);
e->speculative = false;
e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt,
false);
/* Otherwise remove edge and create new one; we can't simply redirect
since function has changed, so inline plan and other information
attached to edge is invalid. */
- count = e->count;
+ count = e->count.ipa ();
frequency = e->frequency;
if (e->indirect_unknown_callee || e->inline_failed)
e->remove ();
{
/* We are seeing new direct call; compute profile info based on BB. */
basic_block bb = gimple_bb (new_stmt);
- count = bb->count;
+ count = bb->count.ipa ();
frequency = compute_call_stmt_bb_frequency (current_function_decl,
bb);
}
cgraph_edge::verify_count_and_frequency ()
{
bool error_found = false;
- if (count < 0)
+ if (!count.verify ())
{
- error ("caller edge count is negative");
+ error ("caller edge count invalid");
+ error_found = true;
+ }
+ if (count.initialized_p () && !(count.ipa () == count))
+ {
+ error ("caller edge count is local");
error_found = true;
}
if (frequency < 0)
identifier_to_locale (e->callee->name ()));
error_found = true;
}
- if (count < 0)
+ if (!count.verify ())
+ {
+ error ("cgraph count invalid");
+ error_found = true;
+ }
+ if (count.initialized_p () && !(count.ipa () == count))
{
- error ("execution count is negative");
+ error ("cgraph count is local");
error_found = true;
}
if (global.inlined_to && same_comdat_group)
{
if (e->verify_count_and_frequency ())
error_found = true;
+ /* FIXME: re-enable once cgraph is converted to counts. */
if (gimple_has_body_p (e->caller->decl)
+ && 0
&& !e->caller->global.inlined_to
&& !e->speculative
/* Optimized out calls are redirected to __builtin_unreachable. */
{
if (e->verify_count_and_frequency ())
error_found = true;
+ /* FIXME: re-enable once cgraph is converted to counts. */
if (gimple_has_body_p (e->caller->decl)
&& !e->caller->global.inlined_to
&& !e->speculative
+ && 0
&& (e->frequency
!= compute_call_stmt_bb_frequency (e->caller->decl,
gimple_bb (e->call_stmt))))
int
compute_call_stmt_bb_frequency (tree decl, basic_block bb)
{
- int entry_freq = ENTRY_BLOCK_PTR_FOR_FN
- (DECL_STRUCT_FUNCTION (decl))->frequency;
- int freq = bb->frequency;
-
- if (profile_status_for_fn (DECL_STRUCT_FUNCTION (decl)) == PROFILE_ABSENT)
- return CGRAPH_FREQ_BASE;
-
- if (!entry_freq)
- entry_freq = 1, freq++;
-
- freq = freq * CGRAPH_FREQ_BASE / entry_freq;
- if (freq > CGRAPH_FREQ_MAX)
- freq = CGRAPH_FREQ_MAX;
-
- return freq;
+ return bb->count.to_cgraph_frequency
+ (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (decl))->count);
}
/* Mark address taken in STMT. */
node->remove_callees ();
node->remove_all_references ();
- node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
FOR_EACH_BB_FN (bb, cfun)
{
/* Create BB for body of the function and connect it properly. */
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = count;
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX;
EXIT_BLOCK_PTR_FOR_FN (cfun)->count = count;
- EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = BB_FREQ_MAX;
bb = create_basic_block (NULL, ENTRY_BLOCK_PTR_FOR_FN (cfun));
bb->count = count;
- bb->frequency = BB_FREQ_MAX;
e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU);
e->probability = profile_probability::always ();
e = make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
else
resdecl = DECL_RESULT (thunk_fndecl);
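+  /* Count to use for the thunk's CFG; when no reliable IPA count is
+     available, fall back to a guessed local profile.  */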
+ profile_count cfg_count = count;
+ if (!cfg_count.initialized_p ())
+ cfg_count = profile_count::from_gcov_type (BB_FREQ_MAX).guessed_local ();
+
bb = then_bb = else_bb = return_bb
- = init_lowered_empty_function (thunk_fndecl, true, count);
+ = init_lowered_empty_function (thunk_fndecl, true, cfg_count);
bsi = gsi_start_bb (bb);
adjustment, because that's why we're emitting a
thunk. */
then_bb = create_basic_block (NULL, bb);
- then_bb->count = count - count.apply_scale (1, 16);
- then_bb->frequency = BB_FREQ_MAX - BB_FREQ_MAX / 16;
+ then_bb->count = cfg_count - cfg_count.apply_scale (1, 16);
return_bb = create_basic_block (NULL, then_bb);
- return_bb->count = count;
- return_bb->frequency = BB_FREQ_MAX;
+ return_bb->count = cfg_count;
else_bb = create_basic_block (NULL, else_bb);
- then_bb->count = count.apply_scale (1, 16);
- then_bb->frequency = BB_FREQ_MAX / 16;
+ else_bb->count = cfg_count.apply_scale (1, 16);
add_bb_to_loop (then_bb, bb->loop_father);
add_bb_to_loop (return_bb, bb->loop_father);
add_bb_to_loop (else_bb, bb->loop_father);
}
cfun->gimple_df->in_ssa_p = true;
+ counts_to_freqs ();
profile_status_for_fn (cfun)
- = count.initialized_p () ? PROFILE_READ : PROFILE_GUESSED;
+ = cfg_count.initialized_p () && cfg_count.ipa_p ()
+ ? PROFILE_READ : PROFILE_GUESSED;
/* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */
TREE_ASM_WRITTEN (thunk_fndecl) = false;
delete_unreachable_blocks ();
bb = emit_to_new_bb_before (seq, label_rtx (lp->post_landing_pad));
bb->count = bb->next_bb->count;
- bb->frequency = bb->next_bb->frequency;
make_single_succ_edge (bb, bb->next_bb, e_flags);
if (current_loops)
{
}
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
FOR_EACH_BB_FN (bb, cfun)
- if (bb->frequency > freq_max)
- freq_max = bb->frequency;
+ if (bb->count.to_frequency (cfun) > freq_max)
+ freq_max = bb->count.to_frequency (cfun);
freq_threshold = freq_max / PARAM_VALUE (PARAM_ALIGN_THRESHOLD);
if (dump_file)
if (dump_file)
fprintf (dump_file,
"BB %4i freq %4i loop %2i loop_depth %2i skipped.\n",
- bb->index, bb->frequency, bb->loop_father->num,
+ bb->index, bb->count.to_frequency (cfun),
+ bb->loop_father->num,
bb_loop_depth (bb));
continue;
}
{
fprintf (dump_file, "BB %4i freq %4i loop %2i loop_depth"
" %2i fall %4i branch %4i",
- bb->index, bb->frequency, bb->loop_father->num,
+ bb->index, bb->count.to_frequency (cfun), bb->loop_father->num,
bb_loop_depth (bb),
fallthru_frequency, branch_frequency);
if (!bb->loop_father->inner && bb->loop_father->num)
if (!has_fallthru
&& (branch_frequency > freq_threshold
- || (bb->frequency > bb->prev_bb->frequency * 10
- && (bb->prev_bb->frequency
- <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency / 2))))
+ || (bb->count.to_frequency (cfun)
+ > bb->prev_bb->count.to_frequency (cfun) * 10
+ && (bb->prev_bb->count.to_frequency (cfun)
+ <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) / 2))))
{
log = JUMP_ALIGN (label);
if (dump_file)
edge_iterator ei;
fprintf (file, "%s BLOCK %d", ASM_COMMENT_START, bb->index);
- if (bb->frequency)
- fprintf (file, " freq:%d", bb->frequency);
if (bb->count.initialized_p ())
{
fprintf (file, ", count:");
by xstrdup_for_dump. */
static const char *
-dump_profile (int frequency, profile_count &count)
+dump_profile (profile_count &count)
{
- float minimum = 0.01f;
-
- gcc_assert (0 <= frequency && frequency <= REG_BR_PROB_BASE);
- float fvalue = frequency * 100.0f / REG_BR_PROB_BASE;
- if (fvalue < minimum && frequency > 0)
- return "[0.01%]";
-
char *buf;
- if (count.initialized_p ())
- buf = xasprintf ("[%.2f%%] [count: %" PRId64 "]", fvalue,
+ if (!count.initialized_p ())
+ return NULL;
+ if (count.ipa_p ())
+ buf = xasprintf ("[count: %" PRId64 "]",
+ count.to_gcov_type ());
+  else
+ buf = xasprintf ("[local count: %" PRId64 "]",
count.to_gcov_type ());
- else
- buf = xasprintf ("[%.2f%%] [count: INV]", fvalue);
const char *ret = xstrdup_for_dump (buf);
free (buf);
fprintf (outf, "%*sbb_%d:\n", indent, "", bb->index);
else
fprintf (outf, "%*s<bb %d> %s:\n",
- indent, "", bb->index, dump_profile (bb->frequency,
- bb->count));
+ indent, "", bb->index, dump_profile (bb->count));
}
}
if (!duplicate)
{
duplicate = duplicate_block (bb, NULL, NULL);
- bb->frequency = 0;
bb->count = profile_count::zero ();
if (!ret_zero)
for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
flush_pending_stmts (e2);
/* Update profile only when redirection is really processed. */
- bb->frequency += EDGE_FREQUENCY (e);
+ bb->count += e->count ();
}
/* There may be more than one statement in DUPLICATE which exhibits
bb->count = profile_count::stream_in (ib).apply_scale
(count_materialization_scale, REG_BR_PROB_BASE);
- bb->frequency = streamer_read_hwi (ib);
bb->flags = streamer_read_hwi (ib);
/* LTO_bb1 has statements. LTO_bb0 does not. */
streamer_write_uhwi (ob, bb->index);
bb->count.stream_out (ob);
- streamer_write_hwi (ob, bb->frequency);
streamer_write_hwi (ob, bb->flags);
if (!gsi_end_p (bsi) || phi_nodes (bb))
- call_saved_regs_num[cl]). */
{
int i;
- int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
- int bb_freq = bb->frequency;
+ int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
+ int bb_freq = bb->count.to_frequency (cfun);
if (bb_freq == 0)
{
single->count = last->count;
empty->count = last->count;
- single->frequency = last->frequency;
- empty->frequency = last->frequency;
BB_COPY_PARTITION (single, last);
BB_COPY_PARTITION (empty, last);
in sel-sched.c `check_ds' in create_speculation_check. */
e->probability = profile_probability::very_unlikely ();
rec->count = e->count ();
- rec->frequency = EDGE_FREQUENCY (e);
e2->probability = e->probability.invert ();
rtx_code_label *label = block_label (second_bb);
edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
next_edge->probability = new_edge->probability.invert ();
- next_bb->frequency = EDGE_FREQUENCY (next_edge);
+ next_bb->count = next_edge->count ();
cur_bb = next_bb;
}
else /* Link last IF statement and default label
if (dump_file)
fprintf (dump_file, "\n Propagating constants:\n\n");
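+  /* Start from an unknown maximum; it is accumulated from the counts of all
+     defined functions below.  */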
+ max_count = profile_count::uninitialized ();
+
FOR_EACH_DEFINED_FUNCTION (node)
{
struct ipa_node_params *info = IPA_NODE_REF (node);
}
if (node->definition && !node->alias)
overall_size += ipa_fn_summaries->get (node)->self_size;
- if (node->count > max_count)
- max_count = node->count;
+ max_count = max_count.max (node->count);
}
max_new_size = overall_size;
void
ipa_cp_c_finalize (void)
{
- max_count = profile_count::zero ();
+ max_count = profile_count::uninitialized ();
overall_size = 0;
max_new_size = 0;
}
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
- if (l && l->header->frequency < init_bb->frequency)
+ if (l && l->header->count < init_bb->count)
return l->header;
return init_bb;
}
{
int init_freq;
- if (!bb->frequency)
+ if (!bb->count.to_frequency (cfun))
return REG_BR_PROB_BASE;
if (SSA_NAME_IS_DEFAULT_DEF (base))
- init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
+ init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
else
init_freq = get_minimal_bb
(gimple_bb (SSA_NAME_DEF_STMT (base)),
- gimple_bb (stmt))->frequency;
+ gimple_bb (stmt))->count.to_frequency (cfun);
if (!init_freq)
init_freq = 1;
- if (init_freq < bb->frequency)
- return MAX (GCOV_COMPUTE_SCALE (init_freq, bb->frequency), 1);
+ if (init_freq < bb->count.to_frequency (cfun))
+ return MAX (GCOV_COMPUTE_SCALE (init_freq,
+ bb->count.to_frequency (cfun)), 1);
else
return REG_BR_PROB_BASE;
}
if (init != error_mark_node)
return 0;
- if (!bb->frequency)
+ if (!bb->count.to_frequency (cfun))
return REG_BR_PROB_BASE;
ao_ref_init (&refd, op);
info.stmt = stmt;
/* Assume that every memory is initialized at entry.
TODO: Can we easilly determine if value is always defined
and thus we may skip entry block? */
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
- max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
+ max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
else
max = 1;
EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
- max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->frequency);
+ max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun));
BITMAP_FREE (info.bb_set);
- if (max < bb->frequency)
- return MAX (GCOV_COMPUTE_SCALE (max, bb->frequency), 1);
+ if (max < bb->count.to_frequency (cfun))
+ return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1);
else
return REG_BR_PROB_BASE;
}
{
profile_count num = node->count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
- bool scale = num.initialized_p ()
- && (den > 0 || num == profile_count::zero ())
- && !(num == den);
+ bool scale = num.initialized_p () && den.ipa_p ()
+ && (den.nonzero_p () || num == profile_count::zero ())
+ && !(num == den.ipa ());
if (scale)
{
if (dump_file)
? edge->caller->global.inlined_to
: edge->caller);
- if (edge->count > profile_count::zero ()
- && caller->count > profile_count::zero ())
+ if (edge->count.nonzero_p ()
+ && caller->count.nonzero_p ())
uninlined_call_time *= (sreal)edge->count.to_gcov_type ()
/ caller->count.to_gcov_type ();
if (edge->frequency)
: edge->caller);
sreal caller_time = ipa_fn_summaries->get (caller)->time;
- if (edge->count > profile_count::zero ()
- && caller->count > profile_count::zero ())
+ if (edge->count.nonzero_p ()
+ && caller->count.nonzero_p ())
time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type ();
if (edge->frequency)
time *= cgraph_freq_base_rec * edge->frequency;
want_inline = false;
}
else if ((DECL_DECLARED_INLINE_P (callee->decl)
- || e->count > profile_count::zero ())
+ || e->count.nonzero_p ())
&& ipa_fn_summaries->get (callee)->min_size
- ipa_call_summaries->get (e)->call_stmt_size
> 16 * MAX_INLINE_INSNS_SINGLE)
reason = "recursive call is cold";
want_inline = false;
}
- else if (outer_node->count == profile_count::zero ())
+ else if (!outer_node->count.nonzero_p ())
{
reason = "not executed in profile";
want_inline = false;
int i;
for (i = 1; i < depth; i++)
max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
- if (max_count > profile_count::zero () && edge->count > profile_count::zero ()
+ if (max_count.nonzero_p () && edge->count.nonzero_p ()
&& (edge->count.to_gcov_type () * CGRAPH_FREQ_BASE
/ outer_node->count.to_gcov_type ()
>= max_prob))
reason = "profile of recursive call is too large";
want_inline = false;
}
- if (max_count == profile_count::zero ()
+ if (!max_count.nonzero_p ()
&& (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
>= max_prob))
{
methods. */
else
{
- if (max_count > profile_count::zero () && edge->count.initialized_p ()
+ if (max_count.nonzero_p () && edge->count.initialized_p ()
&& (edge->count.to_gcov_type () * 100
/ outer_node->count.to_gcov_type ()
<= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
reason = "profile of recursive call is too small";
want_inline = false;
}
- else if ((max_count == profile_count::zero ()
+ else if ((!max_count.nonzero_p ()
|| !edge->count.initialized_p ())
&& (edge->frequency * 100 / caller_freq
<= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
then calls without.
*/
else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
- || caller->count > profile_count::zero ())
+ || caller->count.nonzero_p ())
{
sreal numerator, denominator;
int overall_growth;
- inlined_time);
if (numerator == 0)
numerator = ((sreal) 1 >> 8);
- if (caller->count > profile_count::zero ())
+ if (caller->count.nonzero_p ())
numerator *= caller->count.to_gcov_type ();
else if (caller->count.initialized_p ())
numerator = numerator >> 11;
{
fprintf (dump_file,
" Inlining call of depth %i", depth);
- if (node->count > profile_count::zero ())
+ if (node->count.nonzero_p ())
{
fprintf (dump_file, " called approx. %.2f times per call",
(double)curr->count.to_gcov_type ()
? node->global.inlined_to : node;
auto_bitmap updated_nodes;
- spec_rem += edge->count;
+ if (edge->count.initialized_p ())
+ spec_rem += edge->count;
edge->resolve_speculation ();
reset_edge_caches (where);
ipa_update_overall_fn_summary (where);
}
for (edge = node->callers; edge; edge = edge->next_caller)
- if (!(max_count >= edge->count))
- max_count = edge->count;
+ max_count = max_count.max (edge->count);
}
ipa_free_postorder_info ();
initialize_growth_caches ();
update_caller_keys (&edge_heap, where, updated_nodes, NULL);
/* Offline copy count has possibly changed, recompute if profile is
available. */
- if (max_count > profile_count::zero ())
+ if (max_count.nonzero_p ())
{
struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
if (n != edge->callee && n->analyzed)
ipa_dump_fn_summaries (dump_file);
nnodes = ipa_reverse_postorder (order);
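+  /* Accumulates the counts of speculative calls that end up resolved.  */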
+ spec_rem = profile_count::zero ();
FOR_EACH_FUNCTION (node)
{
next = edge->next_callee;
if (edge->speculative && !speculation_useful_p (edge, false))
{
+ if (edge->count.initialized_p ())
+ spec_rem += edge->count;
edge->resolve_speculation ();
- spec_rem += edge->count;
update = true;
remove_functions = true;
}
hash_table<histogram_hash> hashtable (10);
FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
- FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
- {
- int time = 0;
- int size = 0;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
- if (gimple_code (stmt) == GIMPLE_CALL
- && !gimple_call_fndecl (stmt))
- {
- histogram_value h;
- h = gimple_histogram_value_of_type
- (DECL_STRUCT_FUNCTION (node->decl),
- stmt, HIST_TYPE_INDIR_CALL);
- /* No need to do sanity check: gimple_ic_transform already
- takes away bad histograms. */
- if (h)
- {
- /* counter 0 is target, counter 1 is number of execution we called target,
- counter 2 is total number of executions. */
- if (h->hvalue.counters[2])
- {
- struct cgraph_edge * e = node->get_edge (stmt);
- if (e && !e->indirect_unknown_callee)
- continue;
- e->indirect_info->common_target_id
- = h->hvalue.counters [0];
- e->indirect_info->common_target_probability
- = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
- if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
- {
- if (dump_file)
- fprintf (dump_file, "Probability capped to 1\n");
- e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
- }
- }
- gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
- stmt, h);
- }
- }
- time += estimate_num_insns (stmt, &eni_time_weights);
- size += estimate_num_insns (stmt, &eni_size_weights);
- }
- if (bb->count.initialized_p ())
- account_time_size (&hashtable, histogram, bb->count.to_gcov_type (),
- time, size);
- }
+ if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+ {
+ int time = 0;
+ int size = 0;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (gimple_code (stmt) == GIMPLE_CALL
+ && !gimple_call_fndecl (stmt))
+ {
+ histogram_value h;
+ h = gimple_histogram_value_of_type
+ (DECL_STRUCT_FUNCTION (node->decl),
+ stmt, HIST_TYPE_INDIR_CALL);
+ /* No need to do sanity check: gimple_ic_transform already
+ takes away bad histograms. */
+ if (h)
+ {
+		  /* counter 0 is target, counter 1 is number of executions we called target,
+ counter 2 is total number of executions. */
+ if (h->hvalue.counters[2])
+ {
+ struct cgraph_edge * e = node->get_edge (stmt);
+ if (e && !e->indirect_unknown_callee)
+ continue;
+ e->indirect_info->common_target_id
+ = h->hvalue.counters [0];
+ e->indirect_info->common_target_probability
+ = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
+ if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Probability capped to 1\n");
+ e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
+ }
+ }
+ gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
+ stmt, h);
+ }
+ }
+ time += estimate_num_insns (stmt, &eni_time_weights);
+ size += estimate_num_insns (stmt, &eni_size_weights);
+ }
+ if (bb->count.ipa_p () && bb->count.initialized_p ())
+ account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (),
+ time, size);
+ }
histogram.qsort (cmp_counts);
}
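For illustration (an editorial aside, not part of the patch): GCOV_COMPUTE_SCALE rescales the per-target counter against the total into the REG_BR_PROB_BASE fixed-point scale, so with the usual base of 10000 a target hit on 900 of 1000 indirect calls yields 9000; the cap above fires only when racy counter updates let the per-target count exceed the total.

/* Editorial sketch of the scaling done above; the counter values are
   invented.  */
static int
common_target_probability_example (void)
{
  gcov_type target_calls = 900;	  /* h->hvalue.counters[1] */
  gcov_type total_calls = 1000;	  /* h->hvalue.counters[2] */
  int prob = GCOV_COMPUTE_SCALE (target_calls, total_calls);  /* 9000 */
  if (prob > REG_BR_PROB_BASE)	  /* possible with racy counter updates */
    prob = REG_BR_PROB_BASE;
  return prob;
}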
/* Do not split when we would end up calling function anyway. */
if (incoming_freq
- >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency
+ >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)
* PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
{
      /* When profile is guessed, we can not expect it to reliably indicate
	 a likely noticeable win.  */
if (back_edge
&& profile_status_for_fn (cfun) != PROFILE_READ
- && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
+ && incoming_freq
+ < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
" Split before loop, accepting despite low frequencies %i %i.\n",
incoming_freq,
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun));
}
else
{
out smallest size of header.
     In the future we might reconsider this heuristic.  */
if (!best_split_point.split_bbs
- || best_split_point.entry_bb->frequency > current->entry_bb->frequency
- || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
+ || best_split_point.entry_bb->count.to_frequency (cfun)
+ > current->entry_bb->count.to_frequency (cfun)
+ || (best_split_point.entry_bb->count.to_frequency (cfun)
+ == current->entry_bb->count.to_frequency (cfun)
&& best_split_point.split_size < current->split_size))
{
FOR_EACH_EDGE (e, ei, return_bb->preds)
if (bitmap_bit_p (split_point->split_bbs, e->src->index))
{
- new_return_bb->frequency += EDGE_FREQUENCY (e);
+ new_return_bb->count += e->count ();
redirect_edge_and_branch (e, new_return_bb);
redirected = true;
break;
unsigned int i;
dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
- if (!dstbb->count.initialized_p ())
+
+  /* Either sum the profiles if both are IPA and not global0, or pick the
+     more informative one (that is, the nonzero IPA one if the other is
+     uninitialized, guessed or global0).  */
+ if (!dstbb->count.ipa ().initialized_p ()
+ || (dstbb->count.ipa () == profile_count::zero ()
+ && (srcbb->count.ipa ().initialized_p ()
+ && !(srcbb->count.ipa () == profile_count::zero ()))))
{
dstbb->count = srcbb->count;
for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
dste->probability = srce->probability;
}
}
- else if (srcbb->count.initialized_p ())
+ else if (srcbb->count.ipa ().initialized_p ()
+ && !(srcbb->count.ipa () == profile_count::zero ()))
{
for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
{
{
if (e->speculative)
continue;
- e->count = gimple_bb (e->call_stmt)->count;
+ e->count = gimple_bb (e->call_stmt)->count.ipa ();
e->frequency = compute_call_stmt_bb_frequency
(dst->decl,
gimple_bb (e->call_stmt));
ipa_ref *ref;
e2->speculative_call_info (direct, indirect, ref);
- e->count = count;
+ e->count = count.ipa ();
e->frequency = freq;
int prob = direct->count.probability_in (e->count)
.to_reg_br_prob_base ();
}
else
{
- e->count = count;
+ e->count = count.ipa ();
e->frequency = freq;
}
}
return -1;
if (! l1->to_remove_p && l2->to_remove_p)
return 1;
- if ((diff = l1->loop->header->frequency - l2->loop->header->frequency) != 0)
+ if ((diff = l1->loop->header->count.to_frequency (cfun)
+ - l2->loop->header->count.to_frequency (cfun)) != 0)
return diff;
if ((diff = (int) loop_depth (l1->loop) - (int) loop_depth (l2->loop)) != 0)
return diff;
(ira_dump_file,
" Mark loop %d (header %d, freq %d, depth %d) for removal (%s)\n",
sorted_loops[i]->loop_num, sorted_loops[i]->loop->header->index,
- sorted_loops[i]->loop->header->frequency,
+ sorted_loops[i]->loop->header->count.to_frequency (cfun),
loop_depth (sorted_loops[i]->loop),
low_pressure_loop_node_p (sorted_loops[i]->parent)
&& low_pressure_loop_node_p (sorted_loops[i])
" Mark loop %d (header %d, freq %d, depth %d) for removal\n",
ira_loop_nodes[i].loop_num,
ira_loop_nodes[i].loop->header->index,
- ira_loop_nodes[i].loop->header->frequency,
+ ira_loop_nodes[i].loop->header->count.to_frequency (cfun),
loop_depth (ira_loop_nodes[i].loop));
}
}
set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
set_zero->count = profile_count::uninitialized ();
- set_zero->frequency = 0;
te = single_succ_edge (preheader);
for (; ass; ass = XEXP (ass, 1))
also be very hard to show that it is impossible, so we must
handle this case. */
set_zero->count = preheader->count;
- set_zero->frequency = preheader->frequency;
}
if (EDGE_COUNT (set_zero->preds) == 0)
unsigned i, j;
profile_probability p;
basic_block preheader, *body, swtch, ezc_swtch = NULL;
- int may_exit_copy, iter_freq, new_freq;
+ int may_exit_copy;
profile_count iter_count, new_count;
unsigned n_peel;
edge e;
/* Record the place where switch will be built for preconditioning. */
swtch = split_edge (loop_preheader_edge (loop));
- /* Compute frequency/count increments for each switch block and initialize
+ /* Compute count increments for each switch block and initialize
innermost switch block. Switch blocks and peeled loop copies are built
from innermost outward. */
- iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
- swtch->frequency = new_freq;
swtch->count = new_count;
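  /* Editorial aside, not in the patch: a worked example of the split
     above, with invented numbers.  If the switch block has count 800 and
     max_unroll is 3, apply_scale (1, 4) gives each of the four pieces a
     count of 200, and every peeled preheader below accumulates one such
     increment.  */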
for (i = 0; i < n_peel; i++)
p = profile_probability::always ().apply_scale (1, i + 2);
preheader = split_edge (loop_preheader_edge (loop));
- /* Add in frequency/count of edge from switch block. */
- preheader->frequency += iter_freq;
+ /* Add in count of edge from switch block. */
preheader->count += iter_count;
branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
block_label (preheader), p,
swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
single_succ_edge (swtch)->probability = p.invert ();
- new_freq += iter_freq;
new_count += iter_count;
- swtch->frequency = new_freq;
swtch->count = new_count;
e = make_edge (swtch, preheader,
single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
p = profile_probability::always ().apply_scale (1, max_unroll + 1);
swtch = ezc_swtch;
preheader = split_edge (loop_preheader_edge (loop));
- /* Recompute frequency/count adjustments since initial peel copy may
+ /* Recompute count adjustments since initial peel copy may
have exited and reduced those values that were computed above. */
- iter_freq = swtch->frequency / (max_unroll + 1);
iter_count = swtch->count.apply_scale (1, max_unroll + 1);
- /* Add in frequency/count of edge from switch block. */
- preheader->frequency += iter_freq;
+ /* Add in count of edge from switch block. */
preheader->count += iter_count;
branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
block_label (preheader), p,
gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest));
}
+ counts_to_freqs ();
fixup_call_stmt_edges (node, stmts);
execute_all_ipa_stmt_fixups (node, stmts);
if (optimize)
optimize_omp_library_calls (entry_stmt);
+ counts_to_freqs ();
cgraph_edge::rebuild_edges ();
/* Some EH regions might become dead, see PR34608. If
{
basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
incr_bb = create_empty_bb (orig_exit);
+ incr_bb->count = profile_count::zero ();
add_bb_to_loop (incr_bb, body_bb->loop_father);
/* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty
flag. Set it now to be a FALLTHRU_EDGE. */
{
edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i);
redirect_edge_succ (e, incr_bb);
+ incr_bb->count += e->count ();
}
}
else if (node->simdclone->inbranch)
{
incr_bb = create_empty_bb (entry_bb);
+ incr_bb->count = profile_count::zero ();
add_bb_to_loop (incr_bb, body_bb->loop_father);
}
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::unlikely ().guessed ();
+ incr_bb->count += e->count ();
edge fallthru = FALLTHRU_EDGE (loop->header);
fallthru->flags = EDGE_FALSE_VALUE;
fallthru->probability = profile_probability::likely ().guessed ();
if (profile_status_for_fn (fun) == PROFILE_ABSENT)
return true;
if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
- && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
+      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (fun)
+		 * 2 / 3))
return false;
if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
return false;
if (freq * PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)
- < ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
+      < ENTRY_BLOCK_PTR_FOR_FN (fun)->count.to_frequency (fun))
return false;
return true;
}
/* Return TRUE if profile count COUNT is considered to be hot.  */
bool
-maybe_hot_count_p (struct function *, profile_count count)
+maybe_hot_count_p (struct function *fun, profile_count count)
{
if (!count.initialized_p ())
return true;
+ if (!count.ipa_p ())
+ return maybe_hot_frequency_p (fun, count.to_frequency (fun));
+ if (count.ipa () == profile_count::zero ())
+ return false;
/* Code executed at most once is not hot. */
if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
return false;
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
gcc_checking_assert (fun);
- if (!maybe_hot_count_p (fun, bb->count))
- return false;
- return maybe_hot_frequency_p (fun, bb->frequency);
+ return maybe_hot_count_p (fun, bb->count);
}
/* Return true in case BB can be CPU intensive and should be optimized
bool
maybe_hot_edge_p (edge e)
{
- if (!maybe_hot_count_p (cfun, e->count ()))
- return false;
- return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
+ return maybe_hot_count_p (cfun, e->count ());
}
/* Return true if profile COUNT, or function FUN static
static bool
probably_never_executed (struct function *fun,
- profile_count count, int)
+ profile_count count)
{
gcc_checking_assert (fun);
if (count == profile_count::zero ())
bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
- return probably_never_executed (fun, bb->count, bb->frequency);
+ return probably_never_executed (fun, bb->count);
}
{
if (unlikely_executed_edge_p (e))
return true;
- return probably_never_executed (fun, e->count (), EDGE_FREQUENCY (e));
+ return probably_never_executed (fun, e->count ());
}
/* Return true when current function should always be optimized for size. */
}
clear_bb_predictions (bb);
- if (!bb->count.initialized_p () && !dry_run)
+ if ((!bb->count.nonzero_p () || !first->probability.initialized_p ())
+ && !dry_run)
{
first->probability
= profile_probability::from_reg_br_prob_base (combined_probability);
BLOCK_INFO (bb)->npredecessors = count;
/* When function never returns, we will never process exit block. */
if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- bb->count = profile_count::zero ();
- bb->frequency = 0;
- }
+ bb->count = profile_count::zero ();
}
BLOCK_INFO (head)->frequency = 1;
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE); */
- sreal tmp = e->probability.to_reg_br_prob_base ();
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ sreal tmp = e->probability.initialized_p () ?
+ e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (e->src)->frequency;
tmp *= real_inv_br_prob_base;
frequency += tmp;
= ((e->probability * BLOCK_INFO (bb)->frequency)
/ REG_BR_PROB_BASE); */
- sreal tmp = e->probability.to_reg_br_prob_base ();
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ sreal tmp = e->probability.initialized_p () ?
+ e->probability.to_reg_br_prob_base () : 0;
tmp *= BLOCK_INFO (bb)->frequency;
EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
}
}
basic_block bb;
- FOR_ALL_BB_FN (bb, fn)
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+ if (flag_guess_branch_prob)
{
- bb->count = profile_count::uninitialized ();
+ bool clear_zeros
+ = ENTRY_BLOCK_PTR_FOR_FN
+ (DECL_STRUCT_FUNCTION (node->decl))->count.nonzero_p ();
+ FOR_ALL_BB_FN (bb, fn)
+ if (clear_zeros || !(bb->count == profile_count::zero ()))
+ bb->count = bb->count.guessed_local ();
+ DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max =
+ DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max.guessed_local ();
}
+ else
+ {
+ FOR_ALL_BB_FN (bb, fn)
+ bb->count = profile_count::uninitialized ();
+ DECL_STRUCT_FUNCTION (node->decl)->cfg->count_max
+ = profile_count::uninitialized ();
+ }
+ pop_cfun ();
struct cgraph_edge *e;
for (e = node->callees; e; e = e->next_caller)
bool
counts_to_freqs (void)
{
- gcov_type count_max;
- profile_count true_count_max = profile_count::zero ();
+ profile_count true_count_max = profile_count::uninitialized ();
basic_block bb;
- /* Don't overwrite the estimated frequencies when the profile for
- the function is missing. We may drop this function PROFILE_GUESSED
- later in drop_profile (). */
- if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
- || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
- return false;
-
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (bb->count > true_count_max)
- true_count_max = bb->count;
-
- /* If we have no counts to base frequencies on, keep those that are
- already there. */
- if (!(true_count_max > 0))
- return false;
+ if (!(bb->count < true_count_max))
+ true_count_max = true_count_max.max (bb->count);
- count_max = true_count_max.to_gcov_type ();
-
- FOR_ALL_BB_FN (bb, cfun)
- if (bb->count.initialized_p ())
- bb->frequency = RDIV (bb->count.to_gcov_type () * BB_FREQ_MAX, count_max);
+ cfun->cfg->count_max = true_count_max;
- return true;
+ return true_count_max.nonzero_p ();
}
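A hedged sketch of how a caller is meant to consume the new interface (the wrapper name is illustrative, not from the patch): counts_to_freqs now records the per-function maximum in cfun->cfg->count_max, which to_frequency relies on, and its return value says whether any real nonzero count exists.

/* Editorial sketch; assumes the semantics described above.  */
static void
counts_to_freqs_caller_example (void)
{
  if (!counts_to_freqs ())
    {
      /* No nonzero counts were found; count_max is recorded anyway and
	 frequency queries fall back to the guessed profile.  */
    }
}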
/* Return true if function is likely to be expensive, so there is no point to
/* Frequencies are out of range. This either means that function contains
internal loop executing more than BB_FREQ_MAX times or profile feedback
is available and function has not been executed at all. */
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) == 0)
return true;
/* Maximally BB_FREQ_MAX^2 so overflow won't happen. */
- limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
+ limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) * threshold;
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *insn;
FOR_BB_INSNS (bb, insn)
if (active_insn_p (insn))
{
- sum += bb->frequency;
+ sum += bb->count.to_frequency (cfun);
if (sum > limit)
return true;
}
"Basic block %i is marked unlikely by forward prop\n",
bb->index);
bb->count = profile_count::zero ();
- bb->frequency = 0;
}
else
bb->aux = NULL;
bb->count = profile_count::zero ();
}
- if (bb->count == profile_count::zero ())
- bb->frequency = 0;
-
FOR_EACH_EDGE (e, ei, bb->succs)
if (!(e->probability == profile_probability::never ())
&& unlikely_executed_edge_p (e))
"Basic block %i is marked unlikely by backward prop\n",
bb->index);
bb->count = profile_count::zero ();
- bb->frequency = 0;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!(e->probability == profile_probability::never ()))
{
FOR_EACH_EDGE (e, ei, bb->succs)
{
- EDGE_INFO (e)->back_edge_prob
- = e->probability.to_reg_br_prob_base ();
+ /* FIXME: Graphite is producing edges with no profile. Once
+ this is fixed, drop this. */
+ if (e->probability.initialized_p ())
+ EDGE_INFO (e)->back_edge_prob
+ = e->probability.to_reg_br_prob_base ();
+ else
+ EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
}
}
to outermost to examine frequencies for back edges. */
estimate_loops ();
+ bool global0 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()
+ && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ();
+
freq_max = 0;
FOR_EACH_BB_FN (bb, cfun)
if (freq_max < BLOCK_INFO (bb)->frequency)
freq_max = BLOCK_INFO (bb)->frequency;
freq_max = real_bb_freq_max / freq_max;
+ cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
- bb->frequency = tmp.to_int ();
+ profile_count count = profile_count::from_gcov_type (tmp.to_int ());
+
+ /* If we have profile feedback in which this function was never
+ executed, then preserve this info. */
+ if (global0)
+ bb->count = count.global0 ();
+ else if (!(bb->count == profile_count::zero ()))
+ bb->count = count.guessed_local ();
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
}
free_aux_for_blocks ();
if (profile_status_for_fn (cfun) != PROFILE_READ)
{
int flags = flags_from_decl_or_type (current_function_decl);
- if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()
+ if ((ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ()
+	 && ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa () == profile_count::zero ())
|| lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
!= NULL)
{
{
struct loop *loop;
FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
- if (loop->header->frequency)
+ if (loop->header->count.initialized_p ())
fprintf (dump_file, "Loop got predicted %d to iterate %i times.\n",
loop->num,
(int)expected_loop_iterations_unbounded (loop));
which may also lead to frequencies incorrectly reduced to 0. There
is less precision in the probabilities, so we only do this for small
max counts. */
- profile_count count_max = profile_count::zero ();
+ cfun->cfg->count_max = profile_count::uninitialized ();
basic_block bb;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (bb->count > count_max)
- count_max = bb->count;
+ cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
- if (profile_status_for_fn (cfun) == PROFILE_GUESSED
- || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
- && count_max < REG_BR_PROB_BASE / 10))
+ if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
{
loop_optimizer_init (0);
add_noreturn_fake_exit_edges ();
after loop transforms. */
if (!(prob_sum > profile_probability::never ())
&& count_sum == profile_count::zero ()
- && single_pred_p (e->src) && e->src->frequency > (impossible ? 0 : 1))
+ && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
+ > (impossible ? 0 : 1))
{
- int old_frequency = e->src->frequency;
+ int old_frequency = e->src->count.to_frequency (cfun);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
impossible ? "impossible" : "cold");
- e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
+ int new_frequency = MIN (e->src->count.to_frequency (cfun),
+ impossible ? 0 : 1);
if (impossible)
e->src->count = profile_count::zero ();
else
- e->src->count = e->count ().apply_scale (e->src->frequency,
+ e->src->count = e->count ().apply_scale (new_frequency,
old_frequency);
force_edge_cold (single_pred_edge (e->src), impossible);
}
else
{
fprintf (f, "%" PRId64, m_val);
- if (m_quality == profile_adjusted)
+ if (m_quality == profile_guessed_local)
+ fprintf (f, " (estimated locally)");
+ else if (m_quality == profile_guessed_global0)
+ fprintf (f, " (estimated locally, globally 0)");
+ else if (m_quality == profile_adjusted)
fprintf (f, " (adjusted)");
else if (m_quality == profile_afdo)
fprintf (f, " (auto FDO)");
bool
profile_count::differs_from_p (profile_count other) const
{
+ gcc_checking_assert (compatible_p (other));
if (!initialized_p () || !other.initialized_p ())
return false;
if ((uint64_t)m_val - (uint64_t)other.m_val < 100
*res = (uint64_t) -1;
return false;
}
+
+/* Return count as frequency within FUN scaled in range 0 to REG_FREQ_MAX.
+   Used for legacy code and should not be used anymore.  */
+
+int
+profile_count::to_frequency (struct function *fun) const
+{
+ if (!initialized_p ())
+ return BB_FREQ_MAX;
+ if (*this == profile_count::zero ())
+ return 0;
+ gcc_assert (REG_BR_PROB_BASE == BB_FREQ_MAX
+ && fun->cfg->count_max.initialized_p ());
+ profile_probability prob = probability_in (fun->cfg->count_max);
+ if (!prob.initialized_p ())
+ return REG_BR_PROB_BASE;
+ return prob.to_reg_br_prob_base ();
+}
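To make the legacy mapping concrete (an editorial example with invented counts): the assert guarantees REG_BR_PROB_BASE == BB_FREQ_MAX (10000 in practice), so a block whose count is half of cfun->cfg->count_max maps to frequency 5000.

/* Editorial sketch; assumes cfun->cfg->count_max == 100.  */
static int
to_frequency_example (basic_block bb)
{
  /* With bb->count == 50 and count_max == 100 this yields 5000,
     i.e. one half of BB_FREQ_MAX.  */
  return bb->count.to_frequency (cfun);
}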
+
+/* Return count as frequency within FUN scaled in range 0 to CGRAPH_FREQ_MAX,
+   where CGRAPH_FREQ_BASE means that the count equals the entry block count.
+   Used for legacy code and should not be used anymore.  */
+
+int
+profile_count::to_cgraph_frequency (profile_count entry_bb_count) const
+{
+ if (!initialized_p ())
+ return CGRAPH_FREQ_BASE;
+ if (*this == profile_count::zero ())
+ return 0;
+ gcc_checking_assert (entry_bb_count.initialized_p ());
+ uint64_t scale;
+ if (!safe_scale_64bit (!entry_bb_count.m_val ? m_val + 1 : m_val,
+ CGRAPH_FREQ_BASE, MAX (1, entry_bb_count.m_val), &scale))
+ return CGRAPH_FREQ_MAX;
+ return MIN (scale, CGRAPH_FREQ_MAX);
+}
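Similarly for the cgraph scale (editorial example; CGRAPH_FREQ_BASE is 1000 and CGRAPH_FREQ_MAX 100000 in GCC at this point): a statement executed twice per entry-block execution maps to 2 * CGRAPH_FREQ_BASE, and the MIN at the end saturates overflow at CGRAPH_FREQ_MAX.

/* Editorial sketch with invented counts.  */
static int
to_cgraph_frequency_example (void)
{
  profile_count entry = profile_count::from_gcov_type (1000);
  profile_count stmt = profile_count::from_gcov_type (2000);
  return stmt.to_cgraph_frequency (entry);  /* 2 * CGRAPH_FREQ_BASE */
}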
#ifndef GCC_PROFILE_COUNT_H
#define GCC_PROFILE_COUNT_H
+struct function;
+
/* Quality of the profile count. Because gengtype does not support enums
inside of classes, this is in global namespace. */
enum profile_quality {
+  /* Profile is based on static branch prediction heuristics and may or may
+     not match reality.  It is local to the function and can not be compared
+     inter-procedurally.  Never used by probabilities (they are always
+     local).  */
+ profile_guessed_local = 0,
+  /* Profile was read from feedback and was zero; we used local heuristics
+     to guess a better distribution.  This is the case for functions that
+     were never run during feedback training.  Never used by probabilities.  */
+ profile_guessed_global0 = 1,
+
/* Profile is based on static branch prediction heuristics. It may or may
- not reflect the reality. */
- profile_guessed = 0,
+     not reflect the reality, but it can be compared interprocedurally
+     (for example, when we inline a function without profile feedback into
+     a function with feedback and propagate from that).
+     Never used by probabilities.  */
+ profile_guessed = 2,
/* Profile was determined by autofdo. */
- profile_afdo = 1,
+ profile_afdo = 3,
/* Profile was originally based on feedback but it was adjusted
by code duplicating optimization. It may not precisely reflect the
particular code path. */
- profile_adjusted = 2,
+ profile_adjusted = 4,
/* Profile was read from profile feedback or determined by accurate static
method. */
- profile_precise = 3
+ profile_precise = 5
};
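The numeric ordering is load-bearing: arithmetic on counts keeps the MIN of the operand qualities, and ipa_p () below treats everything at or above profile_guessed_global0 as meaningful inter-procedurally. A minimal sketch of that convention (editorial, not part of the patch):

/* Editorial sketch; MIN is the usual GCC macro.  */
static void
profile_quality_ordering_example (void)
{
  enum profile_quality merged = MIN (profile_precise, profile_adjusted);
  gcc_checking_assert (merged == profile_adjusted);
  gcc_checking_assert (merged >= profile_guessed_global0);  /* IPA-safe */
}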
/* The base value for branch probability notes and edge probabilities. */
class GTY((user)) profile_probability
{
- static const int n_bits = 30;
+ static const int n_bits = 29;
/* We can technically use ((uint32_t) 1 << (n_bits - 1)) - 2 but that
will lead to harder multiplication sequences. */
static const uint32_t max_probability = (uint32_t) 1 << (n_bits - 2);
static const uint32_t uninitialized_probability
= ((uint32_t) 1 << (n_bits - 1)) - 1;
- uint32_t m_val : 30;
- enum profile_quality m_quality : 2;
+ uint32_t m_val : 29;
+ enum profile_quality m_quality : 3;
friend class profile_count;
public:
static profile_probability from_reg_br_prob_note (int v)
{
profile_probability ret;
- ret.m_val = ((unsigned int)v) / 4;
- ret.m_quality = (enum profile_quality)(v & 3);
+ ret.m_val = ((unsigned int)v) / 8;
+ ret.m_quality = (enum profile_quality)(v & 7);
return ret;
}
int to_reg_br_prob_note () const
{
gcc_checking_assert (initialized_p ());
- int ret = m_val * 4 + m_quality;
+ int ret = m_val * 8 + m_quality;
gcc_checking_assert (profile_probability::from_reg_br_prob_note (ret)
== *this);
return ret;
{
if (m_val == uninitialized_probability)
return m_quality == profile_guessed;
- else
- return m_val <= max_probability;
+ else if (m_quality < profile_guessed)
+ return false;
+ return m_val <= max_probability;
}
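  /* Editorial note: the widening to 3 quality bits changes the note
     encoding from value * 4 + quality to value * 8 + quality, e.g.

	int note = 1234 * 8 + (int) profile_adjusted;
	profile_probability p
	  = profile_probability::from_reg_br_prob_note (note);
	gcc_checking_assert (p.to_reg_br_prob_note () == note);

     packs and unpacks losslessly.  */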
  /* Comparisons are three-state and conservative.  False is returned if
void stream_out (struct lto_output_stream *);
};
-/* Main data type to hold profile counters in GCC. In most cases profile
- counts originate from profile feedback. They are 64bit integers
- representing number of executions during the train run.
+/* Main data type to hold profile counters in GCC.  Profile counts originate
+   either from profile feedback, static profile estimation or both.  We do not
+   perform whole program profile propagation and thus profile estimation
+   counters are often local to a function, while counters from profile
+   feedback (or special cases of profile estimation) can be used
+   inter-procedurally.
+
+   There are 3 basic types:
+   1) local counters which are result of intra-procedural static profile
+      estimation.
+   2) ipa counters which are result of profile feedback or a special case
+      of static profile estimation (such as in function main).
+   3) counters which count as 0 inter-procedurally (because the given
+      function was never run in the train feedback) but hold a local
+      static profile estimate.
+
+   Counters of type 1 and 3 can not be mixed with counters of a different
+   type within an operation (because a whole function should use one type
+   of counter), with the exception that the global zero may mix into most
+   operations where the outcome is well defined.
+
+   To take a local counter and use it inter-procedurally, use the ipa member
+   function, which strips information irrelevant at the inter-procedural
+   level.
+
+   Counters are 61bit integers representing the number of executions during
+   the train run or the normalized frequency within the function.
+
As the profile is maintained during the compilation, many adjustments are
made. Not all transformations can be made precisely, most importantly
when code is being duplicated. It also may happen that part of CFG has
64bit. Although a counter cannot be negative, we use a signed
type to hold various extra stages. */
- static const int n_bits = 62;
+ static const int n_bits = 61;
static const uint64_t max_count = ((uint64_t) 1 << n_bits) - 2;
static const uint64_t uninitialized_count = ((uint64_t) 1 << n_bits) - 1;
uint64_t m_val : n_bits;
- enum profile_quality m_quality : 2;
+ enum profile_quality m_quality : 3;
+
+  /* Return true if both values can meaningfully appear in a single function
+     body.  Within one function the counters must be either all local or all
+     IPA; otherwise operations between them are not well defined.  */
+ bool compatible_p (const profile_count other) const
+ {
+ if (!initialized_p () || !other.initialized_p ())
+ return true;
+ if (*this == profile_count::zero ()
+ || other == profile_count::zero ())
+ return true;
+ return ipa_p () == other.ipa_p ();
+ }
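  /* Editorial illustration of the rule compatible_p enforces, assuming
     from_gcov_type yields a precise (IPA) count as elsewhere in this
     patch: counts of local and IPA quality may not meet in one operation
     unless one side is uninitialized or the global zero.

	profile_count local
	  = profile_count::from_gcov_type (10).guessed_local ();
	profile_count ipa = profile_count::from_gcov_type (10);
	profile_count ok = local + profile_count::zero ();  // allowed
	// local + ipa would trip the compatible_p assert in operator+.
   */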
public:
/* Used for counters which are expected to be never executed. */
{
profile_count c;
c.m_val = uninitialized_count;
- c.m_quality = profile_guessed;
+ c.m_quality = profile_guessed_local;
return c;
}
{
return m_quality >= profile_adjusted;
}
+  /* Return true if the value can be used inter-procedurally.  */
+ bool ipa_p () const
+ {
+ return !initialized_p () || m_quality >= profile_guessed_global0;
+ }
/* When merging basic blocks, the two different profile counts are unified.
Return true if this can be done without losing info about profile.
return profile_count::uninitialized ();
profile_count ret;
+ gcc_checking_assert (compatible_p (other));
ret.m_val = m_val + other.m_val;
ret.m_quality = MIN (m_quality, other.m_quality);
return ret;
return *this = profile_count::uninitialized ();
else
{
+ gcc_checking_assert (compatible_p (other));
m_val += other.m_val;
m_quality = MIN (m_quality, other.m_quality);
}
return *this;
if (!initialized_p () || !other.initialized_p ())
return profile_count::uninitialized ();
+ gcc_checking_assert (compatible_p (other));
profile_count ret;
ret.m_val = m_val >= other.m_val ? m_val - other.m_val : 0;
ret.m_quality = MIN (m_quality, other.m_quality);
return *this = profile_count::uninitialized ();
else
{
+ gcc_checking_assert (compatible_p (other));
m_val = m_val >= other.m_val ? m_val - other.m_val: 0;
m_quality = MIN (m_quality, other.m_quality);
}
/* Return false if profile_count is bogus. */
bool verify () const
{
- return m_val != uninitialized_count || m_quality == profile_guessed;
+ return m_val != uninitialized_count || m_quality == profile_guessed_local;
}
  /* Comparisons are three-state and conservative.  False is returned if
     the inequality can not be decided.  */
bool operator< (const profile_count &other) const
{
- return initialized_p () && other.initialized_p () && m_val < other.m_val;
+ if (!initialized_p () || !other.initialized_p ())
+ return false;
+ if (*this == profile_count::zero ())
+ return !(other == profile_count::zero ());
+ if (other == profile_count::zero ())
+ return false;
+ gcc_checking_assert (compatible_p (other));
+ return m_val < other.m_val;
}
bool operator> (const profile_count &other) const
{
+ if (!initialized_p () || !other.initialized_p ())
+ return false;
+ if (*this == profile_count::zero ())
+ return false;
+ if (other == profile_count::zero ())
+ return !(*this == profile_count::zero ());
+ gcc_checking_assert (compatible_p (other));
      return m_val > other.m_val;
}
bool operator< (const gcov_type other) const
{
+ gcc_checking_assert (ipa_p ());
gcc_checking_assert (other >= 0);
return initialized_p () && m_val < (uint64_t) other;
}
bool operator> (const gcov_type other) const
{
+ gcc_checking_assert (ipa_p ());
gcc_checking_assert (other >= 0);
return initialized_p () && m_val > (uint64_t) other;
}
bool operator<= (const profile_count &other) const
{
- return initialized_p () && other.initialized_p () && m_val <= other.m_val;
+ if (!initialized_p () || !other.initialized_p ())
+ return false;
+ if (*this == profile_count::zero ())
+ return true;
+ if (other == profile_count::zero ())
+ return (*this == profile_count::zero ());
+ gcc_checking_assert (compatible_p (other));
+ return m_val <= other.m_val;
}
bool operator>= (const profile_count &other) const
{
- return initialized_p () && other.initialized_p () && m_val >= other.m_val;
+ if (!initialized_p () || !other.initialized_p ())
+ return false;
+ if (other == profile_count::zero ())
+ return true;
+ if (*this == profile_count::zero ())
+	return false;
+ gcc_checking_assert (compatible_p (other));
+ return m_val >= other.m_val;
}
bool operator<= (const gcov_type other) const
{
+ gcc_checking_assert (ipa_p ());
gcc_checking_assert (other >= 0);
return initialized_p () && m_val <= (uint64_t) other;
}
bool operator>= (const gcov_type other) const
{
+ gcc_checking_assert (ipa_p ());
gcc_checking_assert (other >= 0);
return initialized_p () && m_val >= (uint64_t) other;
}
+  /* Return true when the value is nonzero and can be used for scaling.
+     This is different from *this > 0 because that requires the counter
+     to be IPA.  */
+ bool nonzero_p () const
+ {
+ return initialized_p () && m_val != 0;
+ }
+
+  /* Force the counter to be nonzero.  */
+ profile_count force_nonzero () const
+ {
+ if (!initialized_p ())
+ return *this;
+ profile_count ret = *this;
+ if (ret.m_val == 0)
+ ret.m_val = 1;
+ return ret;
+ }
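  /* Editorial sketch of the intended use: nonzero_p is the scaling-safe
     test that also works for local counts, and force_nonzero guards a
     later division by the count.

	profile_count c = profile_count::from_gcov_type (0);
	gcc_checking_assert (!c.nonzero_p ());
	profile_count d = c.force_nonzero ();  // value 1, same quality
	gcc_checking_assert (d.nonzero_p ());
   */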
+
+ profile_count max (profile_count other) const
+ {
+ if (!initialized_p ())
+ return other;
+ if (!other.initialized_p ())
+ return *this;
+ if (*this == profile_count::zero ())
+ return other;
+ if (other == profile_count::zero ())
+ return *this;
+ gcc_checking_assert (compatible_p (other));
+ if (m_val < other.m_val || (m_val == other.m_val
+ && m_quality < other.m_quality))
+ return other;
+ return *this;
+ }
/* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter
accordingly. */
}
profile_count apply_scale (profile_count num, profile_count den) const
{
- if (m_val == 0)
+ if (*this == profile_count::zero ())
return *this;
- if (num.m_val == 0)
+ if (num == profile_count::zero ())
return num;
if (!initialized_p () || !num.initialized_p () || !den.initialized_p ())
return profile_count::uninitialized ();
- gcc_checking_assert (den > 0);
+ gcc_checking_assert (den.m_val);
if (num == den)
return *this;
uint64_t val;
safe_scale_64bit (m_val, num.m_val, den.m_val, &val);
ret.m_val = MIN (val, max_count);
- ret.m_quality = MIN (m_quality, profile_adjusted);
+ ret.m_quality = MIN (MIN (MIN (m_quality, profile_adjusted),
+ num.m_quality), den.m_quality);
+ if (num.ipa_p () && !ret.ipa_p ())
+ ret.m_quality = MIN (num.m_quality, profile_guessed);
+ return ret;
+ }
+
+ /* Return THIS with quality dropped to GUESSED_LOCAL. */
+ profile_count guessed_local () const
+ {
+ profile_count ret = *this;
+ if (!initialized_p ())
+ return *this;
+ ret.m_quality = profile_guessed_local;
+ return ret;
+ }
+
+  /* The profile is known to be zero globally, but keep the local profile
+     if present.  */
+ profile_count global0 () const
+ {
+ profile_count ret = *this;
+ if (!initialized_p ())
+ return *this;
+ ret.m_quality = profile_guessed_global0;
return ret;
}
profile_count guessed () const
{
profile_count ret = *this;
- ret.m_quality = profile_guessed;
+ ret.m_quality = MIN (ret.m_quality, profile_guessed);
return ret;
}
+  /* Return a variant of the profile count that is always safe to compare
+     across functions.  */
+ profile_count ipa () const
+ {
+ if (m_quality > profile_guessed_global0)
+ return *this;
+ if (m_quality == profile_guessed_global0)
+ return profile_count::zero ();
+ return profile_count::uninitialized ();
+ }
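  /* Editorial illustration of ipa (): IPA-quality counts pass through
     unchanged, while purely local estimates become uninitialized so they
     can never leak into cross-function comparisons.

	profile_count local
	  = profile_count::from_gcov_type (5).guessed_local ();
	gcc_checking_assert (!local.ipa ().initialized_p ());
   */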
+
/* Return THIS with quality dropped to AFDO. */
profile_count afdo () const
{
OVERALL. */
profile_probability probability_in (const profile_count overall) const
{
- if (!m_val)
+ if (*this == profile_count::zero ())
return profile_probability::never ();
if (!initialized_p () || !overall.initialized_p ()
|| !overall.m_val)
return profile_probability::uninitialized ();
profile_probability ret;
- if (overall < m_val)
+ gcc_checking_assert (compatible_p (overall));
+
+ if (overall.m_val < m_val)
ret.m_val = profile_probability::max_probability;
else
ret.m_val = RDIV (m_val * profile_probability::max_probability,
overall.m_val);
- ret.m_quality = MIN (m_quality, overall.m_quality);
+ ret.m_quality = MAX (MIN (m_quality, overall.m_quality), profile_guessed);
return ret;
}
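  /* Editorial example with invented counts: an edge taken 30 times out
     of a block's 120 executions yields a 25% probability, and the clamp
     above caps the result when adjustments have pushed the part over the
     whole.

	profile_count part = profile_count::from_gcov_type (30);
	profile_count whole = profile_count::from_gcov_type (120);
	profile_probability p = part.probability_in (whole);  // 25%
   */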
+ int to_frequency (struct function *fun) const;
+ int to_cgraph_frequency (profile_count entry_bb_count) const;
+
/* Output THIS to F. */
void dump (FILE *f) const;
return num_edges;
}
-#define OVERLAP_BASE 10000
-
-/* Compare the static estimated profile to the actual profile, and
- return the "degree of overlap" measure between them.
-
- Degree of overlap is a number between 0 and OVERLAP_BASE. It is
- the sum of each basic block's minimum relative weights between
- two profiles. And overlap of OVERLAP_BASE means two profiles are
- identical. */
-
-static int
-compute_frequency_overlap (void)
-{
- gcov_type count_total = 0, freq_total = 0;
- int overlap = 0;
- basic_block bb;
-
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- {
- count_total += bb_gcov_count (bb);
- freq_total += bb->frequency;
- }
-
- if (count_total == 0 || freq_total == 0)
- return 0;
-
- FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- overlap += MIN (bb_gcov_count (bb) * OVERLAP_BASE / count_total,
- bb->frequency * OVERLAP_BASE / freq_total);
-
- return overlap;
-}
/* Compute the branch probabilities for the various branches.
Annotate them accordingly.
}
}
}
- if (dump_file)
- {
- int overlap = compute_frequency_overlap ();
- gimple_dump_cfg (dump_file, dump_flags);
- fprintf (dump_file, "Static profile overlap: %d.%d%%\n",
- overlap / (OVERLAP_BASE / 100),
- overlap % (OVERLAP_BASE / 100));
- }
total_num_passes += passes;
if (dump_file)
}
}
- FOR_ALL_BB_FN (bb, cfun)
- {
+ /* If we have real data, use them! */
+ if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ || !flag_guess_branch_prob)
+ FOR_ALL_BB_FN (bb, cfun)
bb->count = profile_count::from_gcov_type (bb_gcov_count (bb));
- }
+ /* If function was not trained, preserve local estimates including statically
+ determined zero counts. */
+ else
+ FOR_ALL_BB_FN (bb, cfun)
+ if (!(bb->count == profile_count::zero ()))
+ bb->count = bb->count.global0 ();
+
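  /* Editorial aside: after the conversion above a never-trained function
     keeps its guessed shape locally while reading as zero to IPA
     heuristics, e.g.

	profile_count c = profile_count::from_gcov_type (5).global0 ();
	gcc_checking_assert (c.nonzero_p ());		  // local info kept
	gcc_checking_assert (c.ipa () == profile_count::zero ());  // IPA 0
   */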
bb_gcov_counts.release ();
delete edge_gcov_counts;
edge_gcov_counts = NULL;
frequency. */
#define REG_FREQ_FROM_BB(bb) (optimize_function_for_size_p (cfun) \
? REG_FREQ_MAX \
- : ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\
- ? ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\
+ : ((bb)->count.to_frequency (cfun) \
+ * REG_FREQ_MAX / BB_FREQ_MAX) \
+ ? ((bb)->count.to_frequency (cfun) \
+ * REG_FREQ_MAX / BB_FREQ_MAX) \
: 1)
/* Indexed by N, gives number of insns in which register N dies.
basic_block bb1 = BLOCK_FOR_INSN (insn1);
basic_block bb2 = BLOCK_FOR_INSN (insn2);
- if (bb1->count > bb2->count
- || bb1->frequency > bb2->frequency)
+ if (bb1->count > bb2->count)
return -1;
- if (bb1->count < bb2->count
- || bb1->frequency < bb2->frequency)
+ if (bb1->count < bb2->count)
return 1;
return 0;
}
BB_END (old_bb) = end;
redirect_edge_succ (e, new_bb);
- new_bb->frequency = EDGE_FREQUENCY (e);
+ new_bb->count = e->count ();
e->flags |= EDGE_FALLTHRU;
e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
{
num += EDGE_FREQUENCY (e);
- den += e->src->frequency;
+ den += e->src->count.to_frequency (cfun);
}
if (den == 0)
if (dump_file)
fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
- bb->frequency = RDIV (num * bb->frequency, den);
- dup->frequency -= bb->frequency;
bb->count = bb->count.apply_scale (num, den);
dup->count -= bb->count;
}
continue;
}
- new_bb->count += e->src->count.apply_probability (e->probability);
- new_bb->frequency += EDGE_FREQUENCY (e);
+ new_bb->count += e->count ();
redirect_edge_and_branch_force (e, new_bb);
if (dump_file)
work: this does not always add up to the block frequency at
all, and even if it does, rounding error makes for bad
decisions. */
- SW (bb)->own_cost = bb->frequency;
+ SW (bb)->own_cost = bb->count.to_frequency (cfun);
edge e;
edge_iterator ei;
+2017-11-03 Jan Hubicka <hubicka@ucw.cz>
+
+ * gcc.dg/no-strict-overflow-3.c (foo): Update magic
+ value to not clash with frequency.
+ * gcc.dg/strict-overflow-3.c (foo): Likewise.
+ * gcc.dg/tree-ssa/builtin-sprintf-2.c: Update template.
+ * gcc.dg/tree-ssa/dump-2.c: Update template.
+ * gcc.dg/tree-ssa/ifc-10.c: Update template.
+ * gcc.dg/tree-ssa/ifc-11.c: Update template.
+ * gcc.dg/tree-ssa/ifc-12.c: Update template.
+ * gcc.dg/tree-ssa/ifc-20040816-1.c: Update template.
+ * gcc.dg/tree-ssa/ifc-20040816-2.c: Update template.
+ * gcc.dg/tree-ssa/ifc-5.c: Update template.
+ * gcc.dg/tree-ssa/ifc-8.c: Update template.
+ * gcc.dg/tree-ssa/ifc-9.c: Update template.
+ * gcc.dg/tree-ssa/ifc-cd.c: Update template.
+ * gcc.dg/tree-ssa/ifc-pr56541.c: Update template.
+ * gcc.dg/tree-ssa/ifc-pr68583.c: Update template.
+ * gcc.dg/tree-ssa/ifc-pr69489-1.c: Update template.
+ * gcc.dg/tree-ssa/ifc-pr69489-2.c: Update template.
+ * gcc.target/i386/pr61403.c: Update template.
+
2017-11-03 Nathan Sidwell <nathan@acm.org>
* lib/scanlang.exp: Fix error message to refer to scan-lang-dump.
int
foo (int i, int j)
{
- return i + 100 < j + 1000;
+ return i + 100 < j + 1234;
}
-/* { dg-final { scan-tree-dump "1000" "optimized" } } */
+/* { dg-final { scan-tree-dump "1234" "optimized" } } */
int
foo (int i, int j)
{
- return i + 100 < j + 1000;
+ return i + 100 < j + 1234;
}
-/* { dg-final { scan-tree-dump-not "1000" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1234" "optimized" } } */
/* Only conditional calls to must_not_eliminate must be made (with
any probability):
- { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } }
- { dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } } }
+ { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } }
+ { dg-final { scan-tree-dump-times "> \\\[local count: \[0-9INV\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } } }
No unconditional calls to abort should be made:
{ dg-final { scan-tree-dump-not ";\n *must_not_eliminate" "optimized" } } */
return 0;
}
-/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[100\\\.00%\\\] \\\[count: INV\\\]:" "optimized" } } */
+/* { dg-final { scan-tree-dump "<bb \[0-9\]> \\\[local count: 10000\\\]:" "optimized" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* Sum is wrong here, but not enough for error to be reported. */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 0 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming frequencies" 1 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
}
}
-/* { dg-final { scan-assembler "blend" } } */
+/* { dg-final { scan-assembler "rsqrtps" } } */
if (!best || ignore_bb_p (best->src))
return NULL;
if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
- < bb->frequency * branch_ratio_cutoff)
+ < bb->count.to_frequency (cfun) * branch_ratio_cutoff)
return NULL;
return best;
}
edge e;
if (dump_file)
- fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
+ fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
while ((e = find_best_predecessor (bb)) != NULL)
{
|| find_best_successor (bb2) != e)
break;
if (dump_file)
- fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
+ fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
bb = bb2;
}
if (dump_file)
- fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency);
+ fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
trace[i++] = bb;
/* Follow the trace in forward direction. */
|| find_best_predecessor (bb) != e)
break;
if (dump_file)
- fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
+ fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
trace[i++] = bb;
}
if (dump_file)
{
int n = count_insns (bb);
if (!ignore_bb_p (bb))
- blocks[bb->index] = heap.insert (-bb->frequency, bb);
+ blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
counts [bb->index] = n;
ninsns += n;
- weighted_insns += n * bb->frequency;
+ weighted_insns += n * bb->count.to_frequency (cfun);
}
if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
n = find_trace (bb, trace);
bb = trace[0];
- traced_insns += bb->frequency * counts [bb->index];
+ traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
if (blocks[bb->index])
{
heap.delete_node (blocks[bb->index]);
heap.delete_node (blocks[bb2->index]);
blocks[bb2->index] = NULL;
}
- traced_insns += bb2->frequency * counts [bb2->index];
+ traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
if (EDGE_COUNT (bb2->preds) > 1
&& can_duplicate_block_p (bb2)
/* We have the tendency to duplicate the loop header
/* Reconsider the original copy of block we've duplicated.
Removing the most common predecessor may make it to be
head. */
- blocks[bb2->index] = heap.insert (-bb2->frequency, bb2);
+ blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
if (dump_file)
fprintf (dump_file, "Duplicated %i as %i [%i]\n",
- bb2->index, copy->index, copy->frequency);
+ bb2->index, copy->index, copy->count.to_frequency (cfun));
bb2 = copy;
changed = true;
edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE);
redirect_edge_pred (fallthru_edge, join_bb);
- join_bb->frequency = test_bb->frequency = transaction_bb->frequency;
join_bb->count = test_bb->count = transaction_bb->count;
ei->probability = profile_probability::always ();
ef->probability = profile_probability::unlikely ();
code_bb->count = et->count ();
- code_bb->frequency = EDGE_FREQUENCY (et);
transaction_bb = join_bb;
}
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
- test_bb->frequency = transaction_bb->frequency;
test_bb->count = transaction_bb->count;
ei->probability = profile_probability::always ();
edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags);
e->probability = fallthru_edge->probability;
test_bb->count = fallthru_edge->count ();
- test_bb->frequency = EDGE_FREQUENCY (e);
// Now update the edges to the inst/uninist implementations.
// For now assume that the paths are equally likely. When using HTM,
Here we take the second approach because it's slightly simpler
and because it's easy to see that it doesn't lose profile counts. */
bi_call_bb->count = profile_count::zero ();
- bi_call_bb->frequency = 0;
while (!edges.is_empty ())
{
edge_pair e = edges.pop ();
nocall_edge->probability = profile_probability::always ()
- call_edge->probability;
- unsigned int call_frequency
- = call_edge->probability.apply (src_bb->frequency);
-
bi_call_bb->count += call_edge->count ();
- bi_call_bb->frequency += call_frequency;
if (nocall_edge->dest != join_tgt_bb)
- {
- nocall_edge->dest->frequency = src_bb->frequency - call_frequency;
- }
+ nocall_edge->dest->count = src_bb->count - bi_call_bb->count;
}
if (dom_info_available_p (CDI_DOMINATORS))
tree_guess_outgoing_edge_probabilities (bb);
if (all || profile_status_for_fn (cfun) == PROFILE_READ)
bb->count = cnt;
- bb->frequency = freq;
bb = bb->next_bb;
}
if (a->loop_father == b->loop_father)
{
a->count = a->count.merge (b->count);
- a->frequency = MAX (a->frequency, b->frequency);
}
/* Merge the sequences. */
after_bb = split_edge_bb_loc (edge_in);
new_bb = create_empty_bb (after_bb);
- new_bb->frequency = EDGE_FREQUENCY (edge_in);
new_bb->count = edge_in->count ();
e = redirect_edge_and_branch (edge_in, new_bb);
bool free_region_copy = false, copying_header = false;
struct loop *loop = entry->dest->loop_father;
edge exit_copy;
- vec<basic_block> doms;
+ vec<basic_block> doms = vNULL;
edge redirected;
- int total_freq = 0, entry_freq = 0;
profile_count total_count = profile_count::uninitialized ();
profile_count entry_count = profile_count::uninitialized ();
if (entry_count > total_count)
entry_count = total_count;
}
- if (!(total_count > 0) || !(entry_count > 0))
- {
- total_freq = entry->dest->frequency;
- entry_freq = EDGE_FREQUENCY (entry);
- /* Fix up corner cases, to avoid division by zero or creation of negative
- frequencies. */
- if (total_freq == 0)
- total_freq = 1;
- else if (entry_freq > total_freq)
- entry_freq = total_freq;
- }
copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
split_edge_bb_loc (entry), update_dominance);
- if (total_count > 0 && entry_count > 0)
+ if (total_count.initialized_p () && entry_count.initialized_p ())
{
scale_bbs_frequencies_profile_count (region, n_region,
total_count - entry_count,
scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count,
total_count);
}
- else
- {
- scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
- total_freq);
- scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
- }
if (copying_header)
{
struct loop *orig_loop = entry->dest->loop_father;
basic_block switch_bb, entry_bb, nentry_bb;
vec<basic_block> doms;
- int total_freq = 0, exit_freq = 0;
profile_count total_count = profile_count::uninitialized (),
exit_count = profile_count::uninitialized ();
edge exits[2], nexits[2], e;
inside. */
doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region);
- if (exit->src->count > 0)
- {
- total_count = exit->src->count;
- exit_count = exit->count ();
- /* Fix up corner cases, to avoid division by zero or creation of negative
- frequencies. */
- if (exit_count > total_count)
- exit_count = total_count;
- }
- else
- {
- total_freq = exit->src->frequency;
- exit_freq = EDGE_FREQUENCY (exit);
- /* Fix up corner cases, to avoid division by zero or creation of negative
- frequencies. */
- if (total_freq == 0)
- total_freq = 1;
- if (exit_freq > total_freq)
- exit_freq = total_freq;
- }
+ total_count = exit->src->count;
+ exit_count = exit->count ();
+ /* Fix up corner cases, to avoid division by zero or creation of negative
+ frequencies. */
+ if (exit_count > total_count)
+ exit_count = total_count;
copy_bbs (region, n_region, region_copy, exits, 2, nexits, orig_loop,
split_edge_bb_loc (exit), true);
- if (total_count.initialized_p ())
+ if (total_count.initialized_p () && exit_count.initialized_p ())
{
scale_bbs_frequencies_profile_count (region, n_region,
total_count - exit_count,
scale_bbs_frequencies_profile_count (region_copy, n_region, exit_count,
total_count);
}
- else
- {
- scale_bbs_frequencies_int (region, n_region, total_freq - exit_freq,
- total_freq);
- scale_bbs_frequencies_int (region_copy, n_region, exit_freq, total_freq);
- }
/* Create the switch block, and put the exit condition to it. */
entry_bb = entry->dest;
FIXME, this is silly. The CFG ought to become a parameter to
these helpers. */
push_cfun (dest_cfun);
- make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU);
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb->count;
+ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), entry_bb, EDGE_FALLTHRU);
if (exit_bb)
- make_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+ {
+ make_single_succ_edge (exit_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->count = exit_bb->count;
+ }
+ else
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ();
pop_cfun ();
/* Back in the original function, the SESE region has disappeared,
else if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
record->time[after_pass]
+= estimate_num_insns (gsi_stmt (i),
- &eni_time_weights) * bb->frequency;
+ &eni_time_weights) * bb->count.to_frequency (cfun);
}
}
edge e = make_edge (bb, new_bb, EDGE_TRUE_VALUE);
e->probability = prob;
new_bb->count = e->count ();
- new_bb->frequency = prob.apply (bb->frequency);
make_single_succ_edge (new_bb, fall->dest, EDGE_FALLTHRU);
/* Fix edge for split bb. */
cgraph_node *node = cgraph_node::get (current_function_decl);
profile_count num = node->count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
- bool scale = num.initialized_p ()
- && (den > 0 || num == profile_count::zero ())
- && !(num == den);
+ bool scale = num.initialized_p () && den.ipa_p ()
+ && (den.nonzero_p () || num == profile_count::zero ())
+ && !(num == den.ipa ());
if (scale)
{
bb_join = e->dest;
bb_true = create_empty_bb (bb_cond);
bb_false = create_empty_bb (bb_true);
- bb_true->frequency = bb_false->frequency = bb_cond->frequency / 2;
bb_true->count = bb_false->count
= bb_cond->count.apply_probability (profile_probability::even ());
gimple_stmt_iterator gsi2;
new_bb = create_empty_bb (bb);
+ new_bb->count = bb->count;
add_bb_to_loop (new_bb, bb->loop_father);
lab = gimple_block_label (new_bb);
gsi2 = gsi_start_bb (new_bb);
later */
static basic_block
-copy_bb (copy_body_data *id, basic_block bb, int frequency_scale,
+copy_bb (copy_body_data *id, basic_block bb,
profile_count num, profile_count den)
{
gimple_stmt_iterator gsi, copy_gsi, seq_gsi;
basic_block copy_basic_block;
tree decl;
- gcov_type freq;
basic_block prev;
- bool scale = num.initialized_p ()
- && (den > 0 || num == profile_count::zero ());
+ bool scale = !num.initialized_p ()
+ || (den.nonzero_p () || num == profile_count::zero ());
/* Search for previous copied basic block. */
prev = bb->prev_bb;
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
if (scale)
copy_basic_block->count = bb->count.apply_scale (num, den);
-
- /* We are going to rebuild frequencies from scratch. These values
- have just small importance to drive canonicalize_loop_headers. */
- freq = apply_scale ((gcov_type)bb->frequency, frequency_scale);
-
- /* We recompute frequencies after inlining, so this is quite safe. */
- if (freq > BB_FREQ_MAX)
- freq = BB_FREQ_MAX;
- copy_basic_block->frequency = freq;
+ else if (num.initialized_p ())
+ copy_basic_block->count = bb->count;
copy_gsi = gsi_start_bb (copy_basic_block);
fprintf (dump_file,
"Orig bb: %i, orig bb freq %i, new bb freq %i\n",
bb->index,
- bb->frequency,
- copy_basic_block->frequency);
+ bb->count.to_frequency (cfun),
+ copy_basic_block->count.to_frequency (cfun));
}
}
}
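(For orientation, the rule copy_bb now implements: a copied block keeps its
share of the callee profile, rescaled from the callee's entry count (den) to
the count entering the inlined copy (num).  A hedged toy model with uint64_t,
not the real profile_count class:

  #include <cstdint>

  /* Toy model of copy_basic_block->count = bb->count.apply_scale (num, den),
     with the unscaled fallback used when den is zero but num is known.  */
  static uint64_t
  copied_bb_count (uint64_t bb_count, uint64_t num, uint64_t den)
  {
    if (den != 0)
      return bb_count * num / den;
    return bb_count;
  }
)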
profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun);
- /* FIXME: When all counts are known to be zero, scaling is also meaningful.
- */
if (ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ()
- && count.initialized_p ()
- && ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.initialized_p ())
+ && count.ipa ().initialized_p ())
{
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count =
  ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
			    ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
EXIT_BLOCK_PTR_FOR_FN (cfun)->count =
  EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count.apply_scale (count,
			    ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count);
}
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency
- = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->frequency;
- EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency =
- EXIT_BLOCK_PTR_FOR_FN (src_cfun)->frequency;
+ else
+ {
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+ = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->count
+ = EXIT_BLOCK_PTR_FOR_FN (src_cfun)->count;
+ }
if (src_cfun->eh)
init_eh_for_function ();
}
}
-/* Convert estimated frequencies into counts for NODE, scaling COUNT
- with each bb's frequency. Used when NODE has a 0-weight entry
- but we are about to inline it into a non-zero count call bb.
- See the comments for handle_missing_profiles() in predict.c for
- when this can happen for COMDATs. */
-
-void
-freqs_to_counts (struct cgraph_node *node, profile_count count)
-{
- basic_block bb;
- struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
-
- FOR_ALL_BB_FN(bb, fn)
- bb->count = count.apply_scale (bb->frequency, BB_FREQ_MAX);
-}
-
/* Make a copy of the body of FN so that it can be inserted inline in
another function. Walks FN via CFG, returns new fndecl. */
static tree
-copy_cfg_body (copy_body_data * id, profile_count count, int frequency_scale,
+copy_cfg_body (copy_body_data * id, profile_count,
basic_block entry_block_map, basic_block exit_block_map,
basic_block new_entry)
{
tree new_fndecl = NULL;
bool need_debug_cleanup = false;
int last;
- int incoming_frequency = 0;
- profile_count incoming_count = profile_count::zero ();
- profile_count num = count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count;
- bool scale = num.initialized_p ()
- && (den > 0 || num == profile_count::zero ());
-
- /* This can happen for COMDAT routines that end up with 0 counts
- despite being called (see the comments for handle_missing_profiles()
- in predict.c as to why). Apply counts to the blocks in the callee
- before inlining, using the guessed edge frequencies, so that we don't
- end up with a 0-count inline body which can confuse downstream
- optimizations such as function splitting. */
- if (!(ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count > 0) && count > 0)
- {
- /* Apply the larger of the call bb count and the total incoming
- call edge count to the callee. */
- profile_count in_count = profile_count::zero ();
- struct cgraph_edge *in_edge;
- for (in_edge = id->src_node->callers; in_edge;
- in_edge = in_edge->next_caller)
- if (in_edge->count.initialized_p ())
- in_count += in_edge->count;
- freqs_to_counts (id->src_node, count > in_count ? count : in_count);
- }
+ profile_count num = entry_block_map->count;
+
+ cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);
/* Register specific tree functions. */
gimple_register_cfg_hooks ();
{
edge e;
edge_iterator ei;
+ den = profile_count::zero ();
FOR_EACH_EDGE (e, ei, new_entry->preds)
if (!e->src->aux)
- incoming_frequency += EDGE_FREQUENCY (e);
- if (scale)
- incoming_count = incoming_count.apply_scale (num, den);
- else
- incoming_count = profile_count::uninitialized ();
- incoming_frequency
- = apply_scale ((gcov_type)incoming_frequency, frequency_scale);
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = incoming_count;
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = incoming_frequency;
+ den += e->count ();
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
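(The loop above derives den from the CFG rather than from a parameter: in GCC
an edge count is the source block's count times the edge probability, so the
copied body's entry count is simply the sum over the relevant incoming edges.
A hedged sketch with hypothetical toy types:

  #include <cstdint>
  #include <vector>

  struct toy_edge { uint64_t src_count; double probability; };

  /* Sum e->count () over incoming edges, as the FOR_EACH_EDGE loop does.  */
  static uint64_t
  incoming_count (const std::vector<toy_edge> &preds)
  {
    uint64_t den = 0;
    for (const toy_edge &e : preds)
      den += (uint64_t) (e.src_count * e.probability);  /* ~ e->count ()  */
    return den;
  }
)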
/* Must have a CFG here at this point. */
gcc_assert (ENTRY_BLOCK_PTR_FOR_FN
(DECL_STRUCT_FUNCTION (callee_fndecl)));
- cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);
ENTRY_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = entry_block_map;
EXIT_BLOCK_PTR_FOR_FN (cfun_to_copy)->aux = exit_block_map;
FOR_EACH_BB_FN (bb, cfun_to_copy)
if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index))
{
- basic_block new_bb = copy_bb (id, bb, frequency_scale, num, den);
+ basic_block new_bb = copy_bb (id, bb, num, den);
bb->aux = new_bb;
new_bb->aux = bb;
new_bb->loop_father = entry_block_map->loop_father;
another function. */
static tree
-copy_body (copy_body_data *id, profile_count count, int frequency_scale,
+copy_body (copy_body_data *id, profile_count count,
basic_block entry_block_map, basic_block exit_block_map,
basic_block new_entry)
{
/* If this body has a CFG, walk CFG and copy. */
gcc_assert (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (fndecl)));
- body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map,
+ body = copy_cfg_body (id, count, entry_block_map, exit_block_map,
new_entry);
copy_debug_stmts (id);
a self-referential call; if we're calling ourselves, we need to
duplicate our body before altering anything. */
copy_body (id, cg_edge->callee->count,
- GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE),
bb, return_block, NULL);
reset_debug_bindings (id, stmt_gsi);
}
/* Fold queued statements. */
+ counts_to_freqs ();
fold_marked_statements (last, id.statements_to_fold);
delete id.statements_to_fold;
}
/* Copy the Function's body. */
- copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
+ copy_body (&id, old_entry_block->count,
ENTRY_BLOCK_PTR_FOR_FN (cfun), EXIT_BLOCK_PTR_FOR_FN (cfun),
new_entry);
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
+ counts_to_freqs ();
fold_marked_statements (0, id.statements_to_fold);
delete id.statements_to_fold;
delete_unreachable_blocks_update_callgraph (&id);
struct cgraph_edge *e;
rebuild_frequencies ();
- new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ new_version_node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
for (e = new_version_node->callees; e; e = e->next_callee)
{
basic_block bb = gimple_bb (e->call_stmt);
e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
bb);
- e->count = bb->count;
+ e->count = bb->count.ipa ();
}
for (e = new_version_node->indirect_calls; e; e = e->next_callee)
{
basic_block bb = gimple_bb (e->call_stmt);
e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
bb);
- e->count = bb->count;
+ e->count = bb->count.ipa ();
}
}
static inline int
coalesce_cost_bb (basic_block bb)
{
- return coalesce_cost (bb->frequency, optimize_bb_for_size_p (bb));
+ return coalesce_cost (bb->count.to_frequency (cfun),
+			optimize_bb_for_size_p (bb));
}
- inner_taken->probability;
outer_to_inner->probability = profile_probability::always ();
- inner_cond_bb->frequency = outer_cond_bb->frequency;
outer2->probability = profile_probability::never ();
}
for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
it != flag_bbs->end (); ++it)
{
- freq_sum += (*it)->frequency;
+ freq_sum += (*it)->count.to_frequency (cfun);
if ((*it)->count.initialized_p ())
count_sum += (*it)->count, ncount ++;
if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
if (flag_probability.initialized_p ())
;
- else if (ncount == nbbs && count_sum > 0 && preheader->count () >= count_sum)
+ else if (ncount == nbbs
+ && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
{
flag_probability = count_sum.probability_in (preheader->count ());
if (flag_probability > cap)
flag_probability = cap;
}
- else if (freq_sum > 0 && EDGE_FREQUENCY (preheader) >= freq_sum)
- {
- flag_probability = profile_probability::from_reg_br_prob_base
- (GCOV_COMPUTE_SCALE (freq_sum, EDGE_FREQUENCY (preheader)));
- if (flag_probability > cap)
- flag_probability = cap;
- }
- else
+
+ if (!flag_probability.initialized_p ())
flag_probability = cap;
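(A hedged sketch of the probability selection above, with doubles instead of
GCC's fixed-point profile_probability; counts_ok stands in for the
ncount == nbbs test.  The flag probability is the flag blocks' share of
preheader executions, clamped by cap, with cap itself as the fallback:

  #include <algorithm>

  /* Toy version of: count_sum.probability_in (preheader_count), capped.  */
  static double
  flag_probability (double count_sum, double preheader_count, double cap,
		    bool counts_ok)
  {
    if (counts_ok && preheader_count > 0 && preheader_count >= count_sum)
      return std::min (count_sum / preheader_count, cap);
    return cap;			/* Fallback mirrors the code above.  */
  }
)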
/* ?? Insert store after previous store if applicable. See note
old_dest = ex->dest;
new_bb = split_edge (ex);
then_bb = create_empty_bb (new_bb);
- then_bb->frequency = flag_probability.apply (new_bb->frequency);
then_bb->count = new_bb->count.apply_probability (flag_probability);
if (irr)
then_bb->flags = BB_IRREDUCIBLE_LOOP;
add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
latch_edge->dest->count = profile_count::zero ();
- latch_edge->dest->frequency = 0;
set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
gsi = gsi_start_bb (latch_edge->dest);
}
}
profile_count entry_count = profile_count::zero ();
- int entry_freq = 0;
edge e;
edge_iterator ei;
{
if (e->src->count.initialized_p ())
entry_count += e->src->count;
- entry_freq += e->src->frequency;
gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
}
profile_probability p = profile_probability::very_unlikely ();
- if (loop->header->count > 0)
- p = entry_count.probability_in (loop->header->count);
- else if (loop->header->frequency)
- p = profile_probability::probability_in_gcov_type
- (entry_freq, loop->header->frequency);
+ p = entry_count.probability_in (loop->header->count);
scale_loop_profile (loop, p, 0);
bitmap_set_bit (peeled_loops, loop->num);
return true;
static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
{
- int loop_freq = data->current_loop->header->frequency;
- int bb_freq = gimple_bb (at)->frequency;
+ int loop_freq = data->current_loop->header->count.to_frequency (cfun);
+ int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
if (loop_freq != 0)
{
gcc_assert (cost.scratch <= cost.cost);
converts back. */
gcov_type new_est_niter = est_niter / factor;
+ if (est_niter == -1)
+ return -1;
+
/* Without profile feedback, loops for which we do not know a better estimate
are assumed to roll 10 times. When we unroll such loop, it appears to
roll too little, and it may even seem to be cold. To avoid this, we
freq_h = loop->header->count;
freq_e = (loop_preheader_edge (loop))->count ();
- /* Use frequency only if counts are zero. */
- if (!(freq_h > 0) && !(freq_e > 0))
- {
- freq_h = profile_count::from_gcov_type (loop->header->frequency);
- freq_e = profile_count::from_gcov_type
- (EDGE_FREQUENCY (loop_preheader_edge (loop)));
- }
- if (freq_h > 0)
+ if (freq_h.nonzero_p ())
{
/* Avoid dropping loop body profile counter to 0 because of zero count
in loop's preheader. */
.apply_scale (1, new_est_niter + 1);
rest->count += new_exit->count ();
- rest->frequency += EDGE_FREQUENCY (new_exit);
new_nonexit = single_pred_edge (loop->latch);
prob = new_nonexit->probability;
recomputing iteration bounds later in the compilation process will just
introduce random roundoff errors. */
if (!loop->any_estimate
- && loop->header->count > 0)
+ && loop->header->count.reliable_p ())
{
gcov_type nit = expected_loop_iterations_unbounded (loop);
bound = gcov_type_to_wide_int (nit);
/* Determine the probability that we skip the loop. Assume that loop has
same average number of iterations regardless outcome of guard. */
new_edge->probability = guard->probability;
- profile_count skip_count = guard->src->count > 0
+ profile_count skip_count = guard->src->count.nonzero_p ()
? guard->count ().apply_scale (pre_header->count,
guard->src->count)
: guard->count ().apply_probability (new_edge->probability);
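(The two branches above compute the same quantity from different inputs:
rescale the guard count by pre_header/src when the source count is usable,
otherwise fall back to applying the guard's own probability.  A hedged toy
model with plain integers and a double, not the profile_count API:

  #include <cstdint>

  /* Mirrors: guard->count ().apply_scale (pre_header, src) when src > 0,
     else guard->count ().apply_probability (probability).  */
  static uint64_t
  skip_count (uint64_t guard_count, uint64_t pre_header_count,
	      uint64_t src_count, double probability)
  {
    if (src_count > 0)
      return guard_count * pre_header_count / src_count;
    return (uint64_t) (guard_count * probability);
  }
)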
to loop header... */
e->probability = new_edge->probability.invert ();
e->dest->count = e->count ();
- e->dest->frequency = EDGE_FREQUENCY (e);
/* ... now update profile to represent that original guard will be optimized
away ... */
/* If BEST_BB is at the same nesting level, then require it to have
significantly lower execution frequency to avoid gratuitous movement. */
if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
- && best_bb->frequency < (early_bb->frequency * threshold / 100.0))
+ && best_bb->count.to_frequency (cfun)
+ < (early_bb->count.to_frequency (cfun) * threshold / 100.0))
return best_bb;
/* No better block found, so return EARLY_BB, which happens to be the
replace_block_by (basic_block bb1, basic_block bb2)
{
edge pred_edge;
- edge e1, e2;
- edge_iterator ei;
unsigned int i;
gphi *bb2_phi;
bb2->count += bb1->count;
+ /* FIXME: Fix merging of probabilities. They need to be redistributed
+ according to the relative counts of merged BBs. */
+#if 0
/* Merge the outgoing edge counts from bb1 onto bb2. */
profile_count out_sum = profile_count::zero ();
int out_freq_sum = 0;
+ edge e1, e2;
/* Recompute the edge probabilities from the new merged edge count.
Use the sum of the new merged edge counts computed above instead
out_sum += e1->count ();
out_freq_sum += EDGE_FREQUENCY (e1);
}
-
FOR_EACH_EDGE (e1, ei, bb1->succs)
{
e2 = find_edge (bb2, e1->dest);
{
e2->probability = e2->count ().probability_in (bb2->count);
}
- else if (bb1->frequency && bb2->frequency)
+ else if (bb1->count.to_frequency (cfun) && bb2->count.to_frequency (cfun))
e2->probability = e1->probability;
- else if (bb2->frequency && !bb1->frequency)
+ else if (bb2->count.to_frequency (cfun) && !bb1->count.to_frequency (cfun))
;
else if (out_freq_sum)
e2->probability = profile_probability::from_reg_br_prob_base
out_freq_sum));
out_sum += e2->count ();
}
- bb2->frequency += bb1->frequency;
- if (bb2->frequency > BB_FREQ_MAX)
- bb2->frequency = BB_FREQ_MAX;
+#endif
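(What the FIXME asks for, sketched with toy doubles -- a hypothetical helper,
not code proposed by this patch: once bb1's count has been folded into bb2,
each pair of corresponding outgoing edges should get the count-weighted
average of the two old probabilities.

  #include <cstdint>

  /* Count-weighted merge of the probabilities of corresponding edges
     out of bb1 and bb2, as the FIXME suggests.  */
  static double
  merged_probability (uint64_t count1, double prob1,
		      uint64_t count2, double prob2)
  {
    uint64_t total = count1 + count2;
    if (total == 0)
      return (prob1 + prob2) / 2;	/* No counts: fall back to average.  */
    return (count1 * prob1 + count2 * prob2) / total;
  }
)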
/* Move over any user labels from bb1 after the bb2 labels. */
gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
e->aux = NULL;
/* Zero out the profile, since the block is unreachable for now. */
- rd->dup_blocks[count]->frequency = 0;
rd->dup_blocks[count]->count = profile_count::uninitialized ();
if (duplicate_blocks)
bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}
-/* Compute the amount of profile count/frequency coming into the jump threading
+/* Compute the amount of profile count coming into the jump threading
path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
PATH_IN_FREQ_PTR, as well as the amount of profile count flowing out of the
duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
edges that need to be ignored in the analysis. Return true if path contains
a joiner, false otherwise.
- In the non-joiner case, this is straightforward - all the counts/frequency
+ In the non-joiner case, this is straightforward - all the counts
flowing into the jump threading path should flow through the duplicated
block and out of the duplicated path.
- /* Update the counts and frequencies for both an original path
+ /* Update the counts for both an original path
edge EPATH and its duplicate EDUP. The duplicate source block
- will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
+ will get a count of PATH_IN_COUNT,
and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
static void
update_profile (edge epath, edge edup, profile_count path_in_count,
- profile_count path_out_count, int path_in_freq)
+ profile_count path_out_count)
{
- if (!(path_in_count > 0))
- return;
- /* First update the duplicated block's count / frequency. */
+ /* First update the duplicated block's count. */
if (edup)
{
basic_block dup_block = edup->src;
if (esucc != edup)
esucc->probability *= scale;
}
- edup->probability = edup_prob;
+ if (edup_prob.initialized_p ())
+ edup->probability = edup_prob;
- /* FIXME once freqs_to_counts is dropped re-enable this check. */
- gcc_assert (!dup_block->count.initialized_p () || 1);
- gcc_assert (dup_block->frequency == 0);
+ gcc_assert (!dup_block->count.initialized_p ());
dup_block->count = path_in_count;
- dup_block->frequency = path_in_freq;
}
+ if (path_in_count == profile_count::zero ())
+ return;
+
profile_count final_count = epath->count () - path_out_count;
- /* Now update the original block's count and frequency in the
- opposite manner - remove the counts/freq that will flow
+ /* Now update the original block's count in the
+ opposite manner - remove the count that will flow
into the duplicated block. Handle underflow due to precision/
rounding issues. */
epath->src->count -= path_in_count;
- epath->src->frequency -= path_in_freq;
- if (epath->src->frequency < 0)
- epath->src->frequency = 0;
/* Next update this path edge's original and duplicated counts. We know
that the duplicated path will have path_out_count flowing
out of it (in the joiner case this is the count along the duplicated path
out of the duplicated joiner). This count can then be removed from the
original path edge. */
- if (epath->src->count > 0)
- {
- edge esucc;
- edge_iterator ei;
- profile_probability epath_prob = final_count.probability_in (epath->src->count);
-
- if (epath->probability > epath_prob)
- {
- profile_probability rev_scale
- = (profile_probability::always () - epath->probability)
- / (profile_probability::always () - epath_prob);
- FOR_EACH_EDGE (esucc, ei, epath->src->succs)
- if (esucc != epath)
- esucc->probability /= rev_scale;
- }
- else if (epath->probability < epath_prob)
- {
- profile_probability scale
- = (profile_probability::always () - epath_prob)
- / (profile_probability::always () - epath->probability);
- FOR_EACH_EDGE (esucc, ei, epath->src->succs)
- if (esucc != epath)
- esucc->probability *= scale;
- }
- epath->probability = epath_prob;
- }
-}
-
-
-/* Check if the paths through RD all have estimated frequencies but zero
- profile counts. This is more accurate than checking the entry block
- for a zero profile count, since profile insanities sometimes creep in. */
-
-static bool
-estimated_freqs_path (struct redirection_data *rd)
-{
- edge e = rd->incoming_edges->e;
- vec<jump_thread_edge *> *path = THREAD_PATH (e);
- edge ein;
- edge_iterator ei;
- bool non_zero_freq = false;
- FOR_EACH_EDGE (ein, ei, e->dest->preds)
- {
- if (ein->count () > 0)
- return false;
- non_zero_freq |= ein->src->frequency != 0;
- }
-
- for (unsigned int i = 1; i < path->length (); i++)
- {
- edge epath = (*path)[i]->e;
- if (epath->src->count > 0)
- return false;
- non_zero_freq |= epath->src->frequency != 0;
- edge esucc;
- FOR_EACH_EDGE (esucc, ei, epath->src->succs)
- {
- if (esucc->count () > 0)
- return false;
- non_zero_freq |= esucc->src->frequency != 0;
- }
- }
- return non_zero_freq;
-}
-
-
-/* Invoked for routines that have guessed frequencies and no profile
- counts to record the block and edge frequencies for paths through RD
- in the profile count fields of those blocks and edges. This is because
- ssa_fix_duplicate_block_edges incrementally updates the block and
- edge counts as edges are redirected, and it is difficult to do that
- for edge frequencies which are computed on the fly from the source
- block frequency and probability. When a block frequency is updated
- its outgoing edge frequencies are affected and become difficult to
- adjust. */
-
-static void
-freqs_to_counts_path (struct redirection_data *rd)
-{
- edge e = rd->incoming_edges->e;
- vec<jump_thread_edge *> *path = THREAD_PATH (e);
- edge ein;
- edge_iterator ei;
-
- FOR_EACH_EDGE (ein, ei, e->dest->preds)
- ein->src->count = profile_count::from_gcov_type
- (ein->src->frequency * REG_BR_PROB_BASE);
- for (unsigned int i = 1; i < path->length (); i++)
- {
- edge epath = (*path)[i]->e;
- /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
- errors applying the edge probability when the frequencies are very
- small. */
- epath->src->count =
- profile_count::from_gcov_type
- (epath->src->frequency * REG_BR_PROB_BASE);
- }
-}
-
-
-/* For routines that have guessed frequencies and no profile counts, where we
- used freqs_to_counts_path to record block and edge frequencies for paths
- through RD, we clear the counts after completing all updates for RD.
- The updates in ssa_fix_duplicate_block_edges are based off the count fields,
- but the block frequencies and edge probabilities were updated as well,
- so we can simply clear the count fields. */
-
-static void
-clear_counts_path (struct redirection_data *rd)
-{
- edge e = rd->incoming_edges->e;
- vec<jump_thread_edge *> *path = THREAD_PATH (e);
- profile_count val = profile_count::uninitialized ();
- if (profile_status_for_fn (cfun) == PROFILE_READ)
- val = profile_count::zero ();
- edge ein;
+ edge esucc;
edge_iterator ei;
+ profile_probability epath_prob = final_count.probability_in (epath->src->count);
- FOR_EACH_EDGE (ein, ei, e->dest->preds)
- ein->src->count = val;
-
- /* First clear counts along original path. */
- for (unsigned int i = 1; i < path->length (); i++)
+ if (epath->probability > epath_prob)
{
- edge epath = (*path)[i]->e;
- epath->src->count = val;
+ profile_probability rev_scale
+ = (profile_probability::always () - epath->probability)
+ / (profile_probability::always () - epath_prob);
+ FOR_EACH_EDGE (esucc, ei, epath->src->succs)
+ if (esucc != epath)
+ esucc->probability /= rev_scale;
}
- /* Also need to clear the counts along duplicated path. */
- for (unsigned int i = 0; i < 2; i++)
+ else if (epath->probability < epath_prob)
{
- basic_block dup = rd->dup_blocks[i];
- if (!dup)
- continue;
- dup->count = val;
+ profile_probability scale
+ = (profile_probability::always () - epath_prob)
+ / (profile_probability::always () - epath->probability);
+ FOR_EACH_EDGE (esucc, ei, epath->src->succs)
+ if (esucc != epath)
+ esucc->probability *= scale;
}
+ if (epath_prob.initialized_p ())
+ epath->probability = epath_prob;
}
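(The rescaling pattern used twice above keeps the outgoing probabilities of
epath->src summing to one: when one edge moves from probability p to p',
every sibling edge is scaled by (1 - p') / (1 - p).  A hedged standalone
sketch with doubles:

  #include <vector>

  /* Set edge IDX to NEW_P and rescale the remaining probabilities so the
     whole vector still sums to 1 (assumes probs[idx] != 1).  */
  static void
  retarget_probability (std::vector<double> &probs, size_t idx, double new_p)
  {
    double scale = (1.0 - new_p) / (1.0 - probs[idx]);
    for (size_t i = 0; i < probs.size (); i++)
      if (i != idx)
	probs[i] *= scale;
    probs[idx] = new_p;
  }
)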
/* Wire up the outgoing edges from the duplicate blocks and
profile_count path_out_count = profile_count::zero ();
int path_in_freq = 0;
- /* This routine updates profile counts, frequencies, and probabilities
- incrementally. Since it is difficult to do the incremental updates
- using frequencies/probabilities alone, for routines without profile
- data we first take a snapshot of the existing block and edge frequencies
- by copying them into the empty profile count fields. These counts are
- then used to do the incremental updates, and cleared at the end of this
- routine. If the function is marked as having a profile, we still check
- to see if the paths through RD are using estimated frequencies because
- the routine had zero profile counts. */
- bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
- || estimated_freqs_path (rd));
- if (do_freqs_to_counts)
- freqs_to_counts_path (rd);
-
/* First determine how much profile count to move from original
path to the duplicate path. This is tricky in the presence of
a joiner (see comments for compute_path_counts), where some portion
&path_in_count, &path_out_count,
&path_in_freq);
- int cur_path_freq = path_in_freq;
for (unsigned int count = 0, i = 1; i < path->length (); i++)
{
edge epath = (*path)[i]->e;
}
}
- /* Update the counts and frequency of both the original block
+ /* Update the counts of both the original block
and path edge, and the duplicates. The path duplicate's
- incoming count and frequency are the totals for all edges
+ incoming count is the total for all edges
incoming to this jump threading path computed earlier.
And we know that the duplicated path will have path_out_count
flowing out of it (i.e. along the duplicated path out of the
duplicated joiner). */
- update_profile (epath, e2, path_in_count, path_out_count,
- path_in_freq);
-
- /* Record the frequency flowing to the downstream duplicated
- path blocks. */
- cur_path_freq = EDGE_FREQUENCY (e2);
+ update_profile (epath, e2, path_in_count, path_out_count);
}
else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
{
if (count == 1)
single_succ_edge (rd->dup_blocks[1])->aux = NULL;
- /* Update the counts and frequency of both the original block
+ /* Update the counts of both the original block
and path edge, and the duplicates. Since we are now after
any joiner that may have existed on the path, the count
flowing along the duplicated threaded path is path_out_count.
been updated at the end of that handling to the edge frequency
along the duplicated joiner path edge. */
update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
- path_out_count, path_out_count, cur_path_freq);
+ path_out_count, path_out_count);
}
else
{
thread path (path_in_freq). If we had a joiner, it would have
been updated at the end of that handling to the edge frequency
along the duplicated joiner path edge. */
- update_profile (epath, NULL, path_out_count, path_out_count,
- cur_path_freq);
+ update_profile (epath, NULL, path_out_count, path_out_count);
}
/* Increment the index into the duplicated path when we processed
count++;
}
}
-
- /* Done with all profile and frequency updates, clear counts if they
- were copied. */
- if (do_freqs_to_counts)
- clear_counts_path (rd);
}
/* Hash table traversal callback routine to create duplicate blocks. */
struct loop *loop = entry->dest->loop_father;
edge exit_copy;
edge redirected;
- int curr_freq;
profile_count curr_count;
if (!can_copy_bbs_p (region, n_region))
the jump-thread path in order. */
curr_count = entry->count ();
- curr_freq = EDGE_FREQUENCY (entry);
for (i = 0; i < n_region; i++)
{
/* Watch inconsistent profile. */
if (curr_count > region[i]->count)
curr_count = region[i]->count;
- if (curr_freq > region[i]->frequency)
- curr_freq = region[i]->frequency;
/* Scale current BB. */
- if (region[i]->count > 0 && curr_count.initialized_p ())
+ if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
{
/* In the middle of the path we only scale the frequencies.
In last BB we need to update probabilities of outgoing edges
because we know which one will be taken at the end of the path. */
if (i + 1 != n_region)
scale_bbs_frequencies_profile_count (region + i, 1,
region[i]->count - curr_count,
region[i]->count);
else
update_bb_profile_for_threading (region[i],
- curr_freq, curr_count,
+ curr_count,
exit);
scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
region_copy[i]->count);
}
- else if (region[i]->frequency)
- {
- if (i + 1 != n_region)
- scale_bbs_frequencies_int (region + i, 1,
- region[i]->frequency - curr_freq,
- region[i]->frequency);
- else
- update_bb_profile_for_threading (region[i],
- curr_freq, curr_count,
- exit);
- scale_bbs_frequencies_int (region_copy + i, 1, curr_freq,
- region_copy[i]->frequency);
- }
if (single_succ_p (bb))
{
|| region_copy[i + 1] == single_succ_edge (bb)->dest);
if (i + 1 != n_region)
{
- curr_freq = EDGE_FREQUENCY (single_succ_edge (bb));
curr_count = single_succ_edge (bb)->count ();
}
continue;
}
else
{
- curr_freq = EDGE_FREQUENCY (e);
curr_count = e->count ();
}
}
}
- /* frequencies of the new BBs */
+ /* counts of the new BBs */
- bb1->frequency = EDGE_FREQUENCY (e01);
- bb2->frequency = EDGE_FREQUENCY (e02);
+ bb1->count = e01->count ();
+ bb2->count = e02->count ();
if (!info->default_case_nonstandard)
- bbf->frequency = EDGE_FREQUENCY (e1f) + EDGE_FREQUENCY (e2f);
+ bbf->count = e1f->count () + e2f->count ();
/* Tidy blocks that have become unreachable. */
prune_bbs (bbd, info->final_bb,
- /* Subtract COUNT and FREQUENCY from the basic block and it's
- outgoing edge. */
+ /* Subtract COUNT from the basic block and its outgoing edge. */
static void
-decrease_profile (basic_block bb, profile_count count, int frequency)
+decrease_profile (basic_block bb, profile_count count)
{
bb->count = bb->count - count;
- bb->frequency -= frequency;
- if (bb->frequency < 0)
- bb->frequency = 0;
if (!single_succ_p (bb))
{
gcc_assert (!EDGE_COUNT (bb->succs));
/* Number of executions of function has reduced by the tailcall. */
e = single_succ_edge (gsi_bb (t->call_gsi));
- decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count (), EDGE_FREQUENCY (e));
- decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count (),
- EDGE_FREQUENCY (e));
+ decrease_profile (EXIT_BLOCK_PTR_FOR_FN (cfun), e->count ());
+ decrease_profile (ENTRY_BLOCK_PTR_FOR_FN (cfun), e->count ());
if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
- decrease_profile (e->dest, e->count (), EDGE_FREQUENCY (e));
+ decrease_profile (e->dest, e->count ());
/* Replace the call by a jump to the start of function. */
e = redirect_edge_and_branch (single_succ_edge (gsi_bb (t->call_gsi)),
/* Simply propagate profile info from guard_bb to guard_to which is
a merge point of control flow. */
- guard_to->frequency = guard_bb->frequency;
guard_to->count = guard_bb->count;
/* Scale probability of epilog loop back.
FIXME: We should avoid scaling down and back up. Profile may
gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
profile_count freq_h = loop->header->count, freq_e = preheader->count ();
- /* Use frequency only if counts are zero. */
- if (!(freq_h > 0) && !(freq_e > 0))
- {
- freq_h = profile_count::from_gcov_type (loop->header->frequency);
- freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader));
- }
- if (freq_h > 0)
+ if (freq_h.nonzero_p ())
{
profile_probability p;
/* Avoid dropping loop body profile counter to 0 because of zero count
in loop's preheader. */
- if (!(freq_e > profile_count::from_gcov_type (1)))
- freq_e = profile_count::from_gcov_type (1);
+ if (!(freq_e == profile_count::zero ()))
+ freq_e = freq_e.force_nonzero ();
p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h);
scale_loop_frequencies (loop, p);
}
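(Worked through with toy numbers -- a hedged model in plain arithmetic, not
GCC's fixed-point types: after unrolling, the body should execute
freq_e * (new_est_niter + 1) times, so the loop is scaled by that value's
ratio to the current header count freq_h.  E.g. freq_e = 100, freq_h = 1000,
new_est_niter = 4 gives a scale of 0.5.

  #include <cstdint>

  /* p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h),
     modelled with plain integers and a double result.  */
  static double
  unroll_scale (uint64_t freq_e, uint64_t freq_h, uint64_t new_est_niter)
  {
    if (freq_h == 0)
      return 1.0;			/* Nothing sensible to scale by.  */
    return (double) (freq_e * (new_est_niter + 1)) / freq_h;
  }
)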
efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
/* Put STORE_BB to likely part. */
efalse->probability = profile_probability::unlikely ();
- store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
+ store_bb->count = efalse->count ();
make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
if (dom_info_available_p (CDI_DOMINATORS))
set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
vec<tree> vargs = vNULL;
size_t i, nargs;
tree lhs, rtype, ratype;
- vec<constructor_elt, va_gc> *ret_ctor_elts;
+ vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
/* Is STMT a vectorizable call? */
if (!is_gimple_call (stmt))
this edge is unlikely taken, so set up the probability accordingly. */
e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
+ then_bb->count = e->count ();
/* Connect 'then block' with the 'else block'. This is needed
as the ubsan routines we call in the 'then block' are not noreturn.
accordingly. */
e = make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
+ then_bb->count = e->count ();
}
else
{
e = make_edge (cond_neg_bb, then_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
+ then_bb->count = e->count ();
cond_pos_bb = create_empty_bb (cond_bb);
add_bb_to_loop (cond_pos_bb, cond_bb->loop_father);
e = make_edge (cond_bb, cond_pos_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::even ();
+ cond_pos_bb->count = e->count ();
e = make_edge (cond_pos_bb, then_bb, EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
check_counter (gimple *stmt, const char * name,
gcov_type *count, gcov_type *all, profile_count bb_count_d)
{
- gcov_type bb_count = bb_count_d.to_gcov_type ();
+ gcov_type bb_count = bb_count_d.ipa ().to_gcov_type ();
if (*all != bb_count || *count > *all)
{
location_t locus;
gcall *
gimple_ic (gcall *icall_stmt, struct cgraph_node *direct_call,
- profile_probability prob, profile_count count, profile_count all)
+ profile_probability prob)
{
gcall *dcall_stmt;
gassign *load_stmt;
/* Edge e_cd connects cond_bb to dcall_bb, etc; note the first letters. */
e_cd = split_block (cond_bb, cond_stmt);
dcall_bb = e_cd->dest;
- dcall_bb->count = count;
+ dcall_bb->count = cond_bb->count.apply_probability (prob);
e_di = split_block (dcall_bb, dcall_stmt);
icall_bb = e_di->dest;
- icall_bb->count = all - count;
+ icall_bb->count = cond_bb->count - dcall_bb->count;
/* Do not disturb existing EH edges from the indirect call. */
if (!stmt_ends_bb_p (icall_stmt))
if (e_ij != NULL)
{
join_bb = e_ij->dest;
- join_bb->count = all;
+ join_bb->count = cond_bb->count;
}
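(gimple_ic's count bookkeeping is now derived entirely from cond_bb->count
and prob instead of the dropped count/all parameters.  A hedged toy summary
with integers and a double, using a hypothetical struct:

  #include <cstdint>

  struct ic_counts { uint64_t dcall, icall, join; };

  /* Direct-call block gets cond * prob, the indirect-call block gets the
     remainder, and the join block sees the full count again.  */
  static ic_counts
  split_ic_counts (uint64_t cond_count, double prob)
  {
    ic_counts c;
    c.dcall = (uint64_t) (cond_count * prob);
    c.icall = cond_count - c.dcall;
    c.join = cond_count;
    return c;
  }
)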
e_cd->flags = (e_cd->flags & ~EDGE_FALLTHRU) | EDGE_TRUE_VALUE;
count = histogram->hvalue.counters [1];
all = histogram->hvalue.counters [2];
- bb_all = gimple_bb (stmt)->count.to_gcov_type ();
+ bb_all = gimple_bb (stmt)->count.ipa ().to_gcov_type ();
/* The order of CHECK_COUNTER calls is important -
since check_counter can correct the third parameter
and we want to make count <= all <= bb_all. */
void verify_histograms (void);
void free_histograms (function *);
void stringop_block_profile (gimple *, unsigned int *, HOST_WIDE_INT *);
-gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability,
- profile_count, profile_count);
+gcall *gimple_ic (gcall *, struct cgraph_node *, profile_probability);
bool check_ic_target (gcall *, struct cgraph_node *);