From 52c76998c7109adca55106b881b9945fea860015 Mon Sep 17 00:00:00 2001 From: Paul Yuan Date: Mon, 18 Aug 2008 19:02:44 +0000 Subject: [PATCH] cgraph.c (cgraph_edge): Handle inconsistent counts when setting count_scale. 2008-08-18 Paul Yuan Vinodha Ramasamy * cgraph.c (cgraph_edge): Handle inconsistent counts when setting count_scale. * value-prof.c (check_counter): Fix the counter if flag_profile_correction is true. (tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform, tree_mod_subtract_transform): Follow check_counter parameter change. * common.opt (fprofile-correction): New option. * mcf.c: New file. * profile.c (edge_info, EDGE_INFO): Moved to new file profile.h. (sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts, is_inconsistent, set_bb_counts, read_profile_edge_counts): New functions. (compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if flag_profile_correction is set. Co-Authored-By: Vinodha Ramasamy From-SVN: r139208 --- gcc/ChangeLog | 19 +++++ gcc/Makefile.in | 7 +- gcc/cgraph.c | 9 +- gcc/common.opt | 4 + gcc/doc/invoke.texi | 10 ++- gcc/profile.c | 200 ++++++++++++++++++++++++++++++++------------ gcc/value-prof.c | 53 +++++++++--- 7 files changed, 229 insertions(+), 73 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c3ddeabbedc..e6f8549fd32 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2008-08-18 Paul Yuan + Vinodha Ramasamy + + * cgraph.c (cgraph_edge): Handle inconsistent counts when setting + count_scale. + * value-prof.c (check_counter): Fix the counter if + flag_profile_correction is true. + (tree_divmod_fixed_value_transform, tree_mod_pow2_value_transform, + tree_mod_subtract_transform): + Follow check_counter parameter change. + * common.opt (fprofile-correction): New option. + * mcf.c: New file. + * profile.c (edge_info, EDGE_INFO): Moved to new file profile.h. + (sum_edge_counts, is_edge_inconsistent, correct_negative_edge_counts, + is_inconsistent, set_bb_counts, read_profile_edge_counts): New + functions. + (compute_branch_probabilities): Refactored. Invokes mcf_smooth_cfg if + flag_profile_correction is set. + 2008-08-18 Richard Sandiford * rtlanal.c (subreg_offset_representable_p): Check HARD_REGNO_MODE_OK. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3c6318ee694..e50c2d5600d 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1112,6 +1112,7 @@ OBJS-common = \ loop-unroll.o \ loop-unswitch.o \ lower-subreg.o \ + mcf.o \ mode-switching.o \ modulo-sched.o \ omega.o \ @@ -2717,7 +2718,9 @@ var-tracking.o : var-tracking.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) $(FUNCTION_H) \ $(TOPLEV_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h cfghooks.h \ - $(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h + $(CFGLOOP_H) $(TIMEVAR_H) tree-pass.h profile.h +mcf.o : mcf.c profile.h $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h \ + $(BASIC_BLOCK_H) output.h langhooks.h $(GCOV_IO_H) $(TREE_H) tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \ $(FUNCTION_H) $(TOPLEV_H) $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \ @@ -3213,7 +3216,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/emit-rtl.c $(srcdir)/except.c $(srcdir)/explow.c $(srcdir)/expr.c \ $(srcdir)/function.c $(srcdir)/except.h \ $(srcdir)/gcse.c $(srcdir)/integrate.c $(srcdir)/lists.c $(srcdir)/optabs.c \ - $(srcdir)/profile.c $(srcdir)/regclass.c \ + $(srcdir)/profile.c $(srcdir)/regclass.c $(srcdir)/mcf.c \ $(srcdir)/reg-stack.c $(srcdir)/cfglayout.c $(srcdir)/cfglayout.h \ $(srcdir)/sdbout.c $(srcdir)/stor-layout.c \ $(srcdir)/stringpool.c $(srcdir)/tree.c $(srcdir)/varasm.c \ diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 37ad9f1606f..a8463d40368 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -516,7 +516,7 @@ cgraph_edge (struct cgraph_node *node, gimple call_stmt) if (node->call_site_hash) return (struct cgraph_edge *) htab_find_with_hash (node->call_site_hash, call_stmt, - htab_hash_pointer (call_stmt)); + htab_hash_pointer (call_stmt)); /* This loop may turn out to be performance problem. In such case adding hashtables into call nodes with very many edges is probably best @@ -1208,7 +1208,12 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq, new_node->master_clone = n->master_clone; new_node->count = count; if (n->count) - count_scale = new_node->count * REG_BR_PROB_BASE / n->count; + { + if (new_node->count > n->count) + count_scale = REG_BR_PROB_BASE; + else + count_scale = new_node->count * REG_BR_PROB_BASE / n->count; + } else count_scale = 0; if (update_original) diff --git a/gcc/common.opt b/gcc/common.opt index bad670903e3..9fc5db3e158 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -821,6 +821,10 @@ Common Joined RejectNegative Set the top-level directory for storing the profile data. The default is 'pwd'. +fprofile-correction +Common Report Var(flag_profile_correction) +Enable correction of flow inconsistent profile data input + fprofile-generate Common Enable common options for generating profile info for profile feedback directed optimizations diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3d034536a28..1280e4928b1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -342,7 +342,8 @@ Objective-C and Objective-C++ Dialects}. -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol -fpeel-loops -fpredictive-commoning -fprefetch-loop-arrays @gol --fprofile-dir=@var{path} -fprofile-generate -fprofile-generate=@var{path} @gol +-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol +-fprofile-generate=@var{path} @gol -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol -freorder-blocks-and-partition -freorder-functions @gol @@ -6369,6 +6370,13 @@ and occasionally eliminate the copy. Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}. +@item -fprofile-correction +@opindex fprofile-correction +Profiles collected using an instrumented binary for multi-threaded programs may +be inconsistent due to missed counter updates. When this option is specified, +GCC will use heuristics to correct or smooth out such inconsistencies. By +default, GCC will emit an error message when an inconsistent profile is detected. + @item -fprofile-dir=@var{path} @opindex fprofile-dir diff --git a/gcc/profile.c b/gcc/profile.c index 7489579ca27..761c8ad4b07 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -69,21 +69,11 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "tree-pass.h" +#include "profile.h" + /* Hooks for profiling. */ static struct profile_hooks* profile_hooks; -/* Additional information about the edges we need. */ -struct edge_info { - unsigned int count_valid : 1; - - /* Is on the spanning tree. */ - unsigned int on_tree : 1; - - /* Pretend this edge does not exist (it is abnormal and we've - inserted a fake to compensate). */ - unsigned int ignore : 1; -}; - struct bb_info { unsigned int count_valid : 1; @@ -92,7 +82,6 @@ struct bb_info { gcov_type pred_count; }; -#define EDGE_INFO(e) ((struct edge_info *) (e)->aux) #define BB_INFO(b) ((struct bb_info *) (b)->aux) @@ -124,7 +113,6 @@ static gcov_type * get_exec_counts (void); static basic_block find_group (basic_block); static void union_groups (basic_block, basic_block); - /* Add edge instrumentation code to the entire insn chain. F is the first insn of the chain. @@ -278,64 +266,84 @@ get_exec_counts (void) return counts; } - -/* Compute the branch probabilities for the various branches. - Annotate them accordingly. */ + +static bool +is_edge_inconsistent (VEC(edge,gc) *edges) +{ + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, edges) + { + if (!EDGE_INFO (e)->ignore) + { + if (e->count < 0) + return true; + } + } + return false; +} static void -compute_branch_probabilities (void) +correct_negative_edge_counts (void) { basic_block bb; - int i; - int num_edges = 0; - int changes; - int passes; - int hist_br_prob[20]; - int num_never_executed; - int num_branches; - gcov_type *exec_counts = get_exec_counts (); - int exec_counts_pos = 0; + edge e; + edge_iterator ei; - /* Very simple sanity checks so we catch bugs in our profiling code. */ - if (profile_info) + FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) { - if (profile_info->run_max * profile_info->runs < profile_info->sum_max) - { - error ("corrupted profile info: run_max * runs < sum_max"); - exec_counts = NULL; - } + FOR_EACH_EDGE (e, ei, bb->succs) + { + if (e->count < 0) + e->count = 0; + } + } +} - if (profile_info->sum_all < profile_info->sum_max) - { - error ("corrupted profile info: sum_all is smaller than sum_max"); - exec_counts = NULL; - } +/* Check consistency. + Return true if inconsistency is found. */ +static bool +is_inconsistent (void) +{ + basic_block bb; + FOR_EACH_BB (bb) + { + if (is_edge_inconsistent (bb->preds)) + return true; + if (is_edge_inconsistent (bb->succs)) + return true; + if ( bb->count != sum_edge_counts (bb->preds) + || (bb->count != sum_edge_counts (bb->succs) && + !(find_edge (bb, EXIT_BLOCK_PTR) != NULL && + block_ends_with_call_p (bb)))) + return true; } - /* Attach extra info block to each bb. */ + return false; +} - alloc_aux_for_blocks (sizeof (struct bb_info)); +/* Set each basic block count to the sum of its outgoing edge counts */ +static void +set_bb_counts (void) +{ + basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->succs) - if (!EDGE_INFO (e)->ignore) - BB_INFO (bb)->succ_count++; - FOR_EACH_EDGE (e, ei, bb->preds) - if (!EDGE_INFO (e)->ignore) - BB_INFO (bb)->pred_count++; + bb->count = sum_edge_counts (bb->succs); + gcc_assert (bb->count >= 0); } +} - /* Avoid predicting entry on exit nodes. */ - BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2; - BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2; - +/* Reads profile data and returns total number of edge counts read */ +static int +read_profile_edge_counts (gcov_type *exec_counts) +{ + basic_block bb; + int num_edges = 0; + int exec_counts_pos = 0; /* For each edge not on the spanning tree, set its execution count from the .da file. */ - /* The first count in the .da file is the number of times that the function was entered. This is the exec_count for block zero. */ @@ -373,6 +381,63 @@ compute_branch_probabilities (void) } } + return num_edges; +} + +/* Compute the branch probabilities for the various branches. + Annotate them accordingly. */ + +static void +compute_branch_probabilities (void) +{ + basic_block bb; + int i; + int num_edges = 0; + int changes; + int passes; + int hist_br_prob[20]; + int num_never_executed; + int num_branches; + gcov_type *exec_counts = get_exec_counts (); + int inconsistent = 0; + + /* Very simple sanity checks so we catch bugs in our profiling code. */ + if (profile_info) + { + if (profile_info->run_max * profile_info->runs < profile_info->sum_max) + { + error ("corrupted profile info: run_max * runs < sum_max"); + exec_counts = NULL; + } + + if (profile_info->sum_all < profile_info->sum_max) + { + error ("corrupted profile info: sum_all is smaller than sum_max"); + exec_counts = NULL; + } + } + + /* Attach extra info block to each bb. */ + alloc_aux_for_blocks (sizeof (struct bb_info)); + FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (!EDGE_INFO (e)->ignore) + BB_INFO (bb)->succ_count++; + FOR_EACH_EDGE (e, ei, bb->preds) + if (!EDGE_INFO (e)->ignore) + BB_INFO (bb)->pred_count++; + } + + /* Avoid predicting entry on exit nodes. */ + BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2; + BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2; + + num_edges = read_profile_edge_counts (exec_counts); + if (dump_file) fprintf (dump_file, "\n%d edge counts read\n", num_edges); @@ -502,6 +567,31 @@ compute_branch_probabilities (void) gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count); } + /* Check for inconsistent basic block counts */ + inconsistent = is_inconsistent (); + + if (inconsistent) + { + if (flag_profile_correction) + { + /* Inconsistency detected. Make it flow-consistent. */ + static int informed = 0; + if (informed == 0) + { + informed = 1; + inform ("correcting inconsistent profile data"); + } + correct_negative_edge_counts (); + /* Set bb counts to the sum of the outgoing edge counts */ + set_bb_counts (); + if (dump_file) + fprintf (dump_file, "\nCalling mcf_smooth_cfg\n"); + mcf_smooth_cfg (); + } + else + error ("corrupted profile info: profile data is not flow-consistent"); + } + /* For every edge, calculate its branch probability and add a reg_note to the branch insn to indicate this. */ diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 7f776ccaf89..f3522f6dcb5 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -453,18 +453,32 @@ free_histograms (void) somehow. */ static bool -check_counter (gimple stmt, const char *name, gcov_type all, gcov_type bb_count) +check_counter (gimple stmt, const char * name, + gcov_type *count, gcov_type *all, gcov_type bb_count) { - if (all != bb_count) + if (*all != bb_count || *count > *all) { location_t locus; locus = (stmt != NULL) - ? gimple_location (stmt) - : DECL_SOURCE_LOCATION (current_function_decl); - error ("%HCorrupted value profile: %s profiler overall count (%d) " - "does not match BB count (%d)", &locus, name, (int)all, - (int)bb_count); - return true; + ? gimple_location (stmt) + : DECL_SOURCE_LOCATION (current_function_decl); + if (flag_profile_correction) + { + inform ("%HCorrecting inconsistent value profile: " + "%s profiler overall count (%d) does not match BB count " + "(%d)", &locus, name, (int)all, (int)bb_count); + *all = bb_count; + if (*count > *all) + *count = *all; + return false; + } + else + { + error ("%HCorrupted value profile: %s profiler overall count (%d) " + "does not match BB count (%d)", &locus, name, (int)all, + (int)bb_count); + return true; + } } return false; @@ -658,7 +672,7 @@ gimple_divmod_fixed_value_transform (gimple_stmt_iterator *si) || !maybe_hot_bb_p (gimple_bb (stmt))) return false; - if (check_counter (stmt, "value", all, gimple_bb (stmt)->count)) + if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count)) return false; /* Compute probability of taking the optimal path. */ @@ -818,7 +832,7 @@ gimple_mod_pow2_value_transform (gimple_stmt_iterator *si) /* Compute probability of taking the optimal path. */ all = count + wrong_values; - if (check_counter (stmt, "pow2", all, gimple_bb (stmt)->count)) + if (check_counter (stmt, "pow2", &count, &all, gimple_bb (stmt)->count)) return false; if (all > 0) @@ -982,12 +996,17 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si) count2 = histogram->hvalue.counters[1]; /* Compute probability of taking the optimal path. */ - if (check_counter (stmt, "interval", all, gimple_bb (stmt)->count)) + if (check_counter (stmt, "interval", &count1, &all, gimple_bb (stmt)->count)) { gimple_remove_histogram_value (cfun, stmt, histogram); return false; } + if (flag_profile_correction && count1 + count2 > all) + all = count1 + count2; + + gcc_assert (count1 + count2 <= all); + /* We require that we use just subtractions in at least 50% of all evaluations. */ count = 0; @@ -1160,7 +1179,7 @@ static bool gimple_ic_transform (gimple stmt) { histogram_value histogram; - gcov_type val, count, all; + gcov_type val, count, all, bb_all; gcov_type prob; tree callee; gimple modify; @@ -1186,6 +1205,14 @@ gimple_ic_transform (gimple stmt) if (4 * count <= 3 * all) return false; + bb_all = gimple_bb (stmt)->count; + /* The order of CHECK_COUNTER calls is important - + since check_counter can correct the third parameter + and we want to make count <= all <= bb_all. */ + if ( check_counter (stmt, "ic", &all, &bb_all, bb_all) + || check_counter (stmt, "ic", &count, &all, all)) + return false; + if (all > 0) prob = (count * REG_BR_PROB_BASE + all / 2) / all; else @@ -1372,7 +1399,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi) at least 80% of time. */ if ((6 * count / 5) < all || !maybe_hot_bb_p (gimple_bb (stmt))) return false; - if (check_counter (stmt, "value", all, gimple_bb (stmt)->count)) + if (check_counter (stmt, "value", &count, &all, gimple_bb (stmt)->count)) return false; if (all > 0) prob = (count * REG_BR_PROB_BASE + all / 2) / all; -- 2.30.2