From 6d9901e75f19dfd34480c4cf25859b7d4a3bff01 Mon Sep 17 00:00:00 2001 From: Zdenek Dvorak Date: Wed, 1 Sep 2004 22:58:55 +0200 Subject: [PATCH] Makefile.in (rtl-profile.o, [...]): Add GCC_H dependency. * Makefile.in (rtl-profile.o, value-prof.o): Add GCC_H dependency. * common.opt (fspeculative-prefetching): New. * flags.h (flag_speculative_prefetching_set): Declare. * gcov-io.c (gcov_write_counter, gcov_read_counter): Allow negative values. * opts.c (flag_sepculative_prefetching_set): New variable. (common_handle_option): Handle -fspeculative-prefetching. * passes.c (rest_of_compilation): Ditto. * profile.c (instrument_values, compute_value_histograms, branch_prob): Use vectors instead of arrays. * toplev.c (process_options): Handle -fspeculative-prefetching. * rtl-profile.c: Include ggc.h. (rtl_gen_interval_profiler, rtl_gen_pow2_profiler, rtl_gen_one_value_profiler_no_edge_manipulation, rtl_gen_one_value_profiler, rtl_gen_const_delta_profiler): Type of argument changed. * tree-profile.c (tree_gen_interval_profiler, tree_gen_pow2_profiler, tree_gen_one_value_profiler, tree_gen_const_delta_profiler): Type of argument changed. * value-prof.c: Include ggc.h. (NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX): New macros. (insn_prefetch_values_to_profile, find_mem_reference_1, find_mem_reference_2, find_mem_reference, gen_speculative_prefetch, speculative_prefetching_transform): New. (value_profile_transformations): Call speculative_prefetching_transform. (insn_values_to_profile): Call insn_prefetch_values_to_profile. (insn_divmod_values_to_profile, rtl_find_values_to_profile, tree_find_values_to_profile, find_values to profile): Use vectors instead of arrays. (free_profiled_values): Removed. * value-prof.h (struct histogram_value): Renamed to struct histogram_value_t. (histogram_value, histogram_values): New types. (find_values_to_profile): Declaration changed. (free_profiled_values): Removed. 
(struct profile_hooks): Type of argument of the hooks changed to histogram_value. * doc/invoke.texi (-fspeculative-prefetching): Document. From-SVN: r86930 --- gcc/ChangeLog | 42 +++++ gcc/Makefile.in | 4 +- gcc/common.opt | 4 + gcc/doc/invoke.texi | 17 +- gcc/flags.h | 4 + gcc/gcov-io.c | 7 +- gcc/opts.c | 15 +- gcc/passes.c | 3 +- gcc/profile.c | 67 ++++---- gcc/rtl-profile.c | 34 ++-- gcc/toplev.c | 23 +++ gcc/tree-profile.c | 8 +- gcc/value-prof.c | 395 +++++++++++++++++++++++++++++++++++--------- gcc/value-prof.h | 33 ++-- 14 files changed, 499 insertions(+), 157 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e0912713675..04d4fc45a11 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2004-09-01 Zdenek Dvorak + + * Makefile.in (rtl-profile.o, value-prof.o): Add GCC_H dependency. + * common.opt (fspeculative-prefetching): New. + * flags.h (flag_speculative_prefetching_set): Declare. + * gcov-io.c (gcov_write_counter, gcov_read_counter): Allow negative + values. + * opts.c (flag_sepculative_prefetching_set): New variable. + (common_handle_option): Handle -fspeculative-prefetching. + * passes.c (rest_of_compilation): Ditto. + * profile.c (instrument_values, compute_value_histograms, branch_prob): + Use vectors instead of arrays. + * toplev.c (process_options): Handle -fspeculative-prefetching. + * rtl-profile.c: Include ggc.h. + (rtl_gen_interval_profiler, rtl_gen_pow2_profiler, + rtl_gen_one_value_profiler_no_edge_manipulation, + rtl_gen_one_value_profiler, rtl_gen_const_delta_profiler): Type of + argument changed. + * tree-profile.c (tree_gen_interval_profiler, tree_gen_pow2_profiler, + tree_gen_one_value_profiler, tree_gen_const_delta_profiler): Type of + argument changed. + * value-prof.c: Include ggc.h. + (NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX): New + macros. 
+ (insn_prefetch_values_to_profile, find_mem_reference_1, + find_mem_reference_2, find_mem_reference, gen_speculative_prefetch, + speculative_prefetching_transform): New. + (value_profile_transformations): Call speculative_prefetching_transform. + (insn_values_to_profile): Call insn_prefetch_values_to_profile. + (insn_divmod_values_to_profile, rtl_find_values_to_profile, + tree_find_values_to_profile, find_values to profile): Use vectors + instead of arrays. + (free_profiled_values): Removed. + * value-prof.h (struct histogram_value): Renamed to + struct histogram_value_t. + (histogram_value, histogram_values): New types. + (find_values_to_profile): Declaration changed. + (free_profiled_values): Removed. + (struct profile_hooks): Type of argument of the hooks changed to + histogram_value. + * doc/invoke.texi (-fspeculative-prefetching): Document. + 2004-09-01 Zdenek Dvorak PR rtl-optimization/16408 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 40411f69fde..dd2333cd5d9 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1959,10 +1959,10 @@ tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) rtl-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) function.h \ - toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h + toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h $(GGC_H) value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \ - $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) + $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H) loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(FLAGS_H) $(LOOP_H) \ insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \ real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h $(CFGLOOP_H) \ diff --git a/gcc/common.opt 
b/gcc/common.opt index 2f615f7be53..e74596470fc 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -752,6 +752,10 @@ fsingle-precision-constant Common Report Var(flag_single_precision_constant) Convert floating point constants to single precision constants +fspeculative-prefetching +Common Report Var(flag_speculative_prefetching) +Use value profiling for speculative prefetching + ; Emit code to probe the stack, to help detect stack overflow; also ; may cause large objects to be allocated dynamically. fstack-check diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 96b0fcea791..b4efeb4c2b1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -311,7 +311,7 @@ Objective-C and Objective-C++ Dialects}. -fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol -fsched2-use-superblocks @gol -fsched2-use-traces -freschedule-modulo-scheduled-loops @gol --fsignaling-nans -fsingle-precision-constant @gol +-fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol -fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol -funroll-all-loops -funroll-loops -fpeel-loops @gol -funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol @@ -5011,6 +5011,21 @@ and actually performs the optimizations based on them. Currently the optimizations include specialization of division operation using the knowledge about the value of the denominator. +@item -fspeculative-prefetching +@opindex fspeculative-prefetching +If combined with @option{-fprofile-arcs}, it instructs the compiler to add +a code to gather information about addresses of memory references in the +program. + +With @option{-fbranch-probabilities}, it reads back the data gathered +and issues prefetch instructions according to them. In addition to the opportunities +noticed by @option{-fprefetch-loop-arrays}, it also notices more complicated +memory access patterns -- for example accesses to the data stored in linked +list whose elements are usually allocated sequentially. 
+ +In order to prevent issuing double prefetches, usage of +@option{-fspeculative-prefetching} implies @option{-fno-prefetch-loop-arrays}. + Enabled with @option{-fprofile-generate} and @option{-fprofile-use}. @item -frename-registers diff --git a/gcc/flags.h b/gcc/flags.h index c0fcc3c256d..fb240356f7a 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -257,6 +257,10 @@ extern int flag_remove_unreachable_functions; /* Nonzero if we should track variables. */ extern int flag_var_tracking; +/* True if flag_speculative_prefetching was set by user. Used to suppress + warning message in case flag was set by -fprofile-{generate,use}. */ +extern bool flag_speculative_prefetching_set; + /* A string that's used when a random name is required. NULL means to make it really random. */ diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c index 3b4dcd60265..7370f510d37 100644 --- a/gcc/gcov-io.c +++ b/gcc/gcov-io.c @@ -268,9 +268,6 @@ gcov_write_counter (gcov_type value) buffer[1] = (gcov_unsigned_t) (value >> 32); else buffer[1] = 0; - - if (value < 0) - gcov_var.error = -1; } #endif /* IN_LIBGCOV */ @@ -453,9 +450,7 @@ gcov_read_counter (void) value |= ((gcov_type) from_file (buffer[1])) << 32; else if (buffer[1]) gcov_var.error = -1; - - if (value < 0) - gcov_var.error = -1; + return value; } diff --git a/gcc/opts.c b/gcc/opts.c index b802bcf35a2..3c498273512 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -93,6 +93,7 @@ static const char undocumented_msg[] = N_("This switch lacks documentation"); static bool profile_arc_flag_set, flag_profile_values_set; static bool flag_unroll_loops_set, flag_tracer_set; static bool flag_value_profile_transformations_set; +bool flag_speculative_prefetching_set; static bool flag_peel_loops_set, flag_branch_probabilities_set; /* Input file names. 
*/ @@ -830,6 +831,10 @@ common_handle_option (size_t scode, const char *arg, int value) flag_tracer = value; if (!flag_value_profile_transformations_set) flag_value_profile_transformations = value; +#ifdef HAVE_prefetch + if (!flag_speculative_prefetching_set) + flag_speculative_prefetching = value; +#endif break; case OPT_fprofile_generate: @@ -839,6 +844,10 @@ common_handle_option (size_t scode, const char *arg, int value) flag_profile_values = value; if (!flag_value_profile_transformations_set) flag_value_profile_transformations = value; +#ifdef HAVE_prefetch + if (!flag_speculative_prefetching_set) + flag_speculative_prefetching = value; +#endif break; case OPT_fprofile_values: @@ -861,7 +870,11 @@ common_handle_option (size_t scode, const char *arg, int value) break; case OPT_fvpt: - flag_value_profile_transformations_set = value; + flag_value_profile_transformations_set = true; + break; + + case OPT_fspeculative_prefetching: + flag_speculative_prefetching_set = true; break; case OPT_frandom_seed: diff --git a/gcc/passes.c b/gcc/passes.c index 06c2d8990f7..783e33d8bbc 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1820,7 +1820,8 @@ rest_of_compilation (void) if (flag_branch_probabilities && flag_profile_values - && flag_value_profile_transformations) + && (flag_value_profile_transformations + || flag_speculative_prefetching)) rest_of_handle_value_profile_transformations (); /* Remove the death notes created for vpt. */ diff --git a/gcc/profile.c b/gcc/profile.c index 002e7a1b694..2200e76399d 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -119,9 +119,9 @@ static int total_num_branches; /* Forward declarations. 
*/ static void find_spanning_tree (struct edge_list *); static unsigned instrument_edges (struct edge_list *); -static void instrument_values (unsigned, struct histogram_value *); +static void instrument_values (histogram_values); static void compute_branch_probabilities (void); -static void compute_value_histograms (unsigned, struct histogram_value *); +static void compute_value_histograms (histogram_values); static gcov_type * get_exec_counts (void); static basic_block find_group (basic_block); static void union_groups (basic_block, basic_block); @@ -166,17 +166,18 @@ instrument_edges (struct edge_list *el) return num_instr_edges; } -/* Add code to measure histograms list of VALUES of length N_VALUES. */ +/* Add code to measure histograms for values in list VALUES. */ static void -instrument_values (unsigned n_values, struct histogram_value *values) +instrument_values (histogram_values values) { unsigned i, t; /* Emit code to generate the histograms before the insns. */ - for (i = 0; i < n_values; i++) + for (i = 0; i < VEC_length (histogram_value, values); i++) { - switch (values[i].type) + histogram_value hist = VEC_index (histogram_value, values, i); + switch (hist->type) { case HIST_TYPE_INTERVAL: t = GCOV_COUNTER_V_INTERVAL; @@ -197,25 +198,25 @@ instrument_values (unsigned n_values, struct histogram_value *values) default: abort (); } - if (!coverage_counter_alloc (t, values[i].n_counters)) + if (!coverage_counter_alloc (t, hist->n_counters)) continue; - switch (values[i].type) + switch (hist->type) { case HIST_TYPE_INTERVAL: - (profile_hooks->gen_interval_profiler) (values + i, t, 0); + (profile_hooks->gen_interval_profiler) (hist, t, 0); break; case HIST_TYPE_POW2: - (profile_hooks->gen_pow2_profiler) (values + i, t, 0); + (profile_hooks->gen_pow2_profiler) (hist, t, 0); break; case HIST_TYPE_SINGLE_VALUE: - (profile_hooks->gen_one_value_profiler) (values + i, t, 0); + (profile_hooks->gen_one_value_profiler) (hist, t, 0); break; case 
HIST_TYPE_CONST_DELTA: - (profile_hooks->gen_const_delta_profiler) (values + i, t, 0); + (profile_hooks->gen_const_delta_profiler) (hist, t, 0); break; default: @@ -613,22 +614,27 @@ compute_branch_probabilities (void) free_aux_for_blocks (); } -/* Load value histograms for N_VALUES values whose description is stored - in VALUES array from .da file. */ +/* Load value histograms values whose description is stored in VALUES array + from .da file. */ + static void -compute_value_histograms (unsigned n_values, struct histogram_value *values) +compute_value_histograms (histogram_values values) { unsigned i, j, t, any; unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS]; gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS]; gcov_type *act_count[GCOV_N_VALUE_COUNTERS]; gcov_type *aact_count; + histogram_value hist; for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) n_histogram_counters[t] = 0; - for (i = 0; i < n_values; i++) - n_histogram_counters[(int) (values[i].type)] += values[i].n_counters; + for (i = 0; i < VEC_length (histogram_value, values); i++) + { + hist = VEC_index (histogram_value, values, i); + n_histogram_counters[(int) hist->type] += hist->n_counters; + } any = 0; for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) @@ -649,25 +655,27 @@ compute_value_histograms (unsigned n_values, struct histogram_value *values) if (!any) return; - for (i = 0; i < n_values; i++) + for (i = 0; i < VEC_length (histogram_value, values); i++) { rtx hist_list = NULL_RTX; - t = (int) (values[i].type); + + hist = VEC_index (histogram_value, values, i); + t = (int) hist->type; /* FIXME: make this work for trees. 
*/ if (!ir_type ()) { aact_count = act_count[t]; - act_count[t] += values[i].n_counters; - for (j = values[i].n_counters; j > 0; j--) + act_count[t] += hist->n_counters; + for (j = hist->n_counters; j > 0; j--) hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]), hist_list); hist_list = alloc_EXPR_LIST (0, - copy_rtx ((rtx)values[i].value), hist_list); - hist_list = alloc_EXPR_LIST (0, GEN_INT (values[i].type), hist_list); - REG_NOTES ((rtx)values[i].insn) = + copy_rtx ((rtx) hist->value), hist_list); + hist_list = alloc_EXPR_LIST (0, GEN_INT (hist->type), hist_list); + REG_NOTES ((rtx) hist->insn) = alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list, - REG_NOTES ((rtx)values[i].insn)); + REG_NOTES ((rtx) hist->insn)); } } @@ -700,8 +708,7 @@ branch_prob (void) unsigned num_edges, ignored_edges; unsigned num_instrumented; struct edge_list *el; - unsigned n_values = 0; - struct histogram_value *values = NULL; + histogram_values values = NULL; total_num_times_called++; @@ -960,13 +967,13 @@ branch_prob (void) #undef BB_TO_GCOV_INDEX if (flag_profile_values) - find_values_to_profile (&n_values, &values); + find_values_to_profile (&values); if (flag_branch_probabilities) { compute_branch_probabilities (); if (flag_profile_values) - compute_value_histograms (n_values, values); + compute_value_histograms (values); } remove_fake_edges (); @@ -981,7 +988,7 @@ branch_prob (void) abort (); if (flag_profile_values) - instrument_values (n_values, values); + instrument_values (values); /* Commit changes done by instrumentation. */ if (ir_type ()) diff --git a/gcc/rtl-profile.c b/gcc/rtl-profile.c index a53a00464a9..2d0c69ccb91 100644 --- a/gcc/rtl-profile.c +++ b/gcc/rtl-profile.c @@ -62,6 +62,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "coverage.h" #include "value-prof.h" #include "tree.h" +#include "ggc.h" /* Output instructions as RTL to increment the edge execution count. 
*/ @@ -93,8 +94,7 @@ rtl_gen_edge_profiler (int edgeno, edge e) section for counters, BASE is offset of the counter position. */ static void -rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag, - unsigned base) +rtl_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base) { unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); @@ -196,8 +196,7 @@ rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag, section for counters, BASE is offset of the counter position. */ static void -rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag, - unsigned base) +rtl_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base) { unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); @@ -272,8 +271,8 @@ rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag, section for counters, BASE is offset of the counter position. */ static rtx -rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value, - unsigned tag, unsigned base) +rtl_gen_one_value_profiler_no_edge_manipulation (histogram_value value, + unsigned tag, unsigned base) { unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); @@ -351,8 +350,7 @@ rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value, section for counters, BASE is offset of the counter position. 
*/ static void -rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag, - unsigned base) +rtl_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base) { edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn), PREV_INSN ((rtx)value->insn)); @@ -368,10 +366,9 @@ rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag, section for counters, BASE is offset of the counter position. */ static void -rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag, - unsigned base) +rtl_gen_const_delta_profiler (histogram_value value, unsigned tag, unsigned base) { - struct histogram_value one_value_delta; + histogram_value one_value_delta; unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); rtx stored_value_ref, stored_value, tmp, uval; @@ -393,13 +390,14 @@ rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag, copy_rtx (uval), copy_rtx (stored_value), NULL_RTX, 0, OPTAB_WIDEN); - one_value_delta.value = tmp; - one_value_delta.mode = mode; - one_value_delta.seq = NULL_RTX; - one_value_delta.insn = value->insn; - one_value_delta.type = HIST_TYPE_SINGLE_VALUE; - emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (&one_value_delta, - tag, base + 1)); + one_value_delta = ggc_alloc (sizeof (*one_value_delta)); + one_value_delta->value = tmp; + one_value_delta->mode = mode; + one_value_delta->seq = NULL_RTX; + one_value_delta->insn = value->insn; + one_value_delta->type = HIST_TYPE_SINGLE_VALUE; + emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (one_value_delta, + tag, base + 1)); emit_move_insn (copy_rtx (stored_value), uval); sequence = get_insns (); end_sequence (); diff --git a/gcc/toplev.c b/gcc/toplev.c index 34db3bd4a76..853f17048c0 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1730,6 +1730,17 @@ process_options (void) if (flag_value_profile_transformations) flag_profile_values = 1; + /* 
Speculative prefetching implies the value profiling. We also switch off + the prefetching in the loop optimizer, so that we do not emit double + prefetches. TODO -- we should teach these two to cooperate; the loop + based prefetching may sometimes do a better job, especially in connection + with reuse analysis. */ + if (flag_speculative_prefetching) + { + flag_profile_values = 1; + flag_prefetch_loop_arrays = 0; + } + /* Warn about options that are not supported on this machine. */ #ifndef INSN_SCHEDULING if (flag_schedule_insns || flag_schedule_insns_after_reload) @@ -1898,12 +1909,24 @@ process_options (void) warning ("-fprefetch-loop-arrays not supported for this target"); flag_prefetch_loop_arrays = 0; } + if (flag_speculative_prefetching) + { + if (flag_speculative_prefetching_set) + warning ("-fspeculative-prefetching not supported for this target"); + flag_speculative_prefetching = 0; + } #else if (flag_prefetch_loop_arrays && !HAVE_prefetch) { warning ("-fprefetch-loop-arrays not supported for this target (try -march switches)"); flag_prefetch_loop_arrays = 0; } + if (flag_speculative_prefetching && !HAVE_prefetch) + { + if (flag_speculative_prefetching_set) + warning ("-fspeculative-prefetching not supported for this target (try -march switches)"); + flag_speculative_prefetching = 0; + } #endif /* This combination of options isn't handled for i386 targets and doesn't diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c index 1a7b744f3a7..7f184152cb7 100644 --- a/gcc/tree-profile.c +++ b/gcc/tree-profile.c @@ -94,7 +94,7 @@ tree_gen_edge_profiler (int edgeno, edge e) tag of the section for counters, BASE is offset of the counter position.
*/ static void -tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, +tree_gen_interval_profiler (histogram_value value ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED, unsigned base ATTRIBUTE_UNUSED) { @@ -107,7 +107,7 @@ tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, of the section for counters, BASE is offset of the counter position. */ static void -tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, +tree_gen_pow2_profiler (histogram_value value ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED, unsigned base ATTRIBUTE_UNUSED) { @@ -120,7 +120,7 @@ tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, section for counters, BASE is offset of the counter position. */ static void -tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, +tree_gen_one_value_profiler (histogram_value value ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED, unsigned base ATTRIBUTE_UNUSED) { @@ -134,7 +134,7 @@ tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, section for counters, BASE is offset of the counter position. */ static void -tree_gen_const_delta_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, +tree_gen_const_delta_profiler (histogram_value value ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED, unsigned base ATTRIBUTE_UNUSED) { diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 17f78f6cbbd..a01c1c9b2da 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -33,11 +33,20 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "recog.h" #include "optabs.h" #include "regs.h" +#include "ggc.h" static struct value_prof_hooks *value_prof_hooks; -/* In this file value profile based optimizations will be placed (none are - here just now, but they are hopefully coming soon). +/* In this file value profile based optimizations are placed. 
Currently the + following optimizations are implemented (for more detailed descriptions + see comments at value_profile_transformations): + + 1) Division/modulo specialisation. Provided that we can determine that the + operands of the division have some special properties, we may use it to + produce more effective code. + 2) Speculative prefetching. If we are able to determine that the difference + between addresses accessed by a memory reference is usually constant, we + may add the prefetch instructions. Every such optimization should add its requirements for profiled values to insn_values_to_profile function. This function is called from branch_prob @@ -52,35 +61,51 @@ static struct value_prof_hooks *value_prof_hooks; -- the expression that is profiled -- list of counters starting from the first one. */ -static void insn_divmod_values_to_profile (rtx, unsigned *, - struct histogram_value **); -static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); +/* For speculative prefetching, the range in that we do not prefetch (because + we assume that it will be in cache anyway). The assymetry between min and + max range is trying to reflect the fact that the sequential prefetching + of the data is commonly done directly by hardware. Nevertheless, these + values are just a guess and should of course be target-specific. 
*/ + +#ifndef NOPREFETCH_RANGE_MIN +#define NOPREFETCH_RANGE_MIN (-16) +#endif +#ifndef NOPREFETCH_RANGE_MAX +#define NOPREFETCH_RANGE_MAX 32 +#endif + +static void insn_divmod_values_to_profile (rtx, histogram_values *); +#ifdef HAVE_prefetch +static bool insn_prefetch_values_to_profile (rtx, histogram_values *); +static int find_mem_reference_1 (rtx *, void *); +static void find_mem_reference_2 (rtx, rtx, void *); +static bool find_mem_reference (rtx, rtx *, int *); +#endif + +static void insn_values_to_profile (rtx, histogram_values *); static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, rtx, gcov_type, int); static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int); static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int, int, int); +#ifdef HAVE_prefetch +static rtx gen_speculative_prefetch (rtx, gcov_type, int); +#endif static bool divmod_fixed_value_transform (rtx insn); static bool mod_pow2_value_transform (rtx); static bool mod_subtract_transform (rtx); +#ifdef HAVE_prefetch +static bool speculative_prefetching_transform (rtx); +#endif -/* Release the list of VALUES of length N_VALUES for that we want to measure - histograms. */ -void -free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED, - struct histogram_value *values) -{ - free (values); -} - /* Find values inside INSN for that we want to measure histograms for - division/modulo optimization. */ + division/modulo optimization and stores them to VALUES. */ static void -insn_divmod_values_to_profile (rtx insn, unsigned *n_values, - struct histogram_value **values) +insn_divmod_values_to_profile (rtx insn, histogram_values *values) { rtx set, set_src, op1, op2; enum machine_mode mode; + histogram_value hist; if (!INSN_P (insn)) return; @@ -108,30 +133,26 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, /* Check for a special case where the divisor is power of 2. 
*/ if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) { - *values = xrealloc (*values, - (*n_values + 1) - * sizeof (struct histogram_value)); - (*values)[*n_values].value = op2; - (*values)[*n_values].seq = NULL_RTX; - (*values)[*n_values].mode = mode; - (*values)[*n_values].insn = insn; - (*values)[*n_values].type = HIST_TYPE_POW2; - (*values)[*n_values].hdata.pow2.may_be_other = 1; - (*n_values)++; + hist = ggc_alloc (sizeof (*hist)); + hist->value = op2; + hist->seq = NULL_RTX; + hist->mode = mode; + hist->insn = insn; + hist->type = HIST_TYPE_POW2; + hist->hdata.pow2.may_be_other = 1; + VEC_safe_push (histogram_value, *values, hist); } /* Check whether the divisor is not in fact a constant. */ if (!CONSTANT_P (op2)) { - *values = xrealloc (*values, - (*n_values + 1) - * sizeof (struct histogram_value)); - (*values)[*n_values].value = op2; - (*values)[*n_values].mode = mode; - (*values)[*n_values].seq = NULL_RTX; - (*values)[*n_values].insn = insn; - (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE; - (*n_values)++; + hist = ggc_alloc (sizeof (*hist)); + hist->value = op2; + hist->mode = mode; + hist->seq = NULL_RTX; + hist->insn = insn; + hist->type = HIST_TYPE_SINGLE_VALUE; + VEC_safe_push (histogram_value, *values, hist); } /* For mod, check whether it is not often a noop (or replaceable by @@ -140,22 +161,20 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, { rtx tmp; - *values = xrealloc (*values, - (*n_values + 1) - * sizeof (struct histogram_value)); + hist = ggc_alloc (sizeof (*hist)); start_sequence (); tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); - (*values)[*n_values].value = force_operand (tmp, NULL_RTX); - (*values)[*n_values].seq = get_insns (); + hist->value = force_operand (tmp, NULL_RTX); + hist->seq = get_insns (); end_sequence (); - (*values)[*n_values].mode = mode; - (*values)[*n_values].insn = insn; - (*values)[*n_values].type = HIST_TYPE_INTERVAL; - (*values)[*n_values].hdata.intvl.int_start = 
0; - (*values)[*n_values].hdata.intvl.steps = 2; - (*values)[*n_values].hdata.intvl.may_be_less = 1; - (*values)[*n_values].hdata.intvl.may_be_more = 1; - (*n_values)++; + hist->mode = mode; + hist->insn = insn; + hist->type = HIST_TYPE_INTERVAL; + hist->hdata.intvl.int_start = 0; + hist->hdata.intvl.steps = 2; + hist->hdata.intvl.may_be_less = 1; + hist->hdata.intvl.may_be_more = 1; + VEC_safe_push (histogram_value, *values, hist); } return; @@ -164,72 +183,162 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, } } +#ifdef HAVE_prefetch + +/* Called from find_mem_reference through for_each_rtx, finds a memory + reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored + to *RET and the traversing of the expression is interrupted by returning 1. + Otherwise 0 is returned. */ + +static int +find_mem_reference_1 (rtx *expr, void *ret) +{ + rtx *mem = ret; + + if (GET_CODE (*expr) == MEM) + { + *mem = *expr; + return 1; + } + return 0; +} + +/* Called form find_mem_reference through note_stores to find out whether + the memory reference MEM is a store. I.e. if EXPR == MEM, the variable + FMR2_WRITE is set to true. */ + +static int fmr2_write; +static void +find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem) +{ + if (expr == mem) + fmr2_write = true; +} + +/* Find a memory reference inside INSN, return it in MEM. Set WRITE to true + if it is a write of the mem. Return false if no memory reference is found, + true otherwise. */ + +static bool +find_mem_reference (rtx insn, rtx *mem, int *write) +{ + *mem = NULL_RTX; + for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem); + + if (!*mem) + return false; + + fmr2_write = false; + note_stores (PATTERN (insn), find_mem_reference_2, *mem); + *write = fmr2_write; + return true; +} + +/* Find values inside INSN for that we want to measure histograms for + a speculative prefetching. Add them to the list VALUES. + Returns true if such we found any such value, false otherwise. 
*/ + +static bool +insn_prefetch_values_to_profile (rtx insn, histogram_values *values) +{ + rtx mem, address; + int write; + histogram_value hist; + + if (!INSN_P (insn)) + return false; + + if (!find_mem_reference (insn, &mem, &write)) + return false; + + address = XEXP (mem, 0); + if (side_effects_p (address)) + return false; + + if (CONSTANT_P (address)) + return false; + + hist = ggc_alloc (sizeof (*hist)); + hist->value = address; + hist->mode = GET_MODE (address); + hist->seq = NULL_RTX; + hist->insn = insn; + hist->type = HIST_TYPE_CONST_DELTA; + VEC_safe_push (histogram_value, *values, hist); + + return true; +} +#endif /* Find values inside INSN for that we want to measure histograms and adds them to list VALUES (increasing the record of its length in N_VALUES). */ static void -insn_values_to_profile (rtx insn, - unsigned *n_values, - struct histogram_value **values) +insn_values_to_profile (rtx insn, histogram_values *values) { if (flag_value_profile_transformations) - insn_divmod_values_to_profile (insn, n_values, values); + insn_divmod_values_to_profile (insn, values); + +#ifdef HAVE_prefetch + if (flag_speculative_prefetching) + insn_prefetch_values_to_profile (insn, values); +#endif } /* Find list of values for that we want to measure histograms. 
*/ static void -rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values) +rtl_find_values_to_profile (histogram_values *values) { rtx insn; unsigned i; life_analysis (NULL, PROP_DEATH_NOTES); - *n_values = 0; - *values = NULL; + *values = VEC_alloc (histogram_value, 0); for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - insn_values_to_profile (insn, n_values, values); + insn_values_to_profile (insn, values); - for (i = 0; i < *n_values; i++) + for (i = 0; i < VEC_length (histogram_value, *values); i++) { - switch ((*values)[i].type) + histogram_value hist = VEC_index (histogram_value, *values, i); + + switch (hist->type) { case HIST_TYPE_INTERVAL: if (dump_file) fprintf (dump_file, "Interval counter for insn %d, range %d -- %d.\n", - INSN_UID ((rtx)(*values)[i].insn), - (*values)[i].hdata.intvl.int_start, - ((*values)[i].hdata.intvl.int_start - + (*values)[i].hdata.intvl.steps - 1)); - (*values)[i].n_counters = (*values)[i].hdata.intvl.steps + - ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + - ((*values)[i].hdata.intvl.may_be_more ? 1 : 0); + INSN_UID ((rtx)hist->insn), + hist->hdata.intvl.int_start, + (hist->hdata.intvl.int_start + + hist->hdata.intvl.steps - 1)); + hist->n_counters = hist->hdata.intvl.steps + + (hist->hdata.intvl.may_be_less ? 1 : 0) + + (hist->hdata.intvl.may_be_more ? 1 : 0); break; case HIST_TYPE_POW2: if (dump_file) fprintf (dump_file, "Pow2 counter for insn %d.\n", - INSN_UID ((rtx)(*values)[i].insn)); - (*values)[i].n_counters - = GET_MODE_BITSIZE ((*values)[i].mode) - + ((*values)[i].hdata.pow2.may_be_other ? 1 : 0); + INSN_UID ((rtx)hist->insn)); + hist->n_counters + = GET_MODE_BITSIZE (hist->mode) + + (hist->hdata.pow2.may_be_other ? 
1 : 0); break; case HIST_TYPE_SINGLE_VALUE: if (dump_file) fprintf (dump_file, "Single value counter for insn %d.\n", - INSN_UID ((rtx)(*values)[i].insn)); - (*values)[i].n_counters = 3; + INSN_UID ((rtx)hist->insn)); + hist->n_counters = 3; break; case HIST_TYPE_CONST_DELTA: if (dump_file) fprintf (dump_file, "Constant delta counter for insn %d.\n", - INSN_UID ((rtx)(*values)[i].insn)); - (*values)[i].n_counters = 4; + INSN_UID ((rtx)hist->insn)); + hist->n_counters = 4; break; default: @@ -300,6 +409,23 @@ rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values) It would be possible to continue analogically for K * b for other small K's, but it is probably not useful. + 5) + + Read or write of mem[address], where the value of address changes usually + by a constant C != 0 between the following accesses to the computation; with + -fspeculative-prefetching we then add a prefetch of address + C before + the insn. This handles prefetching of several interesting cases in addition + to a simple prefetching for addresses that are induction variables, e. g. + linked lists allocated sequentially (even in case they are processed + recursively). + + TODO -- we should also check whether there is not (usually) a small + difference with the adjacent memory references, so that we do + not issue overlapping prefetches. Also we should employ some + heuristics to eliminate cases where prefetching evidently spoils + the code. 
+ -- it should somehow cooperate with the loop optimizer prefetching + TODO: There are other useful cases that could be handled by a similar mechanism, @@ -353,6 +479,11 @@ rtl_value_profile_transformations (void) || divmod_fixed_value_transform (insn) || mod_pow2_value_transform (insn))) changed = true; +#ifdef HAVE_prefetch + if (flag_speculative_prefetching + && speculative_prefetching_transform (insn)) + changed = true; +#endif } if (changed) @@ -754,12 +885,118 @@ mod_subtract_transform (rtx insn) return true; } + +#ifdef HAVE_prefetch +/* Generate code for transformation 5 for mem with ADDRESS and a constant + step DELTA. WRITE is true if the reference is a store to mem. */ + +static rtx +gen_speculative_prefetch (rtx address, gcov_type delta, int write) +{ + rtx tmp; + rtx sequence; + + /* TODO: we do the prefetching for just one iteration ahead, which + often is not enough. */ + start_sequence (); + if (offsettable_address_p (0, VOIDmode, address)) + tmp = plus_constant (copy_rtx (address), delta); + else + { + tmp = simplify_gen_binary (PLUS, Pmode, + copy_rtx (address), GEN_INT (delta)); + tmp = force_operand (tmp, NULL); + } + if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate) + (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode)) + tmp = force_reg (Pmode, tmp); + emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3))); + sequence = get_insns (); + end_sequence (); + + return sequence; +} + +/* Do transform 5) on INSN if applicable. 
*/ + +static bool +speculative_prefetching_transform (rtx insn) +{ + rtx histogram, value; + gcov_type val, count, all; + edge e; + rtx mem, address; + int write; + + if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) + return false; + + if (!find_mem_reference (insn, &mem, &write)) + return false; + + address = XEXP (mem, 0); + if (side_effects_p (address)) + return false; + + if (CONSTANT_P (address)) + return false; + + for (histogram = REG_NOTES (insn); + histogram; + histogram = XEXP (histogram, 1)) + if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE + && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA)) + break; + + if (!histogram) + return false; + + histogram = XEXP (XEXP (histogram, 0), 1); + value = XEXP (histogram, 0); + histogram = XEXP (histogram, 1); + /* Skip last value referenced. */ + histogram = XEXP (histogram, 1); + val = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + count = INTVAL (XEXP (histogram, 0)); + histogram = XEXP (histogram, 1); + all = INTVAL (XEXP (histogram, 0)); + + /* With that few executions we do not really have a reason to optimize the + statement, and more importantly, the data about differences of addresses + are spoiled by the first item that had no previous value to compare + with. */ + if (all < 4) + return false; + + /* We require that count is at least half of all; this means + that for the transformation to fire the value must be constant + at least 50% of time (and 75% gives the guarantee of usage). */ + if (!rtx_equal_p (address, value) || 2 * count < all) + return false; + + /* If the difference is too small, it does not make too much sense to + prefetch, as the memory is probably already in cache. 
*/ + if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX) + return false; + + if (dump_file) + fprintf (dump_file, "Speculative prefetching for insn %d\n", + INSN_UID (insn)); + + e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); + + insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e); + + return true; +} +#endif /* HAVE_prefetch */ /* Connection to the outside world. */ /* Struct for IR-dependent hooks. */ struct value_prof_hooks { /* Find list of values for which we want to measure histograms. */ - void (*find_values_to_profile) (unsigned *, struct histogram_value **); + void (*find_values_to_profile) (histogram_values *); /* Identify and exploit properties of values that are hard to analyze statically. See value-prof.c for more detail. */ @@ -783,10 +1020,8 @@ rtl_register_value_prof_hooks (void) /* Tree-based versions are stubs for now. */ static void -tree_find_values_to_profile (unsigned *n_values, struct histogram_value **values) +tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED) { - (void)n_values; - (void)values; abort (); } @@ -811,9 +1046,9 @@ tree_register_value_prof_hooks (void) /* IR-independent entry points. */ void -find_values_to_profile (unsigned *n_values, struct histogram_value **values) +find_values_to_profile (histogram_values *values) { - (value_prof_hooks->find_values_to_profile) (n_values, values); + (value_prof_hooks->find_values_to_profile) (values); } bool diff --git a/gcc/value-prof.h b/gcc/value-prof.h index e71e5e56464..60215fde30d 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -39,14 +39,15 @@ enum hist_type /* The value to measure. */ /* The void *'s are either rtx or tree, depending on which IR is in use. */ -struct histogram_value +struct histogram_value_t GTY(()) { - void * value; /* The value to profile. */ - enum machine_mode mode; /* And its mode. */ - void * seq; /* Insns required to count the profiled value. 
*/ - void * insn; /* Insn before that to measure. */ - enum hist_type type; /* Type of information to measure. */ - unsigned n_counters; /* Number of required counters. */ + PTR GTY ((skip (""))) value; /* The value to profile. */ + enum machine_mode mode; /* And its mode. */ + PTR GTY ((skip (""))) seq; /* Insns required to count the + profiled value. */ + PTR GTY ((skip (""))) insn; /* Insn before that to measure. */ + enum hist_type type; /* Type of information to measure. */ + unsigned n_counters; /* Number of required counters. */ union { struct @@ -63,13 +64,18 @@ struct histogram_value } hdata; /* Profiled information specific data. */ }; +typedef struct histogram_value_t *histogram_value; + +DEF_VEC_P(histogram_value); + +typedef VEC(histogram_value) *histogram_values; + /* Hooks registration. */ extern void rtl_register_value_prof_hooks (void); extern void tree_register_value_prof_hooks (void); /* IR-independent entry points. */ -extern void find_values_to_profile (unsigned *, struct histogram_value **); -extern void free_profiled_values (unsigned, struct histogram_value *); +extern void find_values_to_profile (histogram_values *); extern bool value_profile_transformations (void); /* External declarations for edge-based profiling. */ @@ -78,18 +84,17 @@ struct profile_hooks { void (*gen_edge_profiler) (int, edge); /* Insert code to increment the interval histogram counter. */ - void (*gen_interval_profiler) (struct histogram_value *, unsigned, unsigned); + void (*gen_interval_profiler) (histogram_value, unsigned, unsigned); /* Insert code to increment the power of two histogram counter. */ - void (*gen_pow2_profiler) (struct histogram_value *, unsigned, unsigned); + void (*gen_pow2_profiler) (histogram_value, unsigned, unsigned); /* Insert code to find the most common value. 
*/ - void (*gen_one_value_profiler) (struct histogram_value *, unsigned, unsigned); + void (*gen_one_value_profiler) (histogram_value, unsigned, unsigned); /* Insert code to find the most common value of a difference between two evaluations of an expression. */ - void (*gen_const_delta_profiler) (struct histogram_value *, unsigned, - unsigned); + void (*gen_const_delta_profiler) (histogram_value, unsigned, unsigned); FILE * (*profile_dump_file) (void); }; -- 2.30.2