1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
/* ipa-profile pass implements the following analysis propagating profile
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
28 Similar histogram is also estimated by coverage runtime. This histogram
29 is not dependent on LTO, but it suffers from various defects; first
30 gcov runtime is not weighting individual basic block by estimated execution
31 time and second the merging of multiple runs makes assumption that the
   histogram distribution did not change.  Consequently the histogram constructed
33 here may be more precise.
35 The information is used to set hot/cold thresholds.
36 - Next speculative indirect call resolution is performed: the local
37 profile pass assigns profile-id to each function and provide us with a
38 histogram specifying the most common target. We look up the callgraph
39 node corresponding to the target and produce a speculative call.
41 This call may or may not survive through IPA optimization based on decision
43 - Finally we propagate the following flags: unlikely executed, executed
44 once, executed at startup and executed at exit. These flags are used to
   control code size/performance threshold and code placement (by producing
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
49 #include "coretypes.h"
54 #include "fold-const.h"
56 #include "dominance.h"
58 #include "basic-block.h"
59 #include "hard-reg-set.h"
62 #include "tree-pass.h"
63 #include "tree-ssa-alias.h"
64 #include "internal-fn.h"
65 #include "gimple-expr.h"
67 #include "gimple-iterator.h"
70 #include "tree-iterator.h"
71 #include "ipa-utils.h"
74 #include "value-prof.h"
75 #include "alloc-pool.h"
76 #include "tree-inline.h"
77 #include "lto-streamer.h"
78 #include "data-streamer.h"
79 #include "symbol-summary.h"
81 #include "ipa-inline.h"
83 /* Entry in the histogram. */
85 struct histogram_entry
92 /* Histogram of profile values.
93 The histogram is represented as an ordered vector of entries allocated via
94 histogram_pool. During construction a separate hashtable is kept to lookup
97 vec
<histogram_entry
*> histogram
;
98 static pool_allocator
<histogram_entry
> histogram_pool
99 ("IPA histogram", 10);
101 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
103 struct histogram_hash
: nofree_ptr_hash
<histogram_entry
>
105 static inline hashval_t
hash (const histogram_entry
*);
106 static inline int equal (const histogram_entry
*, const histogram_entry
*);
110 histogram_hash::hash (const histogram_entry
*val
)
116 histogram_hash::equal (const histogram_entry
*val
, const histogram_entry
*val2
)
118 return val
->count
== val2
->count
;
121 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
122 HASHTABLE is the on-side hash kept to avoid duplicates. */
125 account_time_size (hash_table
<histogram_hash
> *hashtable
,
126 vec
<histogram_entry
*> &histogram
,
127 gcov_type count
, int time
, int size
)
129 histogram_entry key
= {count
, 0, 0};
130 histogram_entry
**val
= hashtable
->find_slot (&key
, INSERT
);
134 *val
= histogram_pool
.allocate ();
136 histogram
.safe_push (*val
);
138 (*val
)->time
+= time
;
139 (*val
)->size
+= size
;
143 cmp_counts (const void *v1
, const void *v2
)
145 const histogram_entry
*h1
= *(const histogram_entry
* const *)v1
;
146 const histogram_entry
*h2
= *(const histogram_entry
* const *)v2
;
147 if (h1
->count
< h2
->count
)
149 if (h1
->count
> h2
->count
)
154 /* Dump HISTOGRAM to FILE. */
157 dump_histogram (FILE *file
, vec
<histogram_entry
*> histogram
)
160 gcov_type overall_time
= 0, cumulated_time
= 0, cumulated_size
= 0, overall_size
= 0;
162 fprintf (dump_file
, "Histogram:\n");
163 for (i
= 0; i
< histogram
.length (); i
++)
165 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
166 overall_size
+= histogram
[i
]->size
;
172 for (i
= 0; i
< histogram
.length (); i
++)
174 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
175 cumulated_size
+= histogram
[i
]->size
;
176 fprintf (file
, " %" PRId64
": time:%i (%2.2f) size:%i (%2.2f)\n",
177 (int64_t) histogram
[i
]->count
,
179 cumulated_time
* 100.0 / overall_time
,
181 cumulated_size
* 100.0 / overall_size
);
185 /* Collect histogram from CFG profiles. */
188 ipa_profile_generate_summary (void)
190 struct cgraph_node
*node
;
191 gimple_stmt_iterator gsi
;
194 hash_table
<histogram_hash
> hashtable (10);
196 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
197 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
201 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
203 gimple stmt
= gsi_stmt (gsi
);
204 if (gimple_code (stmt
) == GIMPLE_CALL
205 && !gimple_call_fndecl (stmt
))
208 h
= gimple_histogram_value_of_type
209 (DECL_STRUCT_FUNCTION (node
->decl
),
210 stmt
, HIST_TYPE_INDIR_CALL
);
211 /* No need to do sanity check: gimple_ic_transform already
212 takes away bad histograms. */
215 /* counter 0 is target, counter 1 is number of execution we called target,
216 counter 2 is total number of executions. */
217 if (h
->hvalue
.counters
[2])
219 struct cgraph_edge
* e
= node
->get_edge (stmt
);
220 if (e
&& !e
->indirect_unknown_callee
)
222 e
->indirect_info
->common_target_id
223 = h
->hvalue
.counters
[0];
224 e
->indirect_info
->common_target_probability
225 = GCOV_COMPUTE_SCALE (h
->hvalue
.counters
[1], h
->hvalue
.counters
[2]);
226 if (e
->indirect_info
->common_target_probability
> REG_BR_PROB_BASE
)
229 fprintf (dump_file
, "Probability capped to 1\n");
230 e
->indirect_info
->common_target_probability
= REG_BR_PROB_BASE
;
233 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node
->decl
),
237 time
+= estimate_num_insns (stmt
, &eni_time_weights
);
238 size
+= estimate_num_insns (stmt
, &eni_size_weights
);
240 account_time_size (&hashtable
, histogram
, bb
->count
, time
, size
);
242 histogram
.qsort (cmp_counts
);
245 /* Serialize the ipa info for lto. */
248 ipa_profile_write_summary (void)
250 struct lto_simple_output_block
*ob
251 = lto_create_simple_output_block (LTO_section_ipa_profile
);
254 streamer_write_uhwi_stream (ob
->main_stream
, histogram
.length ());
255 for (i
= 0; i
< histogram
.length (); i
++)
257 streamer_write_gcov_count_stream (ob
->main_stream
, histogram
[i
]->count
);
258 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->time
);
259 streamer_write_uhwi_stream (ob
->main_stream
, histogram
[i
]->size
);
261 lto_destroy_simple_output_block (ob
);
264 /* Deserialize the ipa info for lto. */
267 ipa_profile_read_summary (void)
269 struct lto_file_decl_data
** file_data_vec
270 = lto_get_file_decl_data ();
271 struct lto_file_decl_data
* file_data
;
274 hash_table
<histogram_hash
> hashtable (10);
276 while ((file_data
= file_data_vec
[j
++]))
280 struct lto_input_block
*ib
281 = lto_create_simple_input_block (file_data
,
282 LTO_section_ipa_profile
,
286 unsigned int num
= streamer_read_uhwi (ib
);
288 for (n
= 0; n
< num
; n
++)
290 gcov_type count
= streamer_read_gcov_count (ib
);
291 int time
= streamer_read_uhwi (ib
);
292 int size
= streamer_read_uhwi (ib
);
293 account_time_size (&hashtable
, histogram
,
296 lto_destroy_simple_input_block (file_data
,
297 LTO_section_ipa_profile
,
301 histogram
.qsort (cmp_counts
);
304 /* Data used by ipa_propagate_frequency. */
306 struct ipa_propagate_frequency_data
308 cgraph_node
*function_symbol
;
309 bool maybe_unlikely_executed
;
310 bool maybe_executed_once
;
311 bool only_called_at_startup
;
312 bool only_called_at_exit
;
315 /* Worker for ipa_propagate_frequency_1. */
318 ipa_propagate_frequency_1 (struct cgraph_node
*node
, void *data
)
320 struct ipa_propagate_frequency_data
*d
;
321 struct cgraph_edge
*edge
;
323 d
= (struct ipa_propagate_frequency_data
*)data
;
324 for (edge
= node
->callers
;
325 edge
&& (d
->maybe_unlikely_executed
|| d
->maybe_executed_once
326 || d
->only_called_at_startup
|| d
->only_called_at_exit
);
327 edge
= edge
->next_caller
)
329 if (edge
->caller
!= d
->function_symbol
)
331 d
->only_called_at_startup
&= edge
->caller
->only_called_at_startup
;
332 /* It makes sense to put main() together with the static constructors.
333 It will be executed for sure, but rest of functions called from
334 main are definitely not at startup only. */
335 if (MAIN_NAME_P (DECL_NAME (edge
->caller
->decl
)))
336 d
->only_called_at_startup
= 0;
337 d
->only_called_at_exit
&= edge
->caller
->only_called_at_exit
;
340 /* When profile feedback is available, do not try to propagate too hard;
341 counts are already good guide on function frequencies and roundoff
342 errors can make us to push function into unlikely section even when
343 it is executed by the train run. Transfer the function only if all
344 callers are unlikely executed. */
346 && opt_for_fn (d
->function_symbol
->decl
, flag_branch_probabilities
)
347 /* Thunks are not profiled. This is more or less implementation
349 && !d
->function_symbol
->thunk
.thunk_p
350 && (edge
->caller
->frequency
!= NODE_FREQUENCY_UNLIKELY_EXECUTED
351 || (edge
->caller
->global
.inlined_to
352 && edge
->caller
->global
.inlined_to
->frequency
353 != NODE_FREQUENCY_UNLIKELY_EXECUTED
)))
354 d
->maybe_unlikely_executed
= false;
355 if (!edge
->frequency
)
357 switch (edge
->caller
->frequency
)
359 case NODE_FREQUENCY_UNLIKELY_EXECUTED
:
361 case NODE_FREQUENCY_EXECUTED_ONCE
:
362 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
363 fprintf (dump_file
, " Called by %s that is executed once\n",
364 edge
->caller
->name ());
365 d
->maybe_unlikely_executed
= false;
366 if (inline_edge_summary (edge
)->loop_depth
)
368 d
->maybe_executed_once
= false;
369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
370 fprintf (dump_file
, " Called in loop\n");
373 case NODE_FREQUENCY_HOT
:
374 case NODE_FREQUENCY_NORMAL
:
375 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
376 fprintf (dump_file
, " Called by %s that is normal or hot\n",
377 edge
->caller
->name ());
378 d
->maybe_unlikely_executed
= false;
379 d
->maybe_executed_once
= false;
386 /* Return ture if NODE contains hot calls. */
389 contains_hot_call_p (struct cgraph_node
*node
)
391 struct cgraph_edge
*e
;
392 for (e
= node
->callees
; e
; e
= e
->next_callee
)
393 if (e
->maybe_hot_p ())
395 else if (!e
->inline_failed
396 && contains_hot_call_p (e
->callee
))
398 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
399 if (e
->maybe_hot_p ())
404 /* See if the frequency of NODE can be updated based on frequencies of its
407 ipa_propagate_frequency (struct cgraph_node
*node
)
409 struct ipa_propagate_frequency_data d
= {node
, true, true, true, true};
410 bool changed
= false;
412 /* We can not propagate anything useful about externally visible functions
413 nor about virtuals. */
414 if (!node
->local
.local
416 || (opt_for_fn (node
->decl
, flag_devirtualize
)
417 && DECL_VIRTUAL_P (node
->decl
)))
419 gcc_assert (node
->analyzed
);
420 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
421 fprintf (dump_file
, "Processing frequency %s\n", node
->name ());
423 node
->call_for_symbol_and_aliases (ipa_propagate_frequency_1
, &d
,
426 if ((d
.only_called_at_startup
&& !d
.only_called_at_exit
)
427 && !node
->only_called_at_startup
)
429 node
->only_called_at_startup
= true;
431 fprintf (dump_file
, "Node %s promoted to only called at startup.\n",
435 if ((d
.only_called_at_exit
&& !d
.only_called_at_startup
)
436 && !node
->only_called_at_exit
)
438 node
->only_called_at_exit
= true;
440 fprintf (dump_file
, "Node %s promoted to only called at exit.\n",
445 /* With profile we can decide on hot/normal based on count. */
449 if (node
->count
>= get_hot_bb_threshold ())
452 hot
|= contains_hot_call_p (node
);
455 if (node
->frequency
!= NODE_FREQUENCY_HOT
)
458 fprintf (dump_file
, "Node %s promoted to hot.\n",
460 node
->frequency
= NODE_FREQUENCY_HOT
;
465 else if (node
->frequency
== NODE_FREQUENCY_HOT
)
468 fprintf (dump_file
, "Node %s reduced to normal.\n",
470 node
->frequency
= NODE_FREQUENCY_NORMAL
;
474 /* These come either from profile or user hints; never update them. */
475 if (node
->frequency
== NODE_FREQUENCY_HOT
476 || node
->frequency
== NODE_FREQUENCY_UNLIKELY_EXECUTED
)
478 if (d
.maybe_unlikely_executed
)
480 node
->frequency
= NODE_FREQUENCY_UNLIKELY_EXECUTED
;
482 fprintf (dump_file
, "Node %s promoted to unlikely executed.\n",
486 else if (d
.maybe_executed_once
&& node
->frequency
!= NODE_FREQUENCY_EXECUTED_ONCE
)
488 node
->frequency
= NODE_FREQUENCY_EXECUTED_ONCE
;
490 fprintf (dump_file
, "Node %s promoted to executed once.\n",
497 /* Simple ipa profile pass propagating frequencies across the callgraph. */
502 struct cgraph_node
**order
;
503 struct cgraph_edge
*e
;
505 bool something_changed
= false;
507 gcov_type overall_time
= 0, cutoff
= 0, cumulated
= 0, overall_size
= 0;
508 struct cgraph_node
*n
,*n2
;
509 int nindirect
= 0, ncommon
= 0, nunknown
= 0, nuseless
= 0, nconverted
= 0;
510 int nmismatch
= 0, nimpossible
= 0;
511 bool node_map_initialized
= false;
514 dump_histogram (dump_file
, histogram
);
515 for (i
= 0; i
< (int)histogram
.length (); i
++)
517 overall_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
518 overall_size
+= histogram
[i
]->size
;
524 gcc_assert (overall_size
);
527 gcov_type min
, cumulated_time
= 0, cumulated_size
= 0;
529 fprintf (dump_file
, "Overall time: %" PRId64
"\n",
530 (int64_t)overall_time
);
531 min
= get_hot_bb_threshold ();
532 for (i
= 0; i
< (int)histogram
.length () && histogram
[i
]->count
>= min
;
535 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
536 cumulated_size
+= histogram
[i
]->size
;
538 fprintf (dump_file
, "GCOV min count: %" PRId64
539 " Time:%3.2f%% Size:%3.2f%%\n",
541 cumulated_time
* 100.0 / overall_time
,
542 cumulated_size
* 100.0 / overall_size
);
544 cutoff
= (overall_time
* PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE
) + 500) / 1000;
546 for (i
= 0; cumulated
< cutoff
; i
++)
548 cumulated
+= histogram
[i
]->count
* histogram
[i
]->time
;
549 threshold
= histogram
[i
]->count
;
555 gcov_type cumulated_time
= 0, cumulated_size
= 0;
558 i
< (int)histogram
.length () && histogram
[i
]->count
>= threshold
;
561 cumulated_time
+= histogram
[i
]->count
* histogram
[i
]->time
;
562 cumulated_size
+= histogram
[i
]->size
;
564 fprintf (dump_file
, "Determined min count: %" PRId64
565 " Time:%3.2f%% Size:%3.2f%%\n",
567 cumulated_time
* 100.0 / overall_time
,
568 cumulated_size
* 100.0 / overall_size
);
570 if (threshold
> get_hot_bb_threshold ()
574 fprintf (dump_file
, "Threshold updated.\n");
575 set_hot_bb_threshold (threshold
);
578 histogram
.release ();
579 histogram_pool
.release ();
/* Produce speculative calls: we saved common target from profiling into
582 e->common_target_id. Now, at link time, we can look up corresponding
583 function node and produce speculative call. */
585 FOR_EACH_DEFINED_FUNCTION (n
)
589 if (!opt_for_fn (n
->decl
, flag_ipa_profile
))
592 for (e
= n
->indirect_calls
; e
; e
= e
->next_callee
)
596 if (e
->indirect_info
->common_target_id
)
598 if (!node_map_initialized
)
599 init_node_map (false);
600 node_map_initialized
= true;
602 n2
= find_func_by_profile_id (e
->indirect_info
->common_target_id
);
607 fprintf (dump_file
, "Indirect call -> direct call from"
608 " other module %s/%i => %s/%i, prob %3.2f\n",
609 xstrdup_for_dump (n
->name ()), n
->order
,
610 xstrdup_for_dump (n2
->name ()), n2
->order
,
611 e
->indirect_info
->common_target_probability
612 / (float)REG_BR_PROB_BASE
);
614 if (e
->indirect_info
->common_target_probability
615 < REG_BR_PROB_BASE
/ 2)
620 "Not speculating: probability is too low.\n");
622 else if (!e
->maybe_hot_p ())
627 "Not speculating: call is cold.\n");
629 else if (n2
->get_availability () <= AVAIL_INTERPOSABLE
630 && n2
->can_be_discarded_p ())
635 "Not speculating: target is overwritable "
636 "and can be discarded.\n");
638 else if (ipa_node_params_sum
&& ipa_edge_args_vector
639 && !IPA_NODE_REF (n2
)->descriptors
.is_empty ()
640 && ipa_get_param_count (IPA_NODE_REF (n2
))
641 != ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
642 && (ipa_get_param_count (IPA_NODE_REF (n2
))
643 >= ipa_get_cs_argument_count (IPA_EDGE_REF (e
))
644 || !stdarg_p (TREE_TYPE (n2
->decl
))))
650 "parameter count mistmatch\n");
652 else if (e
->indirect_info
->polymorphic
653 && !opt_for_fn (n
->decl
, flag_devirtualize
)
654 && !possible_polymorphic_call_target_p (e
, n2
))
660 "function is not in the polymorphic "
661 "call target list\n");
665 /* Target may be overwritable, but profile says that
666 control flow goes to this particular implementation
667 of N2. Speculate on the local alias to allow inlining.
669 if (!n2
->can_be_discarded_p ())
672 alias
= dyn_cast
<cgraph_node
*> (n2
->noninterposable_alias ());
679 apply_scale (e
->count
,
680 e
->indirect_info
->common_target_probability
),
681 apply_scale (e
->frequency
,
682 e
->indirect_info
->common_target_probability
));
689 fprintf (dump_file
, "Function with profile-id %i not found.\n",
690 e
->indirect_info
->common_target_id
);
696 inline_update_overall_summary (n
);
698 if (node_map_initialized
)
700 if (dump_file
&& nindirect
)
702 "%i indirect calls trained.\n"
703 "%i (%3.2f%%) have common target.\n"
704 "%i (%3.2f%%) targets was not found.\n"
705 "%i (%3.2f%%) targets had parameter count mismatch.\n"
706 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
707 "%i (%3.2f%%) speculations seems useless.\n"
708 "%i (%3.2f%%) speculations produced.\n",
710 ncommon
, ncommon
* 100.0 / nindirect
,
711 nunknown
, nunknown
* 100.0 / nindirect
,
712 nmismatch
, nmismatch
* 100.0 / nindirect
,
713 nimpossible
, nimpossible
* 100.0 / nindirect
,
714 nuseless
, nuseless
* 100.0 / nindirect
,
715 nconverted
, nconverted
* 100.0 / nindirect
);
717 order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
718 order_pos
= ipa_reverse_postorder (order
);
719 for (i
= order_pos
- 1; i
>= 0; i
--)
721 if (order
[i
]->local
.local
722 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
723 && ipa_propagate_frequency (order
[i
]))
725 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
726 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
728 something_changed
= true;
729 e
->callee
->aux
= (void *)1;
732 order
[i
]->aux
= NULL
;
735 while (something_changed
)
737 something_changed
= false;
738 for (i
= order_pos
- 1; i
>= 0; i
--)
741 && opt_for_fn (order
[i
]->decl
, flag_ipa_profile
)
742 && ipa_propagate_frequency (order
[i
]))
744 for (e
= order
[i
]->callees
; e
; e
= e
->next_callee
)
745 if (e
->callee
->local
.local
&& !e
->callee
->aux
)
747 something_changed
= true;
748 e
->callee
->aux
= (void *)1;
751 order
[i
]->aux
= NULL
;
760 const pass_data pass_data_ipa_profile
=
763 "profile_estimate", /* name */
764 OPTGROUP_NONE
, /* optinfo_flags */
765 TV_IPA_PROFILE
, /* tv_id */
766 0, /* properties_required */
767 0, /* properties_provided */
768 0, /* properties_destroyed */
769 0, /* todo_flags_start */
770 0, /* todo_flags_finish */
773 class pass_ipa_profile
: public ipa_opt_pass_d
776 pass_ipa_profile (gcc::context
*ctxt
)
777 : ipa_opt_pass_d (pass_data_ipa_profile
, ctxt
,
778 ipa_profile_generate_summary
, /* generate_summary */
779 ipa_profile_write_summary
, /* write_summary */
780 ipa_profile_read_summary
, /* read_summary */
781 NULL
, /* write_optimization_summary */
782 NULL
, /* read_optimization_summary */
783 NULL
, /* stmt_fixup */
784 0, /* function_transform_todo_flags_start */
785 NULL
, /* function_transform */
786 NULL
) /* variable_transform */
789 /* opt_pass methods: */
790 virtual bool gate (function
*) { return flag_ipa_profile
|| in_lto_p
; }
791 virtual unsigned int execute (function
*) { return ipa_profile (); }
793 }; // class pass_ipa_profile
798 make_pass_ipa_profile (gcc::context
*ctxt
)
800 return new pass_ipa_profile (ctxt
);