[gcc.git] / gcc / ipa-profile.c
1 /* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "cgraph.h"
25 #include "tree-pass.h"
26 #include "gimple.h"
27 #include "ggc.h"
28 #include "flags.h"
29 #include "target.h"
30 #include "tree-iterator.h"
31 #include "ipa-utils.h"
32 #include "hash-table.h"
33 #include "profile.h"
34 #include "params.h"
35 #include "value-prof.h"
36 #include "alloc-pool.h"
37 #include "tree-inline.h"
38 #include "lto-streamer.h"
39 #include "data-streamer.h"
40 #include "ipa-inline.h"
41
42 /* Entry in the histogram. */
43
44 struct histogram_entry
45 {
46 gcov_type count;
47 int time;
48 int size;
49 };
50
51 /* Histogram of profile values.
52 The histogram is represented as an ordered vector of entries allocated via
53 histogram_pool.  During construction a separate hash table is kept to look up
54 duplicate entries. */
55
56 vec<histogram_entry *> histogram;
57 static alloc_pool histogram_pool;
58
59 /* Hashtable support for looking up histogram entries hashed by their execution count. */
60
61 struct histogram_hash : typed_noop_remove <histogram_entry>
62 {
63 typedef histogram_entry value_type;
64 typedef histogram_entry compare_type;
65 static inline hashval_t hash (const value_type *);
66 static inline int equal (const value_type *, const compare_type *);
67 };
68
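/* Note that the hash merely truncates the (possibly wider) gcov counter to
   hashval_t; counts differing only in the high bits may collide, which only
   costs hash quality, since EQUAL below still compares the full counts.  */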
69 inline hashval_t
70 histogram_hash::hash (const histogram_entry *val)
71 {
72 return val->count;
73 }
74
75 inline int
76 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
77 {
78 return val->count == val2->count;
79 }
80
81 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
82 HASHTABLE is the side hash table kept to avoid duplicate entries. */
83
84 static void
85 account_time_size (hash_table <histogram_hash> hashtable,
86 vec<histogram_entry *> &histogram,
87 gcov_type count, int time, int size)
88 {
89 histogram_entry key = {count, 0, 0};
90 histogram_entry **val = hashtable.find_slot (&key, INSERT);
91
92 if (!*val)
93 {
94 *val = (histogram_entry *) pool_alloc (histogram_pool);
95 **val = key;
96 histogram.safe_push (*val);
97 }
98 (*val)->time += time;
99 (*val)->size += size;
100 }
101
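/* qsort comparator: order histogram entries by decreasing execution count,
   so the hottest counts come first.  */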
102 int
103 cmp_counts (const void *v1, const void *v2)
104 {
105 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
106 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
107 if (h1->count < h2->count)
108 return 1;
109 if (h1->count > h2->count)
110 return -1;
111 return 0;
112 }
113
114 /* Dump HISTOGRAM to FILE. */
115
116 static void
117 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
118 {
119 unsigned int i;
120 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
121
122 fprintf (file, "Histogram:\n");
123 for (i = 0; i < histogram.length (); i++)
124 {
125 overall_time += histogram[i]->count * histogram[i]->time;
126 overall_size += histogram[i]->size;
127 }
128 if (!overall_time)
129 overall_time = 1;
130 if (!overall_size)
131 overall_size = 1;
132 for (i = 0; i < histogram.length (); i++)
133 {
134 cumulated_time += histogram[i]->count * histogram[i]->time;
135 cumulated_size += histogram[i]->size;
136 fprintf (file, " "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n",
137 (HOST_WIDEST_INT) histogram[i]->count,
138 histogram[i]->time,
139 cumulated_time * 100.0 / overall_time,
140 histogram[i]->size,
141 cumulated_size * 100.0 / overall_size);
142 }
143 }
144
145 /* Collect histogram from CFG profiles. */
146
147 static void
148 ipa_profile_generate_summary (void)
149 {
150 struct cgraph_node *node;
151 gimple_stmt_iterator gsi;
152 hash_table <histogram_hash> hashtable;
153 basic_block bb;
154
155 hashtable.create (10);
156 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
157 10);
158
159 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
160 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->symbol.decl))
161 {
162 int time = 0;
163 int size = 0;
164 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
165 {
166 gimple stmt = gsi_stmt (gsi);
167 if (gimple_code (stmt) == GIMPLE_CALL
168 && !gimple_call_fndecl (stmt))
169 {
170 histogram_value h;
171 h = gimple_histogram_value_of_type
172 (DECL_STRUCT_FUNCTION (node->symbol.decl),
173 stmt, HIST_TYPE_INDIR_CALL);
174 /* No need to do sanity check: gimple_ic_transform already
175 takes away bad histograms. */
176 if (h)
177 {
178 /* Counter 0 is the common target, counter 1 is the number of calls
179 that went to that target, counter 2 is the total number of calls. */
180 if (h->hvalue.counters[2])
181 {
182 struct cgraph_edge * e = cgraph_edge (node, stmt);
183 e->indirect_info->common_target_id
184 = h->hvalue.counters [0];
185 e->indirect_info->common_target_probability
186 = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
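/* The counters are updated independently (e.g. non-atomically in threaded
   programs, or merged across runs), so counter 1 can occasionally exceed
   counter 2; the check below caps the scaled probability at REG_BR_PROB_BASE
   in that case.  */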
187 if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
188 {
189 if (dump_file)
190 fprintf (dump_file, "Probability capped to 1\n");
191 e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
192 }
193 }
194 gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->symbol.decl),
195 stmt, h);
196 }
197 }
198 time += estimate_num_insns (stmt, &eni_time_weights);
199 size += estimate_num_insns (stmt, &eni_size_weights);
200 }
201 account_time_size (hashtable, histogram, bb->count, time, size);
202 }
203 hashtable.dispose ();
204 histogram.qsort (cmp_counts);
205 }
206
207 /* Serialize the IPA profile info for LTO. */
208
209 static void
210 ipa_profile_write_summary (void)
211 {
212 struct lto_simple_output_block *ob
213 = lto_create_simple_output_block (LTO_section_ipa_profile);
214 unsigned int i;
215
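/* The section is simply the number of entries followed by one
   (count, time, size) triple per entry; ipa_profile_read_summary below
   reads them back in the same order.  */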
216 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
217 for (i = 0; i < histogram.length (); i++)
218 {
219 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
220 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
221 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
222 }
223 lto_destroy_simple_output_block (ob);
224 }
225
226 /* Deserialize the IPA profile info for LTO. */
227
228 static void
229 ipa_profile_read_summary (void)
230 {
231 struct lto_file_decl_data ** file_data_vec
232 = lto_get_file_decl_data ();
233 struct lto_file_decl_data * file_data;
234 hash_table <histogram_hash> hashtable;
235 int j = 0;
236
237 hashtable.create (10);
238 histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
239 10);
240
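/* Walk the ipa-profile section of every LTO file and merge its entries into
   the global histogram; account_time_size combines entries that share the
   same count.  */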
241 while ((file_data = file_data_vec[j++]))
242 {
243 const char *data;
244 size_t len;
245 struct lto_input_block *ib
246 = lto_create_simple_input_block (file_data,
247 LTO_section_ipa_profile,
248 &data, &len);
249 if (ib)
250 {
251 unsigned int num = streamer_read_uhwi (ib);
252 unsigned int n;
253 for (n = 0; n < num; n++)
254 {
255 gcov_type count = streamer_read_gcov_count (ib);
256 int time = streamer_read_uhwi (ib);
257 int size = streamer_read_uhwi (ib);
258 account_time_size (hashtable, histogram,
259 count, time, size);
260 }
261 lto_destroy_simple_input_block (file_data,
262 LTO_section_ipa_profile,
263 ib, data, len);
264 }
265 }
266 hashtable.dispose ();
267 histogram.qsort (cmp_counts);
268 }
269
270 /* Data used by ipa_propagate_frequency. */
271
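/* All four flags start out true and are cleared by ipa_propagate_frequency_1
   as the callers of the node provide counter-evidence.  */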
272 struct ipa_propagate_frequency_data
273 {
274 bool maybe_unlikely_executed;
275 bool maybe_executed_once;
276 bool only_called_at_startup;
277 bool only_called_at_exit;
278 };
279
280 /* Worker for ipa_propagate_frequency. */
281
282 static bool
283 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
284 {
285 struct ipa_propagate_frequency_data *d;
286 struct cgraph_edge *edge;
287
288 d = (struct ipa_propagate_frequency_data *)data;
289 for (edge = node->callers;
290 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
291 || d->only_called_at_startup || d->only_called_at_exit);
292 edge = edge->next_caller)
293 {
294 if (edge->caller != node)
295 {
296 d->only_called_at_startup &= edge->caller->only_called_at_startup;
297 /* It makes sense to put main() together with the static constructors.
298 It will be executed for sure, but the rest of the functions called from
299 main are definitely not executed only at startup. */
300 if (MAIN_NAME_P (DECL_NAME (edge->caller->symbol.decl)))
301 d->only_called_at_startup = 0;
302 d->only_called_at_exit &= edge->caller->only_called_at_exit;
303 }
304 if (!edge->frequency)
305 continue;
306 switch (edge->caller->frequency)
307 {
308 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
309 break;
310 case NODE_FREQUENCY_EXECUTED_ONCE:
311 if (dump_file && (dump_flags & TDF_DETAILS))
312 fprintf (dump_file, " Called by %s that is executed once\n",
313 cgraph_node_name (edge->caller));
314 d->maybe_unlikely_executed = false;
315 if (inline_edge_summary (edge)->loop_depth)
316 {
317 d->maybe_executed_once = false;
318 if (dump_file && (dump_flags & TDF_DETAILS))
319 fprintf (dump_file, " Called in loop\n");
320 }
321 break;
322 case NODE_FREQUENCY_HOT:
323 case NODE_FREQUENCY_NORMAL:
324 if (dump_file && (dump_flags & TDF_DETAILS))
325 fprintf (dump_file, " Called by %s that is normal or hot\n",
326 cgraph_node_name (edge->caller));
327 d->maybe_unlikely_executed = false;
328 d->maybe_executed_once = false;
329 break;
330 }
331 }
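/* A non-NULL EDGE here means the loop stopped early because every flag has
   already been cleared; returning true lets cgraph_for_node_and_aliases stop
   visiting the remaining aliases as well.  */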
332 return edge != NULL;
333 }
334
335 /* See if the frequency of NODE can be updated based on frequencies of its
336 callers. */
337 bool
338 ipa_propagate_frequency (struct cgraph_node *node)
339 {
340 struct ipa_propagate_frequency_data d = {true, true, true, true};
341 bool changed = false;
342
343 /* We cannot propagate anything useful about externally visible functions
344 or about virtual functions. */
345 if (!node->local.local
346 || (flag_devirtualize && DECL_VIRTUAL_P (node->symbol.decl)))
347 return false;
348 gcc_assert (node->symbol.analyzed);
349 if (dump_file && (dump_flags & TDF_DETAILS))
350 fprintf (dump_file, "Processing frequency %s\n", cgraph_node_name (node));
351
352 cgraph_for_node_and_aliases (node, ipa_propagate_frequency_1, &d, true);
353
354 if ((d.only_called_at_startup && !d.only_called_at_exit)
355 && !node->only_called_at_startup)
356 {
357 node->only_called_at_startup = true;
358 if (dump_file)
359 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
360 cgraph_node_name (node));
361 changed = true;
362 }
363 if ((d.only_called_at_exit && !d.only_called_at_startup)
364 && !node->only_called_at_exit)
365 {
366 node->only_called_at_exit = true;
367 if (dump_file)
368 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
369 cgraph_node_name (node));
370 changed = true;
371 }
372 /* These come either from profile or user hints; never update them. */
373 if (node->frequency == NODE_FREQUENCY_HOT
374 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
375 return changed;
376 if (d.maybe_unlikely_executed)
377 {
378 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
379 if (dump_file)
380 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
381 cgraph_node_name (node));
382 changed = true;
383 }
384 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
385 {
386 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
387 if (dump_file)
388 fprintf (dump_file, "Node %s promoted to executed once.\n",
389 cgraph_node_name (node));
390 changed = true;
391 }
392 return changed;
393 }
394
395 /* Simple ipa profile pass propagating frequencies across the callgraph. */
396
397 static unsigned int
398 ipa_profile (void)
399 {
400 struct cgraph_node **order;
401 struct cgraph_edge *e;
402 int order_pos;
403 bool something_changed = false;
404 int i;
405 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
406 struct cgraph_node *n,*n2;
407 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
408 bool node_map_initialized = false;
409
410 if (dump_file)
411 dump_histogram (dump_file, histogram);
412 for (i = 0; i < (int)histogram.length (); i++)
413 {
414 overall_time += histogram[i]->count * histogram[i]->time;
415 overall_size += histogram[i]->size;
416 }
417 if (overall_time)
418 {
419 gcov_type threshold;
420
421 gcc_assert (overall_size);
422 if (dump_file)
423 {
424 gcov_type min, cumulated_time = 0, cumulated_size = 0;
425
426 fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n",
427 (HOST_WIDEST_INT)overall_time);
428 min = get_hot_bb_threshold ();
429 for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
430 i++)
431 {
432 cumulated_time += histogram[i]->count * histogram[i]->time;
433 cumulated_size += histogram[i]->size;
434 }
435 fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
436 " Time:%3.2f%% Size:%3.2f%%\n",
437 (HOST_WIDEST_INT)min,
438 cumulated_time * 100.0 / overall_time,
439 cumulated_size * 100.0 / overall_size);
440 }
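/* With the default --param hot-bb-count-ws-permille=999, CUTOFF covers 99.9%
   of the overall profiled time (rounded to nearest); the loop below then
   walks the histogram from the hottest counts down until that much time has
   been accumulated.  */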
441 cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
442 threshold = 0;
443 for (i = 0; cumulated < cutoff; i++)
444 {
445 cumulated += histogram[i]->count * histogram[i]->time;
446 threshold = histogram[i]->count;
447 }
448 if (!threshold)
449 threshold = 1;
450 if (dump_file)
451 {
452 gcov_type cumulated_time = 0, cumulated_size = 0;
453
454 for (i = 0;
455 i < (int)histogram.length () && histogram[i]->count >= threshold;
456 i++)
457 {
458 cumulated_time += histogram[i]->count * histogram[i]->time;
459 cumulated_size += histogram[i]->size;
460 }
461 fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
462 " Time:%3.2f%% Size:%3.2f%%\n",
463 (HOST_WIDEST_INT)threshold,
464 cumulated_time * 100.0 / overall_time,
465 cumulated_size * 100.0 / overall_size);
466 }
467 if (threshold > get_hot_bb_threshold ()
468 || in_lto_p)
469 {
470 if (dump_file)
471 fprintf (dump_file, "Threshold updated.\n");
472 set_hot_bb_threshold (threshold);
473 }
474 }
475 histogram.release ();
476 free_alloc_pool (histogram_pool);
477
478 /* Produce speculative calls: we saved the common target from profiling into
479 e->indirect_info->common_target_id.  Now, at link time, we can look up the
480 corresponding function node and produce a speculative call. */
481
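/* For each indirect edge we look the id up via find_func_by_profile_id, skip
   speculation when the recorded probability is below one half, when the call
   site is not hot, or when the target is both overwritable and discardable,
   and otherwise turn the edge speculative with its count and frequency scaled
   by the recorded probability.  */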
482 FOR_EACH_DEFINED_FUNCTION (n)
483 {
484 bool update = false;
485
486 for (e = n->indirect_calls; e; e = e->next_callee)
487 {
488 if (n->count)
489 nindirect++;
490 if (e->indirect_info->common_target_id)
491 {
492 if (!node_map_initialized)
493 init_node_map (false);
494 node_map_initialized = true;
495 ncommon++;
496 n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
497 if (n2)
498 {
499 if (dump_file)
500 {
501 fprintf (dump_file, "Indirect call -> direct call from"
502 " other module %s/%i => %s/%i, prob %3.2f\n",
503 xstrdup (cgraph_node_name (n)), n->symbol.order,
504 xstrdup (cgraph_node_name (n2)), n2->symbol.order,
505 e->indirect_info->common_target_probability
506 / (float)REG_BR_PROB_BASE);
507 }
508 if (e->indirect_info->common_target_probability
509 < REG_BR_PROB_BASE / 2)
510 {
511 nuseless++;
512 if (dump_file)
513 fprintf (dump_file,
514 "Not speculating: probability is too low.\n");
515 }
516 else if (!cgraph_maybe_hot_edge_p (e))
517 {
518 nuseless++;
519 if (dump_file)
520 fprintf (dump_file,
521 "Not speculating: call is cold.\n");
522 }
523 else if (cgraph_function_body_availability (n2)
524 <= AVAIL_OVERWRITABLE
525 && symtab_can_be_discarded ((symtab_node) n2))
526 {
527 nuseless++;
528 if (dump_file)
529 fprintf (dump_file,
530 "Not speculating: target is overwritable "
531 "and can be discarded.\n");
532 }
533 else
534 {
535 /* Target may be overwritable, but profile says that
536 control flow goes to this particular implementation
537 of N2. Speculate on the local alias to allow inlining.
538 */
539 if (!symtab_can_be_discarded ((symtab_node) n2))
540 n2 = cgraph (symtab_nonoverwritable_alias ((symtab_node)n2));
541 nconverted++;
542 cgraph_turn_edge_to_speculative
543 (e, n2,
544 apply_scale (e->count,
545 e->indirect_info->common_target_probability),
546 apply_scale (e->frequency,
547 e->indirect_info->common_target_probability));
548 update = true;
549 }
550 }
551 else
552 {
553 if (dump_file)
554 fprintf (dump_file, "Function with profile-id %i not found.\n",
555 e->indirect_info->common_target_id);
556 nunknown++;
557 }
558 }
559 }
560 if (update)
561 inline_update_overall_summary (n);
562 }
563 if (node_map_initialized)
564 del_node_map ();
565 if (dump_file && nindirect)
566 fprintf (dump_file,
567 "%i indirect calls trained.\n"
568 "%i (%3.2f%%) have a common target.\n"
569 "%i (%3.2f%%) targets were not found.\n"
570 "%i (%3.2f%%) speculations seem useless.\n"
571 "%i (%3.2f%%) speculations produced.\n",
572 nindirect,
573 ncommon, ncommon * 100.0 / nindirect,
574 nunknown, nunknown * 100.0 / nindirect,
575 nuseless, nuseless * 100.0 / nindirect,
576 nconverted, nconverted * 100.0 / nindirect);
577
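/* Finally propagate node frequencies.  Walk the callgraph in reverse
   postorder and iterate to a fixed point: whenever a node changes, its local
   callees are marked via the aux pointer and revisited on the next sweep.  */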
578 order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
579 order_pos = ipa_reverse_postorder (order);
580 for (i = order_pos - 1; i >= 0; i--)
581 {
582 if (order[i]->local.local && ipa_propagate_frequency (order[i]))
583 {
584 for (e = order[i]->callees; e; e = e->next_callee)
585 if (e->callee->local.local && !e->callee->symbol.aux)
586 {
587 something_changed = true;
588 e->callee->symbol.aux = (void *)1;
589 }
590 }
591 order[i]->symbol.aux = NULL;
592 }
593
594 while (something_changed)
595 {
596 something_changed = false;
597 for (i = order_pos - 1; i >= 0; i--)
598 {
599 if (order[i]->symbol.aux && ipa_propagate_frequency (order[i]))
600 {
601 for (e = order[i]->callees; e; e = e->next_callee)
602 if (e->callee->local.local && !e->callee->symbol.aux)
603 {
604 something_changed = true;
605 e->callee->symbol.aux = (void *)1;
606 }
607 }
608 order[i]->symbol.aux = NULL;
609 }
610 }
611 free (order);
612 return 0;
613 }
614
615 static bool
616 gate_ipa_profile (void)
617 {
618 return flag_ipa_profile;
619 }
620
621 namespace {
622
623 const pass_data pass_data_ipa_profile =
624 {
625 IPA_PASS, /* type */
626 "profile_estimate", /* name */
627 OPTGROUP_NONE, /* optinfo_flags */
628 true, /* has_gate */
629 true, /* has_execute */
630 TV_IPA_PROFILE, /* tv_id */
631 0, /* properties_required */
632 0, /* properties_provided */
633 0, /* properties_destroyed */
634 0, /* todo_flags_start */
635 0, /* todo_flags_finish */
636 };
637
638 class pass_ipa_profile : public ipa_opt_pass_d
639 {
640 public:
641 pass_ipa_profile(gcc::context *ctxt)
642 : ipa_opt_pass_d(pass_data_ipa_profile, ctxt,
643 ipa_profile_generate_summary, /* generate_summary */
644 ipa_profile_write_summary, /* write_summary */
645 ipa_profile_read_summary, /* read_summary */
646 NULL, /* write_optimization_summary */
647 NULL, /* read_optimization_summary */
648 NULL, /* stmt_fixup */
649 0, /* function_transform_todo_flags_start */
650 NULL, /* function_transform */
651 NULL) /* variable_transform */
652 {}
653
654 /* opt_pass methods: */
655 bool gate () { return gate_ipa_profile (); }
656 unsigned int execute () { return ipa_profile (); }
657
658 }; // class pass_ipa_profile
659
660 } // anon namespace
661
662 ipa_opt_pass_d *
663 make_pass_ipa_profile (gcc::context *ctxt)
664 {
665 return new pass_ipa_profile (ctxt);
666 }