coretypes.h: Include input.h and as-a.h.
[gcc.git] / gcc / auto-profile.c
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23
24 #include <string.h>
25 #include <map>
26 #include <set>
27
28 #include "coretypes.h"
29 #include "alias.h"
30 #include "symtab.h"
31 #include "options.h"
32 #include "tree.h"
33 #include "fold-const.h"
34 #include "tree-pass.h"
35 #include "flags.h"
36 #include "predict.h"
37 #include "tm.h"
38 #include "hard-reg-set.h"
39 #include "function.h"
40 #include "dominance.h"
41 #include "cfg.h"
42 #include "basic-block.h"
43 #include "diagnostic-core.h"
44 #include "gcov-io.h"
45 #include "profile.h"
46 #include "langhooks.h"
47 #include "opts.h"
48 #include "tree-pass.h"
49 #include "cfgloop.h"
50 #include "tree-ssa-alias.h"
51 #include "tree-cfg.h"
52 #include "tree-cfgcleanup.h"
53 #include "tree-ssa-operands.h"
54 #include "tree-into-ssa.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
57 #include "gimple.h"
58 #include "gimple-iterator.h"
59 #include "gimple-ssa.h"
60 #include "plugin-api.h"
61 #include "ipa-ref.h"
62 #include "cgraph.h"
63 #include "value-prof.h"
64 #include "coverage.h"
65 #include "params.h"
66 #include "alloc-pool.h"
67 #include "symbol-summary.h"
68 #include "ipa-prop.h"
69 #include "ipa-inline.h"
70 #include "tree-inline.h"
71 #include "stringpool.h"
72 #include "auto-profile.h"
73
74 /* The following routines implements AutoFDO optimization.
75
76 This optimization uses sampling profiles to annotate basic block counts
77 and uses heuristics to estimate branch probabilities.
78
79 There are three phases in AutoFDO:
80
81 Phase 1: Read profile from the profile data file.
82 The following info is read from the profile datafile:
83 * string_table: a map between function name and its index.
84 * autofdo_source_profile: a map from function_instance name to
85 function_instance. This is represented as a forest of
86 function_instances.
87 * WorkingSet: a histogram of how many instructions are covered for a
88 given percentage of total cycles. This is describing the binary
89 level information (not source level). This info is used to help
90 decide if we want aggressive optimizations that could increase
91 code footprint (e.g. loop unroll etc.)
92 A function instance is an instance of function that could either be a
93 standalone symbol, or a clone of a function that is inlined into another
94 function.
95
96 Phase 2: Early inline + value profile transformation.
97 Early inline uses autofdo_source_profile to find if a callsite is:
98 * inlined in the profiled binary.
99 * callee body is hot in the profiling run.
100 If both condition satisfies, early inline will inline the callsite
101 regardless of the code growth.
102 Phase 2 is an iterative process. During each iteration, we also check
103 if an indirect callsite is promoted and inlined in the profiling run.
104 If yes, vpt will happen to force promote it and in the next iteration,
105 einline will inline the promoted callsite in the next iteration.
106
107 Phase 3: Annotate control flow graph.
108 AutoFDO uses a separate pass to:
109 * Annotate basic block count
110 * Estimate branch probability
111
112 After the above 3 phases, all profile is readily annotated on the GCC IR.
113 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
114 use of the profile. E.g. it uses existing mechanism to calculate the basic
115 block/edge frequency, as well as the cgraph node/edge count.
116 */
117
118 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
119 #define AUTO_PROFILE_VERSION 1
120
121 namespace autofdo
122 {
123
124 /* Represent a source location: (function_decl, lineno). */
125 typedef std::pair<tree, unsigned> decl_lineno;
126
127 /* Represent an inline stack. vector[0] is the leaf node. */
128 typedef auto_vec<decl_lineno> inline_stack;
129
130 /* String array that stores function names. */
131 typedef auto_vec<char *> string_vector;
132
133 /* Map from function name's index in string_table to target's
134 execution count. */
135 typedef std::map<unsigned, gcov_type> icall_target_map;
136
137 /* Set of gimple stmts. Used to track if the stmt has already been promoted
138 to direct call. */
139 typedef std::set<gimple> stmt_set;
140
141 /* Represent count info of an inline stack. */
142 struct count_info
143 {
144 /* Sampled count of the inline stack. */
145 gcov_type count;
146
147 /* Map from indirect call target to its sample count. */
148 icall_target_map targets;
149
150 /* Whether this inline stack is already used in annotation.
151
152 Each inline stack should only be used to annotate IR once.
153 This will be enforced when instruction-level discriminator
154 is supported. */
155 bool annotated;
156 };
157
158 /* operator< for "const char *". */
159 struct string_compare
160 {
161 bool operator()(const char *a, const char *b) const
162 {
163 return strcmp (a, b) < 0;
164 }
165 };
166
167 /* Store a string array, indexed by string position in the array. */
168 class string_table
169 {
170 public:
171 string_table ()
172 {}
173
174 ~string_table ();
175
176 /* For a given string, returns its index. */
177 int get_index (const char *name) const;
178
179 /* For a given decl, returns the index of the decl name. */
180 int get_index_by_decl (tree decl) const;
181
182 /* For a given index, returns the string. */
183 const char *get_name (int index) const;
184
185 /* Read profile, return TRUE on success. */
186 bool read ();
187
188 private:
189 typedef std::map<const char *, unsigned, string_compare> string_index_map;
190 string_vector vector_;
191 string_index_map map_;
192 };
193
194 /* Profile of a function instance:
195 1. total_count of the function.
196 2. head_count (entry basic block count) of the function (only valid when
197 function is a top-level function_instance, i.e. it is the original copy
198 instead of the inlined copy).
199 3. map from source location (decl_lineno) to profile (count_info).
200 4. map from callsite to callee function_instance. */
201 class function_instance
202 {
203 public:
204 typedef auto_vec<function_instance *> function_instance_stack;
205
206 /* Read the profile and return a function_instance with head count as
207 HEAD_COUNT. Recursively read callsites to create nested function_instances
208 too. STACK is used to track the recursive creation process. */
209 static function_instance *
210 read_function_instance (function_instance_stack *stack,
211 gcov_type head_count);
212
213 /* Recursively deallocate all callsites (nested function_instances). */
214 ~function_instance ();
215
216 /* Accessors. */
217 int
218 name () const
219 {
220 return name_;
221 }
222 gcov_type
223 total_count () const
224 {
225 return total_count_;
226 }
227 gcov_type
228 head_count () const
229 {
230 return head_count_;
231 }
232
233 /* Traverse callsites of the current function_instance to find one at the
234 location of LINENO and callee name represented in DECL. */
235 function_instance *get_function_instance_by_decl (unsigned lineno,
236 tree decl) const;
237
238 /* Store the profile info for LOC in INFO. Return TRUE if profile info
239 is found. */
240 bool get_count_info (location_t loc, count_info *info) const;
241
242 /* Read the inlined indirect call target profile for STMT and store it in
243 MAP, return the total count for all inlined indirect calls. */
244 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
245
246 /* Sum of counts that is used during annotation. */
247 gcov_type total_annotated_count () const;
248
249 /* Mark LOC as annotated. */
250 void mark_annotated (location_t loc);
251
252 private:
253 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
254 typedef std::pair<unsigned, unsigned> callsite;
255
256 /* Map from callsite to callee function_instance. */
257 typedef std::map<callsite, function_instance *> callsite_map;
258
259 function_instance (unsigned name, gcov_type head_count)
260 : name_ (name), total_count_ (0), head_count_ (head_count)
261 {
262 }
263
264 /* Map from source location (decl_lineno) to profile (count_info). */
265 typedef std::map<unsigned, count_info> position_count_map;
266
267 /* function_instance name index in the string_table. */
268 unsigned name_;
269
270 /* Total sample count. */
271 gcov_type total_count_;
272
273 /* Entry BB's sample count. */
274 gcov_type head_count_;
275
276 /* Map from callsite location to callee function_instance. */
277 callsite_map callsites;
278
279 /* Map from source location to count_info. */
280 position_count_map pos_counts;
281 };
282
283 /* Profile for all functions. */
284 class autofdo_source_profile
285 {
286 public:
287 static autofdo_source_profile *
288 create ()
289 {
290 autofdo_source_profile *map = new autofdo_source_profile ();
291
292 if (map->read ())
293 return map;
294 delete map;
295 return NULL;
296 }
297
298 ~autofdo_source_profile ();
299
300 /* For a given DECL, returns the top-level function_instance. */
301 function_instance *get_function_instance_by_decl (tree decl) const;
302
303 /* Find count_info for a given gimple STMT. If found, store the count_info
304 in INFO and return true; otherwise return false. */
305 bool get_count_info (gimple stmt, count_info *info) const;
306
307 /* Find total count of the callee of EDGE. */
308 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
309
310 /* Update value profile INFO for STMT from the inlined indirect callsite.
311 Return true if INFO is updated. */
312 bool update_inlined_ind_target (gcall *stmt, count_info *info);
313
314 /* Mark LOC as annotated. */
315 void mark_annotated (location_t loc);
316
317 private:
318 /* Map from function_instance name index (in string_table) to
319 function_instance. */
320 typedef std::map<unsigned, function_instance *> name_function_instance_map;
321
322 autofdo_source_profile () {}
323
324 /* Read AutoFDO profile and returns TRUE on success. */
325 bool read ();
326
327 /* Return the function_instance in the profile that correspond to the
328 inline STACK. */
329 function_instance *
330 get_function_instance_by_inline_stack (const inline_stack &stack) const;
331
332 name_function_instance_map map_;
333 };
334
335 /* Store the strings read from the profile data file. */
336 static string_table *afdo_string_table;
337
338 /* Store the AutoFDO source profile. */
339 static autofdo_source_profile *afdo_source_profile;
340
341 /* gcov_ctr_summary structure to store the profile_info. */
342 static struct gcov_ctr_summary *afdo_profile_info;
343
344 /* Helper functions. */
345
346 /* Return the original name of NAME: strip the suffix that starts
347 with '.' Caller is responsible for freeing RET. */
348
349 static char *
350 get_original_name (const char *name)
351 {
352 char *ret = xstrdup (name);
353 char *find = strchr (ret, '.');
354 if (find != NULL)
355 *find = 0;
356 return ret;
357 }
358
359 /* Return the combined location, which is a 32bit integer in which
360 higher 16 bits stores the line offset of LOC to the start lineno
361 of DECL, The lower 16 bits stores the discriminator. */
362
363 static unsigned
364 get_combined_location (location_t loc, tree decl)
365 {
366 /* TODO: allow more bits for line and less bits for discriminator. */
367 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
368 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
369 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
370 }
371
372 /* Return the function decl of a given lexical BLOCK. */
373
374 static tree
375 get_function_decl_from_block (tree block)
376 {
377 tree decl;
378
379 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
380 return NULL_TREE;
381
382 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
383 decl && (TREE_CODE (decl) == BLOCK);
384 decl = BLOCK_ABSTRACT_ORIGIN (decl))
385 if (TREE_CODE (decl) == FUNCTION_DECL)
386 break;
387 return decl;
388 }
389
390 /* Store inline stack for STMT in STACK. */
391
392 static void
393 get_inline_stack (location_t locus, inline_stack *stack)
394 {
395 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
396 return;
397
398 tree block = LOCATION_BLOCK (locus);
399 if (block && TREE_CODE (block) == BLOCK)
400 {
401 int level = 0;
402 for (block = BLOCK_SUPERCONTEXT (block);
403 block && (TREE_CODE (block) == BLOCK);
404 block = BLOCK_SUPERCONTEXT (block))
405 {
406 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
407 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
408 continue;
409
410 tree decl = get_function_decl_from_block (block);
411 stack->safe_push (
412 std::make_pair (decl, get_combined_location (locus, decl)));
413 locus = tmp_locus;
414 level++;
415 }
416 }
417 stack->safe_push (
418 std::make_pair (current_function_decl,
419 get_combined_location (locus, current_function_decl)));
420 }
421
422 /* Return STMT's combined location, which is a 32bit integer in which
423 higher 16 bits stores the line offset of LOC to the start lineno
424 of DECL, The lower 16 bits stores the discriminator. */
425
426 static unsigned
427 get_relative_location_for_stmt (gimple stmt)
428 {
429 location_t locus = gimple_location (stmt);
430 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
431 return UNKNOWN_LOCATION;
432
433 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
434 block = BLOCK_SUPERCONTEXT (block))
435 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
436 return get_combined_location (locus,
437 get_function_decl_from_block (block));
438 return get_combined_location (locus, current_function_decl);
439 }
440
441 /* Return true if BB contains indirect call. */
442
443 static bool
444 has_indirect_call (basic_block bb)
445 {
446 gimple_stmt_iterator gsi;
447
448 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
449 {
450 gimple stmt = gsi_stmt (gsi);
451 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
452 && (gimple_call_fn (stmt) == NULL
453 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
454 return true;
455 }
456 return false;
457 }
458
459 /* Member functions for string_table. */
460
461 /* Deconstructor. */
462
463 string_table::~string_table ()
464 {
465 for (unsigned i = 0; i < vector_.length (); i++)
466 free (vector_[i]);
467 }
468
469
470 /* Return the index of a given function NAME. Return -1 if NAME is not
471 found in string table. */
472
473 int
474 string_table::get_index (const char *name) const
475 {
476 if (name == NULL)
477 return -1;
478 string_index_map::const_iterator iter = map_.find (name);
479 if (iter == map_.end ())
480 return -1;
481
482 return iter->second;
483 }
484
485 /* Return the index of a given function DECL. Return -1 if DECL is not
486 found in string table. */
487
488 int
489 string_table::get_index_by_decl (tree decl) const
490 {
491 char *name
492 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
493 int ret = get_index (name);
494 free (name);
495 if (ret != -1)
496 return ret;
497 ret = get_index (lang_hooks.dwarf_name (decl, 0));
498 if (ret != -1)
499 return ret;
500 if (DECL_ABSTRACT_ORIGIN (decl))
501 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
502
503 return -1;
504 }
505
506 /* Return the function name of a given INDEX. */
507
508 const char *
509 string_table::get_name (int index) const
510 {
511 gcc_assert (index > 0 && index < (int)vector_.length ());
512 return vector_[index];
513 }
514
515 /* Read the string table. Return TRUE if reading is successful. */
516
517 bool
518 string_table::read ()
519 {
520 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
521 return false;
522 /* Skip the length of the section. */
523 gcov_read_unsigned ();
524 /* Read in the file name table. */
525 unsigned string_num = gcov_read_unsigned ();
526 for (unsigned i = 0; i < string_num; i++)
527 {
528 vector_.safe_push (get_original_name (gcov_read_string ()));
529 map_[vector_.last ()] = i;
530 }
531 return true;
532 }
533
534 /* Member functions for function_instance. */
535
536 function_instance::~function_instance ()
537 {
538 for (callsite_map::iterator iter = callsites.begin ();
539 iter != callsites.end (); ++iter)
540 delete iter->second;
541 }
542
543 /* Traverse callsites of the current function_instance to find one at the
544 location of LINENO and callee name represented in DECL. */
545
546 function_instance *
547 function_instance::get_function_instance_by_decl (unsigned lineno,
548 tree decl) const
549 {
550 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
551 if (func_name_idx != -1)
552 {
553 callsite_map::const_iterator ret
554 = callsites.find (std::make_pair (lineno, func_name_idx));
555 if (ret != callsites.end ())
556 return ret->second;
557 }
558 func_name_idx
559 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
560 if (func_name_idx != -1)
561 {
562 callsite_map::const_iterator ret
563 = callsites.find (std::make_pair (lineno, func_name_idx));
564 if (ret != callsites.end ())
565 return ret->second;
566 }
567 if (DECL_ABSTRACT_ORIGIN (decl))
568 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
569
570 return NULL;
571 }
572
573 /* Store the profile info for LOC in INFO. Return TRUE if profile info
574 is found. */
575
576 bool
577 function_instance::get_count_info (location_t loc, count_info *info) const
578 {
579 position_count_map::const_iterator iter = pos_counts.find (loc);
580 if (iter == pos_counts.end ())
581 return false;
582 *info = iter->second;
583 return true;
584 }
585
586 /* Mark LOC as annotated. */
587
588 void
589 function_instance::mark_annotated (location_t loc)
590 {
591 position_count_map::iterator iter = pos_counts.find (loc);
592 if (iter == pos_counts.end ())
593 return;
594 iter->second.annotated = true;
595 }
596
597 /* Read the inlined indirect call target profile for STMT and store it in
598 MAP, return the total count for all inlined indirect calls. */
599
600 gcov_type
601 function_instance::find_icall_target_map (gcall *stmt,
602 icall_target_map *map) const
603 {
604 gcov_type ret = 0;
605 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
606
607 for (callsite_map::const_iterator iter = callsites.begin ();
608 iter != callsites.end (); ++iter)
609 {
610 unsigned callee = iter->second->name ();
611 /* Check if callsite location match the stmt. */
612 if (iter->first.first != stmt_offset)
613 continue;
614 struct cgraph_node *node = cgraph_node::get_for_asmname (
615 get_identifier (afdo_string_table->get_name (callee)));
616 if (node == NULL)
617 continue;
618 if (!check_ic_target (stmt, node))
619 continue;
620 (*map)[callee] = iter->second->total_count ();
621 ret += iter->second->total_count ();
622 }
623 return ret;
624 }
625
626 /* Read the profile and create a function_instance with head count as
627 HEAD_COUNT. Recursively read callsites to create nested function_instances
628 too. STACK is used to track the recursive creation process. */
629
630 /* function instance profile format:
631
632 ENTRY_COUNT: 8 bytes
633 NAME_INDEX: 4 bytes
634 NUM_POS_COUNTS: 4 bytes
635 NUM_CALLSITES: 4 byte
636 POS_COUNT_1:
637 POS_1_OFFSET: 4 bytes
638 NUM_TARGETS: 4 bytes
639 COUNT: 8 bytes
640 TARGET_1:
641 VALUE_PROFILE_TYPE: 4 bytes
642 TARGET_IDX: 8 bytes
643 COUNT: 8 bytes
644 TARGET_2
645 ...
646 TARGET_n
647 POS_COUNT_2
648 ...
649 POS_COUNT_N
650 CALLSITE_1:
651 CALLSITE_1_OFFSET: 4 bytes
652 FUNCTION_INSTANCE_PROFILE (nested)
653 CALLSITE_2
654 ...
655 CALLSITE_n. */
656
657 function_instance *
658 function_instance::read_function_instance (function_instance_stack *stack,
659 gcov_type head_count)
660 {
661 unsigned name = gcov_read_unsigned ();
662 unsigned num_pos_counts = gcov_read_unsigned ();
663 unsigned num_callsites = gcov_read_unsigned ();
664 function_instance *s = new function_instance (name, head_count);
665 stack->safe_push (s);
666
667 for (unsigned i = 0; i < num_pos_counts; i++)
668 {
669 unsigned offset = gcov_read_unsigned () & 0xffff0000;
670 unsigned num_targets = gcov_read_unsigned ();
671 gcov_type count = gcov_read_counter ();
672 s->pos_counts[offset].count = count;
673 for (unsigned j = 0; j < stack->length (); j++)
674 (*stack)[j]->total_count_ += count;
675 for (unsigned j = 0; j < num_targets; j++)
676 {
677 /* Only indirect call target histogram is supported now. */
678 gcov_read_unsigned ();
679 gcov_type target_idx = gcov_read_counter ();
680 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
681 }
682 }
683 for (unsigned i = 0; i < num_callsites; i++)
684 {
685 unsigned offset = gcov_read_unsigned ();
686 function_instance *callee_function_instance
687 = read_function_instance (stack, 0);
688 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
689 = callee_function_instance;
690 }
691 stack->pop ();
692 return s;
693 }
694
695 /* Sum of counts that is used during annotation. */
696
697 gcov_type
698 function_instance::total_annotated_count () const
699 {
700 gcov_type ret = 0;
701 for (callsite_map::const_iterator iter = callsites.begin ();
702 iter != callsites.end (); ++iter)
703 ret += iter->second->total_annotated_count ();
704 for (position_count_map::const_iterator iter = pos_counts.begin ();
705 iter != pos_counts.end (); ++iter)
706 if (iter->second.annotated)
707 ret += iter->second.count;
708 return ret;
709 }
710
711 /* Member functions for autofdo_source_profile. */
712
713 autofdo_source_profile::~autofdo_source_profile ()
714 {
715 for (name_function_instance_map::const_iterator iter = map_.begin ();
716 iter != map_.end (); ++iter)
717 delete iter->second;
718 }
719
720 /* For a given DECL, returns the top-level function_instance. */
721
722 function_instance *
723 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
724 {
725 int index = afdo_string_table->get_index_by_decl (decl);
726 if (index == -1)
727 return NULL;
728 name_function_instance_map::const_iterator ret = map_.find (index);
729 return ret == map_.end () ? NULL : ret->second;
730 }
731
732 /* Find count_info for a given gimple STMT. If found, store the count_info
733 in INFO and return true; otherwise return false. */
734
735 bool
736 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
737 {
738 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
739 return false;
740
741 inline_stack stack;
742 get_inline_stack (gimple_location (stmt), &stack);
743 if (stack.length () == 0)
744 return false;
745 function_instance *s = get_function_instance_by_inline_stack (stack);
746 if (s == NULL)
747 return false;
748 return s->get_count_info (stack[0].second, info);
749 }
750
751 /* Mark LOC as annotated. */
752
753 void
754 autofdo_source_profile::mark_annotated (location_t loc)
755 {
756 inline_stack stack;
757 get_inline_stack (loc, &stack);
758 if (stack.length () == 0)
759 return;
760 function_instance *s = get_function_instance_by_inline_stack (stack);
761 if (s == NULL)
762 return;
763 s->mark_annotated (stack[0].second);
764 }
765
766 /* Update value profile INFO for STMT from the inlined indirect callsite.
767 Return true if INFO is updated. */
768
769 bool
770 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
771 count_info *info)
772 {
773 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
774 return false;
775
776 count_info old_info;
777 get_count_info (stmt, &old_info);
778 gcov_type total = 0;
779 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
780 iter != old_info.targets.end (); ++iter)
781 total += iter->second;
782
783 /* Program behavior changed, original promoted (and inlined) target is not
784 hot any more. Will avoid promote the original target.
785
786 To check if original promoted target is still hot, we check the total
787 count of the unpromoted targets (stored in old_info). If it is no less
788 than half of the callsite count (stored in INFO), the original promoted
789 target is considered not hot any more. */
790 if (total >= info->count / 2)
791 return false;
792
793 inline_stack stack;
794 get_inline_stack (gimple_location (stmt), &stack);
795 if (stack.length () == 0)
796 return false;
797 function_instance *s = get_function_instance_by_inline_stack (stack);
798 if (s == NULL)
799 return false;
800 icall_target_map map;
801 if (s->find_icall_target_map (stmt, &map) == 0)
802 return false;
803 for (icall_target_map::const_iterator iter = map.begin ();
804 iter != map.end (); ++iter)
805 info->targets[iter->first] = iter->second;
806 return true;
807 }
808
809 /* Find total count of the callee of EDGE. */
810
811 gcov_type
812 autofdo_source_profile::get_callsite_total_count (
813 struct cgraph_edge *edge) const
814 {
815 inline_stack stack;
816 stack.safe_push (std::make_pair (edge->callee->decl, 0));
817 get_inline_stack (gimple_location (edge->call_stmt), &stack);
818
819 function_instance *s = get_function_instance_by_inline_stack (stack);
820 if (s == NULL
821 || afdo_string_table->get_index (IDENTIFIER_POINTER (
822 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
823 return 0;
824
825 return s->total_count ();
826 }
827
828 /* Read AutoFDO profile and returns TRUE on success. */
829
830 /* source profile format:
831
832 GCOV_TAG_AFDO_FUNCTION: 4 bytes
833 LENGTH: 4 bytes
834 NUM_FUNCTIONS: 4 bytes
835 FUNCTION_INSTANCE_1
836 FUNCTION_INSTANCE_2
837 ...
838 FUNCTION_INSTANCE_N. */
839
840 bool
841 autofdo_source_profile::read ()
842 {
843 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
844 {
845 inform (0, "Not expected TAG.");
846 return false;
847 }
848
849 /* Skip the length of the section. */
850 gcov_read_unsigned ();
851
852 /* Read in the function/callsite profile, and store it in local
853 data structure. */
854 unsigned function_num = gcov_read_unsigned ();
855 for (unsigned i = 0; i < function_num; i++)
856 {
857 function_instance::function_instance_stack stack;
858 function_instance *s = function_instance::read_function_instance (
859 &stack, gcov_read_counter ());
860 afdo_profile_info->sum_all += s->total_count ();
861 map_[s->name ()] = s;
862 }
863 return true;
864 }
865
866 /* Return the function_instance in the profile that correspond to the
867 inline STACK. */
868
869 function_instance *
870 autofdo_source_profile::get_function_instance_by_inline_stack (
871 const inline_stack &stack) const
872 {
873 name_function_instance_map::const_iterator iter = map_.find (
874 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
875 if (iter == map_.end())
876 return NULL;
877 function_instance *s = iter->second;
878 for (unsigned i = stack.length() - 1; i > 0; i--)
879 {
880 s = s->get_function_instance_by_decl (
881 stack[i].second, stack[i - 1].first);
882 if (s == NULL)
883 return NULL;
884 }
885 return s;
886 }
887
888 /* Module profile is only used by LIPO. Here we simply ignore it. */
889
890 static void
891 fake_read_autofdo_module_profile ()
892 {
893 /* Read in the module info. */
894 gcov_read_unsigned ();
895
896 /* Skip the length of the section. */
897 gcov_read_unsigned ();
898
899 /* Read in the file name table. */
900 unsigned total_module_num = gcov_read_unsigned ();
901 gcc_assert (total_module_num == 0);
902 }
903
904 /* Read data from profile data file. */
905
906 static void
907 read_profile (void)
908 {
909 if (gcov_open (auto_profile_file, 1) == 0)
910 error ("Cannot open profile file %s.", auto_profile_file);
911
912 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
913 error ("AutoFDO profile magic number does not mathch.");
914
915 /* Skip the version number. */
916 unsigned version = gcov_read_unsigned ();
917 if (version != AUTO_PROFILE_VERSION)
918 error ("AutoFDO profile version %u does match %u.",
919 version, AUTO_PROFILE_VERSION);
920
921 /* Skip the empty integer. */
922 gcov_read_unsigned ();
923
924 /* string_table. */
925 afdo_string_table = new string_table ();
926 if (!afdo_string_table->read())
927 error ("Cannot read string table from %s.", auto_profile_file);
928
929 /* autofdo_source_profile. */
930 afdo_source_profile = autofdo_source_profile::create ();
931 if (afdo_source_profile == NULL)
932 error ("Cannot read function profile from %s.", auto_profile_file);
933
934 /* autofdo_module_profile. */
935 fake_read_autofdo_module_profile ();
936
937 /* Read in the working set. */
938 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
939 error ("Cannot read working set from %s.", auto_profile_file);
940
941 /* Skip the length of the section. */
942 gcov_read_unsigned ();
943 gcov_working_set_t set[128];
944 for (unsigned i = 0; i < 128; i++)
945 {
946 set[i].num_counters = gcov_read_unsigned ();
947 set[i].min_counter = gcov_read_counter ();
948 }
949 add_working_set (set);
950 }
951
952 /* From AutoFDO profiles, find values inside STMT for that we want to measure
953 histograms for indirect-call optimization.
954
955 This function is actually served for 2 purposes:
956 * before annotation, we need to mark histogram, promote and inline
957 * after annotation, we just need to mark, and let follow-up logic to
958 decide if it needs to promote and inline. */
959
960 static void
961 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
962 bool transform)
963 {
964 gimple gs = gsi_stmt (*gsi);
965 tree callee;
966
967 if (map.size () == 0)
968 return;
969 gcall *stmt = dyn_cast <gcall *> (gs);
970 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
971 return;
972
973 callee = gimple_call_fn (stmt);
974
975 histogram_value hist = gimple_alloc_histogram_value (
976 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
977 hist->n_counters = 3;
978 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
979 gimple_add_histogram_value (cfun, stmt, hist);
980
981 gcov_type total = 0;
982 icall_target_map::const_iterator max_iter = map.end ();
983
984 for (icall_target_map::const_iterator iter = map.begin ();
985 iter != map.end (); ++iter)
986 {
987 total += iter->second;
988 if (max_iter == map.end () || max_iter->second < iter->second)
989 max_iter = iter;
990 }
991
992 hist->hvalue.counters[0]
993 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
994 hist->hvalue.counters[1] = max_iter->second;
995 hist->hvalue.counters[2] = total;
996
997 if (!transform)
998 return;
999
1000 struct cgraph_edge *indirect_edge
1001 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1002 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1003 get_identifier ((const char *) hist->hvalue.counters[0]));
1004
1005 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1006 return;
1007 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1008 return;
1009 struct cgraph_edge *new_edge
1010 = indirect_edge->make_speculative (direct_call, 0, 0);
1011 new_edge->redirect_call_stmt_to_callee ();
1012 gimple_remove_histogram_value (cfun, stmt, hist);
1013 inline_call (new_edge, true, NULL, NULL, false);
1014 }
1015
1016 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1017 histograms and adds them to list VALUES. */
1018
1019 static void
1020 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1021 bool transform)
1022 {
1023 afdo_indirect_call (gsi, map, transform);
1024 }
1025
1026 typedef std::set<basic_block> bb_set;
1027 typedef std::set<edge> edge_set;
1028
1029 static bool
1030 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1031 {
1032 return annotated.find (bb) != annotated.end ();
1033 }
1034
1035 static void
1036 set_bb_annotated (basic_block bb, bb_set *annotated)
1037 {
1038 annotated->insert (bb);
1039 }
1040
1041 static bool
1042 is_edge_annotated (const edge e, const edge_set &annotated)
1043 {
1044 return annotated.find (e) != annotated.end ();
1045 }
1046
1047 static void
1048 set_edge_annotated (edge e, edge_set *annotated)
1049 {
1050 annotated->insert (e);
1051 }
1052
1053 /* For a given BB, set its execution count. Attach value profile if a stmt
1054 is not in PROMOTED, because we only want to promote an indirect call once.
1055 Return TRUE if BB is annotated. */
1056
1057 static bool
1058 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1059 {
1060 gimple_stmt_iterator gsi;
1061 edge e;
1062 edge_iterator ei;
1063 gcov_type max_count = 0;
1064 bool has_annotated = false;
1065
1066 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1067 {
1068 count_info info;
1069 gimple stmt = gsi_stmt (gsi);
1070 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1071 continue;
1072 if (afdo_source_profile->get_count_info (stmt, &info))
1073 {
1074 if (info.count > max_count)
1075 max_count = info.count;
1076 has_annotated = true;
1077 if (info.targets.size () > 0
1078 && promoted.find (stmt) == promoted.end ())
1079 afdo_vpt (&gsi, info.targets, false);
1080 }
1081 }
1082
1083 if (!has_annotated)
1084 return false;
1085
1086 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1087 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1088 for (gphi_iterator gpi = gsi_start_phis (bb);
1089 !gsi_end_p (gpi);
1090 gsi_next (&gpi))
1091 {
1092 gphi *phi = gpi.phi ();
1093 size_t i;
1094 for (i = 0; i < gimple_phi_num_args (phi); i++)
1095 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1096 }
1097 FOR_EACH_EDGE (e, ei, bb->succs)
1098 afdo_source_profile->mark_annotated (e->goto_locus);
1099
1100 bb->count = max_count;
1101 return true;
1102 }
1103
1104 /* BB1 and BB2 are in an equivalent class iff:
1105 1. BB1 dominates BB2.
1106 2. BB2 post-dominates BB1.
1107 3. BB1 and BB2 are in the same loop nest.
1108 This function finds the equivalent class for each basic block, and
1109 stores a pointer to the first BB in its equivalent class. Meanwhile,
1110 set bb counts for the same equivalent class to be idenical. Update
1111 ANNOTATED_BB for the first BB in its equivalent class. */
1112
1113 static void
1114 afdo_find_equiv_class (bb_set *annotated_bb)
1115 {
1116 basic_block bb;
1117
1118 FOR_ALL_BB_FN (bb, cfun)
1119 bb->aux = NULL;
1120
1121 FOR_ALL_BB_FN (bb, cfun)
1122 {
1123 vec<basic_block> dom_bbs;
1124 basic_block bb1;
1125 int i;
1126
1127 if (bb->aux != NULL)
1128 continue;
1129 bb->aux = bb;
1130 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1131 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1132 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1133 && bb1->loop_father == bb->loop_father)
1134 {
1135 bb1->aux = bb;
1136 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1137 {
1138 bb->count = bb1->count;
1139 set_bb_annotated (bb, annotated_bb);
1140 }
1141 }
1142 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1143 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1144 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1145 && bb1->loop_father == bb->loop_father)
1146 {
1147 bb1->aux = bb;
1148 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1149 {
1150 bb->count = bb1->count;
1151 set_bb_annotated (bb, annotated_bb);
1152 }
1153 }
1154 }
1155 }
1156
1157 /* If a basic block's count is known, and only one of its in/out edges' count
1158 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1159 edges' counts are known, then the basic block's unknown count can also be
1160 calculated.
1161 IS_SUCC is true if out edges of a basic blocks are examined.
1162 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1163 Return TRUE if any basic block/edge count is changed. */
1164
1165 static bool
1166 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1167 edge_set *annotated_edge)
1168 {
1169 basic_block bb;
1170 bool changed = false;
1171
1172 FOR_EACH_BB_FN (bb, cfun)
1173 {
1174 edge e, unknown_edge = NULL;
1175 edge_iterator ei;
1176 int num_unknown_edge = 0;
1177 gcov_type total_known_count = 0;
1178
1179 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1180 if (!is_edge_annotated (e, *annotated_edge))
1181 num_unknown_edge++, unknown_edge = e;
1182 else
1183 total_known_count += e->count;
1184
1185 if (num_unknown_edge == 0)
1186 {
1187 if (total_known_count > bb->count)
1188 {
1189 bb->count = total_known_count;
1190 changed = true;
1191 }
1192 if (!is_bb_annotated (bb, *annotated_bb))
1193 {
1194 set_bb_annotated (bb, annotated_bb);
1195 changed = true;
1196 }
1197 }
1198 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1199 {
1200 if (bb->count >= total_known_count)
1201 unknown_edge->count = bb->count - total_known_count;
1202 else
1203 unknown_edge->count = 0;
1204 set_edge_annotated (unknown_edge, annotated_edge);
1205 changed = true;
1206 }
1207 }
1208 return changed;
1209 }
1210
1211 /* Special propagation for circuit expressions. Because GCC translates
1212 control flow into data flow for circuit expressions. E.g.
1213 BB1:
1214 if (a && b)
1215 BB2
1216 else
1217 BB3
1218
1219 will be translated into:
1220
1221 BB1:
1222 if (a)
1223 goto BB.t1
1224 else
1225 goto BB.t3
1226 BB.t1:
1227 if (b)
1228 goto BB.t2
1229 else
1230 goto BB.t3
1231 BB.t2:
1232 goto BB.t3
1233 BB.t3:
1234 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1235 if (tmp)
1236 goto BB2
1237 else
1238 goto BB3
1239
1240 In this case, we need to propagate through PHI to determine the edge
1241 count of BB1->BB.t1, BB.t1->BB.t2.
1242 Update ANNOTATED_EDGE accordingly. */
1243
1244 static void
1245 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1246 {
1247 basic_block bb;
1248 FOR_ALL_BB_FN (bb, cfun)
1249 {
1250 gimple def_stmt;
1251 tree cmp_rhs, cmp_lhs;
1252 gimple cmp_stmt = last_stmt (bb);
1253 edge e;
1254 edge_iterator ei;
1255
1256 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1257 continue;
1258 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1259 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1260 if (!TREE_CONSTANT (cmp_rhs)
1261 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1262 continue;
1263 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1264 continue;
1265 if (!is_bb_annotated (bb, annotated_bb))
1266 continue;
1267 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1268 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1269 && gimple_assign_single_p (def_stmt)
1270 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1271 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1272 if (!def_stmt)
1273 continue;
1274 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1275 if (!phi_stmt)
1276 continue;
1277 FOR_EACH_EDGE (e, ei, bb->succs)
1278 {
1279 unsigned i, total = 0;
1280 edge only_one;
1281 bool check_value_one = (((integer_onep (cmp_rhs))
1282 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1283 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1284 if (!is_edge_annotated (e, *annotated_edge))
1285 continue;
1286 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1287 {
1288 tree val = gimple_phi_arg_def (phi_stmt, i);
1289 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1290
1291 if (!TREE_CONSTANT (val)
1292 || !(integer_zerop (val) || integer_onep (val)))
1293 continue;
1294 if (check_value_one ^ integer_onep (val))
1295 continue;
1296 total++;
1297 only_one = ep;
1298 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1299 {
1300 ep->probability = 0;
1301 ep->count = 0;
1302 set_edge_annotated (ep, annotated_edge);
1303 }
1304 }
1305 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1306 {
1307 only_one->probability = e->probability;
1308 only_one->count = e->count;
1309 set_edge_annotated (only_one, annotated_edge);
1310 }
1311 }
1312 }
1313 }
1314
1315 /* Propagate the basic block count and edge count on the control flow
1316 graph. We do the propagation iteratively until stablize. */
1317
1318 static void
1319 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1320 {
1321 basic_block bb;
1322 bool changed = true;
1323 int i = 0;
1324
1325 FOR_ALL_BB_FN (bb, cfun)
1326 {
1327 bb->count = ((basic_block)bb->aux)->count;
1328 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1329 set_bb_annotated (bb, annotated_bb);
1330 }
1331
1332 while (changed && i++ < 10)
1333 {
1334 changed = false;
1335
1336 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1337 changed = true;
1338 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1339 changed = true;
1340 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1341 }
1342 }
1343
1344 /* Propagate counts on control flow graph and calculate branch
1345 probabilities. */
1346
1347 static void
1348 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1349 {
1350 basic_block bb;
1351 bool has_sample = false;
1352
1353 FOR_EACH_BB_FN (bb, cfun)
1354 {
1355 if (bb->count > 0)
1356 {
1357 has_sample = true;
1358 break;
1359 }
1360 }
1361
1362 if (!has_sample)
1363 return;
1364
1365 calculate_dominance_info (CDI_POST_DOMINATORS);
1366 calculate_dominance_info (CDI_DOMINATORS);
1367 loop_optimizer_init (0);
1368
1369 afdo_find_equiv_class (annotated_bb);
1370 afdo_propagate (annotated_bb, annotated_edge);
1371
1372 FOR_EACH_BB_FN (bb, cfun)
1373 {
1374 edge e;
1375 edge_iterator ei;
1376 int num_unknown_succ = 0;
1377 gcov_type total_count = 0;
1378
1379 FOR_EACH_EDGE (e, ei, bb->succs)
1380 {
1381 if (!is_edge_annotated (e, *annotated_edge))
1382 num_unknown_succ++;
1383 else
1384 total_count += e->count;
1385 }
1386 if (num_unknown_succ == 0 && total_count > 0)
1387 {
1388 FOR_EACH_EDGE (e, ei, bb->succs)
1389 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1390 }
1391 }
1392 FOR_ALL_BB_FN (bb, cfun)
1393 {
1394 edge e;
1395 edge_iterator ei;
1396
1397 FOR_EACH_EDGE (e, ei, bb->succs)
1398 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1399 bb->aux = NULL;
1400 }
1401
1402 loop_optimizer_finalize ();
1403 free_dominance_info (CDI_DOMINATORS);
1404 free_dominance_info (CDI_POST_DOMINATORS);
1405 }
1406
1407 /* Perform value profile transformation using AutoFDO profile. Add the
1408 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1409 indirect call promoted. */
1410
1411 static bool
1412 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1413 {
1414 basic_block bb;
1415 if (afdo_source_profile->get_function_instance_by_decl (
1416 current_function_decl) == NULL)
1417 return false;
1418
1419 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1420
1421 bool has_vpt = false;
1422 FOR_EACH_BB_FN (bb, cfun)
1423 {
1424 if (!has_indirect_call (bb))
1425 continue;
1426 gimple_stmt_iterator gsi;
1427
1428 gcov_type bb_count = 0;
1429 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1430 {
1431 count_info info;
1432 gimple stmt = gsi_stmt (gsi);
1433 if (afdo_source_profile->get_count_info (stmt, &info))
1434 bb_count = MAX (bb_count, info.count);
1435 }
1436
1437 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1438 {
1439 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1440 /* IC_promotion and early_inline_2 is done in multiple iterations.
1441 No need to promoted the stmt if its in promoted_stmts (means
1442 it is already been promoted in the previous iterations). */
1443 if ((!stmt) || gimple_call_fn (stmt) == NULL
1444 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1445 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1446 continue;
1447
1448 count_info info;
1449 afdo_source_profile->get_count_info (stmt, &info);
1450 info.count = bb_count;
1451 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1452 {
1453 /* Promote the indirect call and update the promoted_stmts. */
1454 promoted_stmts->insert (stmt);
1455 afdo_vpt (&gsi, info.targets, true);
1456 has_vpt = true;
1457 }
1458 }
1459 }
1460
1461 if (has_vpt)
1462 {
1463 optimize_inline_calls (current_function_decl);
1464 return true;
1465 }
1466
1467 return false;
1468 }
1469
1470 /* Annotate auto profile to the control flow graph. Do not annotate value
1471 profile for stmts in PROMOTED_STMTS. */
1472
1473 static void
1474 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1475 {
1476 basic_block bb;
1477 bb_set annotated_bb;
1478 edge_set annotated_edge;
1479 const function_instance *s
1480 = afdo_source_profile->get_function_instance_by_decl (
1481 current_function_decl);
1482
1483 if (s == NULL)
1484 return;
1485 cgraph_node::get (current_function_decl)->count = s->head_count ();
1486 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1487 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1488
1489 FOR_EACH_BB_FN (bb, cfun)
1490 {
1491 edge e;
1492 edge_iterator ei;
1493
1494 bb->count = 0;
1495 FOR_EACH_EDGE (e, ei, bb->succs)
1496 e->count = 0;
1497
1498 if (afdo_set_bb_count (bb, promoted_stmts))
1499 set_bb_annotated (bb, &annotated_bb);
1500 if (bb->count > max_count)
1501 max_count = bb->count;
1502 }
1503 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1504 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1505 {
1506 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1507 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1508 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1509 }
1510 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1511 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1512 {
1513 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1514 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1515 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1516 }
1517 afdo_source_profile->mark_annotated (
1518 DECL_SOURCE_LOCATION (current_function_decl));
1519 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1520 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1521 if (max_count > 0)
1522 {
1523 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1524 counts_to_freqs ();
1525 profile_status_for_fn (cfun) = PROFILE_READ;
1526 }
1527 if (flag_value_profile_transformations)
1528 {
1529 gimple_value_profile_transformations ();
1530 free_dominance_info (CDI_DOMINATORS);
1531 free_dominance_info (CDI_POST_DOMINATORS);
1532 update_ssa (TODO_update_ssa);
1533 }
1534 }
1535
1536 /* Wrapper function to invoke early inliner. */
1537
1538 static void
1539 early_inline ()
1540 {
1541 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1542 unsigned todo = early_inliner (cfun);
1543 if (todo & TODO_update_ssa_any)
1544 update_ssa (TODO_update_ssa);
1545 }
1546
1547 /* Use AutoFDO profile to annoate the control flow graph.
1548 Return the todo flag. */
1549
1550 static unsigned int
1551 auto_profile (void)
1552 {
1553 struct cgraph_node *node;
1554
1555 if (symtab->state == FINISHED)
1556 return 0;
1557
1558 init_node_map (true);
1559 profile_info = autofdo::afdo_profile_info;
1560
1561 FOR_EACH_FUNCTION (node)
1562 {
1563 if (!gimple_has_body_p (node->decl))
1564 continue;
1565
1566 /* Don't profile functions produced for builtin stuff. */
1567 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1568 continue;
1569
1570 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1571
1572 /* First do indirect call promotion and early inline to make the
1573 IR match the profiled binary before actual annotation.
1574
1575 This is needed because an indirect call might have been promoted
1576 and inlined in the profiled binary. If we do not promote and
1577 inline these indirect calls before annotation, the profile for
1578 these promoted functions will be lost.
1579
1580 e.g. foo() --indirect_call--> bar()
1581 In profiled binary, the callsite is promoted and inlined, making
1582 the profile look like:
1583
1584 foo: {
1585 loc_foo_1: count_1
1586 bar@loc_foo_2: {
1587 loc_bar_1: count_2
1588 loc_bar_2: count_3
1589 }
1590 }
1591
1592 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1593 If we perform annotation on it, the profile inside bar@loc_foo2
1594 will be wasted.
1595
1596 To avoid this, we promote loc_foo_2 and inline the promoted bar
1597 function before annotation, so the profile inside bar@loc_foo2
1598 will be useful. */
1599 autofdo::stmt_set promoted_stmts;
1600 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1601 {
1602 if (!flag_value_profile_transformations
1603 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1604 break;
1605 early_inline ();
1606 }
1607
1608 early_inline ();
1609 autofdo::afdo_annotate_cfg (promoted_stmts);
1610 compute_function_frequency ();
1611
1612 /* Local pure-const may imply need to fixup the cfg. */
1613 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1614 cleanup_tree_cfg ();
1615
1616 free_dominance_info (CDI_DOMINATORS);
1617 free_dominance_info (CDI_POST_DOMINATORS);
1618 cgraph_edge::rebuild_edges ();
1619 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1620 pop_cfun ();
1621 }
1622
1623 return TODO_rebuild_cgraph_edges;
1624 }
1625 } /* namespace autofdo. */
1626
1627 /* Read the profile from the profile data file. */
1628
1629 void
1630 read_autofdo_file (void)
1631 {
1632 if (auto_profile_file == NULL)
1633 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1634
1635 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1636 1, sizeof (struct gcov_ctr_summary));
1637 autofdo::afdo_profile_info->runs = 1;
1638 autofdo::afdo_profile_info->sum_max = 0;
1639 autofdo::afdo_profile_info->sum_all = 0;
1640
1641 /* Read the profile from the profile file. */
1642 autofdo::read_profile ();
1643 }
1644
1645 /* Free the resources. */
1646
1647 void
1648 end_auto_profile (void)
1649 {
1650 delete autofdo::afdo_source_profile;
1651 delete autofdo::afdo_string_table;
1652 profile_info = NULL;
1653 }
1654
1655 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1656
1657 bool
1658 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1659 {
1660 gcov_type count
1661 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1662
1663 if (count > 0)
1664 {
1665 bool is_hot;
1666 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1667 /* At early inline stage, profile_info is not set yet. We need to
1668 temporarily set it to afdo_profile_info to calculate hotness. */
1669 profile_info = autofdo::afdo_profile_info;
1670 is_hot = maybe_hot_count_p (NULL, count);
1671 profile_info = saved_profile_info;
1672 return is_hot;
1673 }
1674
1675 return false;
1676 }
1677
1678 namespace
1679 {
1680
1681 const pass_data pass_data_ipa_auto_profile = {
1682 SIMPLE_IPA_PASS, "afdo", /* name */
1683 OPTGROUP_NONE, /* optinfo_flags */
1684 TV_IPA_AUTOFDO, /* tv_id */
1685 0, /* properties_required */
1686 0, /* properties_provided */
1687 0, /* properties_destroyed */
1688 0, /* todo_flags_start */
1689 0, /* todo_flags_finish */
1690 };
1691
1692 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1693 {
1694 public:
1695 pass_ipa_auto_profile (gcc::context *ctxt)
1696 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1697 {
1698 }
1699
1700 /* opt_pass methods: */
1701 virtual bool
1702 gate (function *)
1703 {
1704 return flag_auto_profile;
1705 }
1706 virtual unsigned int
1707 execute (function *)
1708 {
1709 return autofdo::auto_profile ();
1710 }
1711 }; // class pass_ipa_auto_profile
1712
1713 } // anon namespace
1714
1715 simple_ipa_opt_pass *
1716 make_pass_ipa_auto_profile (gcc::context *ctxt)
1717 {
1718 return new pass_ipa_auto_profile (ctxt);
1719 }