coretypes.h: Include machmode.h...
[gcc.git] / gcc / auto-profile.c
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23
24 #include <string.h>
25 #include <map>
26 #include <set>
27
28 #include "coretypes.h"
29 #include "hash-set.h"
30 #include "vec.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "options.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "tree-pass.h"
39 #include "flags.h"
40 #include "predict.h"
41 #include "vec.h"
42 #include "hashtab.h"
43 #include "hash-set.h"
44 #include "tm.h"
45 #include "hard-reg-set.h"
46 #include "input.h"
47 #include "function.h"
48 #include "dominance.h"
49 #include "cfg.h"
50 #include "basic-block.h"
51 #include "diagnostic-core.h"
52 #include "gcov-io.h"
53 #include "profile.h"
54 #include "langhooks.h"
55 #include "opts.h"
56 #include "tree-pass.h"
57 #include "cfgloop.h"
58 #include "tree-ssa-alias.h"
59 #include "tree-cfg.h"
60 #include "tree-cfgcleanup.h"
61 #include "tree-ssa-operands.h"
62 #include "tree-into-ssa.h"
63 #include "internal-fn.h"
64 #include "is-a.h"
65 #include "gimple-expr.h"
66 #include "gimple.h"
67 #include "gimple-iterator.h"
68 #include "gimple-ssa.h"
69 #include "hash-map.h"
70 #include "plugin-api.h"
71 #include "ipa-ref.h"
72 #include "cgraph.h"
73 #include "value-prof.h"
74 #include "coverage.h"
75 #include "params.h"
76 #include "alloc-pool.h"
77 #include "symbol-summary.h"
78 #include "ipa-prop.h"
79 #include "ipa-inline.h"
80 #include "tree-inline.h"
81 #include "stringpool.h"
82 #include "auto-profile.h"
83
84 /* The following routines implements AutoFDO optimization.
85
86 This optimization uses sampling profiles to annotate basic block counts
87 and uses heuristics to estimate branch probabilities.
88
89 There are three phases in AutoFDO:
90
91 Phase 1: Read profile from the profile data file.
92 The following info is read from the profile datafile:
93 * string_table: a map between function name and its index.
94 * autofdo_source_profile: a map from function_instance name to
95 function_instance. This is represented as a forest of
96 function_instances.
97 * WorkingSet: a histogram of how many instructions are covered for a
98 given percentage of total cycles. This is describing the binary
99 level information (not source level). This info is used to help
100 decide if we want aggressive optimizations that could increase
101 code footprint (e.g. loop unroll etc.)
102 A function instance is an instance of function that could either be a
103 standalone symbol, or a clone of a function that is inlined into another
104 function.
105
106 Phase 2: Early inline + value profile transformation.
107 Early inline uses autofdo_source_profile to find if a callsite is:
108 * inlined in the profiled binary.
109 * callee body is hot in the profiling run.
110 If both condition satisfies, early inline will inline the callsite
111 regardless of the code growth.
112 Phase 2 is an iterative process. During each iteration, we also check
113 if an indirect callsite is promoted and inlined in the profiling run.
114 If yes, vpt will happen to force promote it and in the next iteration,
115 einline will inline the promoted callsite in the next iteration.
116
117 Phase 3: Annotate control flow graph.
118 AutoFDO uses a separate pass to:
119 * Annotate basic block count
120 * Estimate branch probability
121
122 After the above 3 phases, all profile is readily annotated on the GCC IR.
123 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
124 use of the profile. E.g. it uses existing mechanism to calculate the basic
125 block/edge frequency, as well as the cgraph node/edge count.
126 */
127
128 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
129 #define AUTO_PROFILE_VERSION 1
130
131 namespace autofdo
132 {
133
134 /* Represent a source location: (function_decl, lineno). */
135 typedef std::pair<tree, unsigned> decl_lineno;
136
137 /* Represent an inline stack. vector[0] is the leaf node. */
138 typedef auto_vec<decl_lineno> inline_stack;
139
140 /* String array that stores function names. */
141 typedef auto_vec<char *> string_vector;
142
143 /* Map from function name's index in string_table to target's
144 execution count. */
145 typedef std::map<unsigned, gcov_type> icall_target_map;
146
147 /* Set of gimple stmts. Used to track if the stmt has already been promoted
148 to direct call. */
149 typedef std::set<gimple> stmt_set;
150
151 /* Represent count info of an inline stack. */
152 struct count_info
153 {
154 /* Sampled count of the inline stack. */
155 gcov_type count;
156
157 /* Map from indirect call target to its sample count. */
158 icall_target_map targets;
159
160 /* Whether this inline stack is already used in annotation.
161
162 Each inline stack should only be used to annotate IR once.
163 This will be enforced when instruction-level discriminator
164 is supported. */
165 bool annotated;
166 };
167
168 /* operator< for "const char *". */
169 struct string_compare
170 {
171 bool operator()(const char *a, const char *b) const
172 {
173 return strcmp (a, b) < 0;
174 }
175 };
176
177 /* Store a string array, indexed by string position in the array. */
178 class string_table
179 {
180 public:
181 string_table ()
182 {}
183
184 ~string_table ();
185
186 /* For a given string, returns its index. */
187 int get_index (const char *name) const;
188
189 /* For a given decl, returns the index of the decl name. */
190 int get_index_by_decl (tree decl) const;
191
192 /* For a given index, returns the string. */
193 const char *get_name (int index) const;
194
195 /* Read profile, return TRUE on success. */
196 bool read ();
197
198 private:
199 typedef std::map<const char *, unsigned, string_compare> string_index_map;
200 string_vector vector_;
201 string_index_map map_;
202 };
203
204 /* Profile of a function instance:
205 1. total_count of the function.
206 2. head_count (entry basic block count) of the function (only valid when
207 function is a top-level function_instance, i.e. it is the original copy
208 instead of the inlined copy).
209 3. map from source location (decl_lineno) to profile (count_info).
210 4. map from callsite to callee function_instance. */
211 class function_instance
212 {
213 public:
214 typedef auto_vec<function_instance *> function_instance_stack;
215
216 /* Read the profile and return a function_instance with head count as
217 HEAD_COUNT. Recursively read callsites to create nested function_instances
218 too. STACK is used to track the recursive creation process. */
219 static function_instance *
220 read_function_instance (function_instance_stack *stack,
221 gcov_type head_count);
222
223 /* Recursively deallocate all callsites (nested function_instances). */
224 ~function_instance ();
225
226 /* Accessors. */
227 int
228 name () const
229 {
230 return name_;
231 }
232 gcov_type
233 total_count () const
234 {
235 return total_count_;
236 }
237 gcov_type
238 head_count () const
239 {
240 return head_count_;
241 }
242
243 /* Traverse callsites of the current function_instance to find one at the
244 location of LINENO and callee name represented in DECL. */
245 function_instance *get_function_instance_by_decl (unsigned lineno,
246 tree decl) const;
247
248 /* Store the profile info for LOC in INFO. Return TRUE if profile info
249 is found. */
250 bool get_count_info (location_t loc, count_info *info) const;
251
252 /* Read the inlined indirect call target profile for STMT and store it in
253 MAP, return the total count for all inlined indirect calls. */
254 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
255
256 /* Sum of counts that is used during annotation. */
257 gcov_type total_annotated_count () const;
258
259 /* Mark LOC as annotated. */
260 void mark_annotated (location_t loc);
261
262 private:
263 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
264 typedef std::pair<unsigned, unsigned> callsite;
265
266 /* Map from callsite to callee function_instance. */
267 typedef std::map<callsite, function_instance *> callsite_map;
268
269 function_instance (unsigned name, gcov_type head_count)
270 : name_ (name), total_count_ (0), head_count_ (head_count)
271 {
272 }
273
274 /* Map from source location (decl_lineno) to profile (count_info). */
275 typedef std::map<unsigned, count_info> position_count_map;
276
277 /* function_instance name index in the string_table. */
278 unsigned name_;
279
280 /* Total sample count. */
281 gcov_type total_count_;
282
283 /* Entry BB's sample count. */
284 gcov_type head_count_;
285
286 /* Map from callsite location to callee function_instance. */
287 callsite_map callsites;
288
289 /* Map from source location to count_info. */
290 position_count_map pos_counts;
291 };
292
293 /* Profile for all functions. */
294 class autofdo_source_profile
295 {
296 public:
297 static autofdo_source_profile *
298 create ()
299 {
300 autofdo_source_profile *map = new autofdo_source_profile ();
301
302 if (map->read ())
303 return map;
304 delete map;
305 return NULL;
306 }
307
308 ~autofdo_source_profile ();
309
310 /* For a given DECL, returns the top-level function_instance. */
311 function_instance *get_function_instance_by_decl (tree decl) const;
312
313 /* Find count_info for a given gimple STMT. If found, store the count_info
314 in INFO and return true; otherwise return false. */
315 bool get_count_info (gimple stmt, count_info *info) const;
316
317 /* Find total count of the callee of EDGE. */
318 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
319
320 /* Update value profile INFO for STMT from the inlined indirect callsite.
321 Return true if INFO is updated. */
322 bool update_inlined_ind_target (gcall *stmt, count_info *info);
323
324 /* Mark LOC as annotated. */
325 void mark_annotated (location_t loc);
326
327 private:
328 /* Map from function_instance name index (in string_table) to
329 function_instance. */
330 typedef std::map<unsigned, function_instance *> name_function_instance_map;
331
332 autofdo_source_profile () {}
333
334 /* Read AutoFDO profile and returns TRUE on success. */
335 bool read ();
336
337 /* Return the function_instance in the profile that correspond to the
338 inline STACK. */
339 function_instance *
340 get_function_instance_by_inline_stack (const inline_stack &stack) const;
341
342 name_function_instance_map map_;
343 };
344
345 /* Store the strings read from the profile data file. */
346 static string_table *afdo_string_table;
347
348 /* Store the AutoFDO source profile. */
349 static autofdo_source_profile *afdo_source_profile;
350
351 /* gcov_ctr_summary structure to store the profile_info. */
352 static struct gcov_ctr_summary *afdo_profile_info;
353
354 /* Helper functions. */
355
356 /* Return the original name of NAME: strip the suffix that starts
357 with '.' Caller is responsible for freeing RET. */
358
359 static char *
360 get_original_name (const char *name)
361 {
362 char *ret = xstrdup (name);
363 char *find = strchr (ret, '.');
364 if (find != NULL)
365 *find = 0;
366 return ret;
367 }
368
369 /* Return the combined location, which is a 32bit integer in which
370 higher 16 bits stores the line offset of LOC to the start lineno
371 of DECL, The lower 16 bits stores the discriminator. */
372
373 static unsigned
374 get_combined_location (location_t loc, tree decl)
375 {
376 /* TODO: allow more bits for line and less bits for discriminator. */
377 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
378 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
379 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
380 }
381
382 /* Return the function decl of a given lexical BLOCK. */
383
384 static tree
385 get_function_decl_from_block (tree block)
386 {
387 tree decl;
388
389 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
390 return NULL_TREE;
391
392 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
393 decl && (TREE_CODE (decl) == BLOCK);
394 decl = BLOCK_ABSTRACT_ORIGIN (decl))
395 if (TREE_CODE (decl) == FUNCTION_DECL)
396 break;
397 return decl;
398 }
399
400 /* Store inline stack for STMT in STACK. */
401
402 static void
403 get_inline_stack (location_t locus, inline_stack *stack)
404 {
405 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
406 return;
407
408 tree block = LOCATION_BLOCK (locus);
409 if (block && TREE_CODE (block) == BLOCK)
410 {
411 int level = 0;
412 for (block = BLOCK_SUPERCONTEXT (block);
413 block && (TREE_CODE (block) == BLOCK);
414 block = BLOCK_SUPERCONTEXT (block))
415 {
416 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
417 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
418 continue;
419
420 tree decl = get_function_decl_from_block (block);
421 stack->safe_push (
422 std::make_pair (decl, get_combined_location (locus, decl)));
423 locus = tmp_locus;
424 level++;
425 }
426 }
427 stack->safe_push (
428 std::make_pair (current_function_decl,
429 get_combined_location (locus, current_function_decl)));
430 }
431
432 /* Return STMT's combined location, which is a 32bit integer in which
433 higher 16 bits stores the line offset of LOC to the start lineno
434 of DECL, The lower 16 bits stores the discriminator. */
435
436 static unsigned
437 get_relative_location_for_stmt (gimple stmt)
438 {
439 location_t locus = gimple_location (stmt);
440 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
441 return UNKNOWN_LOCATION;
442
443 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
444 block = BLOCK_SUPERCONTEXT (block))
445 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
446 return get_combined_location (locus,
447 get_function_decl_from_block (block));
448 return get_combined_location (locus, current_function_decl);
449 }
450
451 /* Return true if BB contains indirect call. */
452
453 static bool
454 has_indirect_call (basic_block bb)
455 {
456 gimple_stmt_iterator gsi;
457
458 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
459 {
460 gimple stmt = gsi_stmt (gsi);
461 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
462 && (gimple_call_fn (stmt) == NULL
463 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
464 return true;
465 }
466 return false;
467 }
468
469 /* Member functions for string_table. */
470
471 /* Deconstructor. */
472
473 string_table::~string_table ()
474 {
475 for (unsigned i = 0; i < vector_.length (); i++)
476 free (vector_[i]);
477 }
478
479
480 /* Return the index of a given function NAME. Return -1 if NAME is not
481 found in string table. */
482
483 int
484 string_table::get_index (const char *name) const
485 {
486 if (name == NULL)
487 return -1;
488 string_index_map::const_iterator iter = map_.find (name);
489 if (iter == map_.end ())
490 return -1;
491
492 return iter->second;
493 }
494
495 /* Return the index of a given function DECL. Return -1 if DECL is not
496 found in string table. */
497
498 int
499 string_table::get_index_by_decl (tree decl) const
500 {
501 char *name
502 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
503 int ret = get_index (name);
504 free (name);
505 if (ret != -1)
506 return ret;
507 ret = get_index (lang_hooks.dwarf_name (decl, 0));
508 if (ret != -1)
509 return ret;
510 if (DECL_ABSTRACT_ORIGIN (decl))
511 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
512
513 return -1;
514 }
515
516 /* Return the function name of a given INDEX. */
517
518 const char *
519 string_table::get_name (int index) const
520 {
521 gcc_assert (index > 0 && index < (int)vector_.length ());
522 return vector_[index];
523 }
524
525 /* Read the string table. Return TRUE if reading is successful. */
526
527 bool
528 string_table::read ()
529 {
530 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
531 return false;
532 /* Skip the length of the section. */
533 gcov_read_unsigned ();
534 /* Read in the file name table. */
535 unsigned string_num = gcov_read_unsigned ();
536 for (unsigned i = 0; i < string_num; i++)
537 {
538 vector_.safe_push (get_original_name (gcov_read_string ()));
539 map_[vector_.last ()] = i;
540 }
541 return true;
542 }
543
544 /* Member functions for function_instance. */
545
546 function_instance::~function_instance ()
547 {
548 for (callsite_map::iterator iter = callsites.begin ();
549 iter != callsites.end (); ++iter)
550 delete iter->second;
551 }
552
553 /* Traverse callsites of the current function_instance to find one at the
554 location of LINENO and callee name represented in DECL. */
555
556 function_instance *
557 function_instance::get_function_instance_by_decl (unsigned lineno,
558 tree decl) const
559 {
560 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
561 if (func_name_idx != -1)
562 {
563 callsite_map::const_iterator ret
564 = callsites.find (std::make_pair (lineno, func_name_idx));
565 if (ret != callsites.end ())
566 return ret->second;
567 }
568 func_name_idx
569 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
570 if (func_name_idx != -1)
571 {
572 callsite_map::const_iterator ret
573 = callsites.find (std::make_pair (lineno, func_name_idx));
574 if (ret != callsites.end ())
575 return ret->second;
576 }
577 if (DECL_ABSTRACT_ORIGIN (decl))
578 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
579
580 return NULL;
581 }
582
583 /* Store the profile info for LOC in INFO. Return TRUE if profile info
584 is found. */
585
586 bool
587 function_instance::get_count_info (location_t loc, count_info *info) const
588 {
589 position_count_map::const_iterator iter = pos_counts.find (loc);
590 if (iter == pos_counts.end ())
591 return false;
592 *info = iter->second;
593 return true;
594 }
595
596 /* Mark LOC as annotated. */
597
598 void
599 function_instance::mark_annotated (location_t loc)
600 {
601 position_count_map::iterator iter = pos_counts.find (loc);
602 if (iter == pos_counts.end ())
603 return;
604 iter->second.annotated = true;
605 }
606
607 /* Read the inlined indirect call target profile for STMT and store it in
608 MAP, return the total count for all inlined indirect calls. */
609
610 gcov_type
611 function_instance::find_icall_target_map (gcall *stmt,
612 icall_target_map *map) const
613 {
614 gcov_type ret = 0;
615 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
616
617 for (callsite_map::const_iterator iter = callsites.begin ();
618 iter != callsites.end (); ++iter)
619 {
620 unsigned callee = iter->second->name ();
621 /* Check if callsite location match the stmt. */
622 if (iter->first.first != stmt_offset)
623 continue;
624 struct cgraph_node *node = cgraph_node::get_for_asmname (
625 get_identifier (afdo_string_table->get_name (callee)));
626 if (node == NULL)
627 continue;
628 if (!check_ic_target (stmt, node))
629 continue;
630 (*map)[callee] = iter->second->total_count ();
631 ret += iter->second->total_count ();
632 }
633 return ret;
634 }
635
636 /* Read the profile and create a function_instance with head count as
637 HEAD_COUNT. Recursively read callsites to create nested function_instances
638 too. STACK is used to track the recursive creation process. */
639
640 /* function instance profile format:
641
642 ENTRY_COUNT: 8 bytes
643 NAME_INDEX: 4 bytes
644 NUM_POS_COUNTS: 4 bytes
645 NUM_CALLSITES: 4 byte
646 POS_COUNT_1:
647 POS_1_OFFSET: 4 bytes
648 NUM_TARGETS: 4 bytes
649 COUNT: 8 bytes
650 TARGET_1:
651 VALUE_PROFILE_TYPE: 4 bytes
652 TARGET_IDX: 8 bytes
653 COUNT: 8 bytes
654 TARGET_2
655 ...
656 TARGET_n
657 POS_COUNT_2
658 ...
659 POS_COUNT_N
660 CALLSITE_1:
661 CALLSITE_1_OFFSET: 4 bytes
662 FUNCTION_INSTANCE_PROFILE (nested)
663 CALLSITE_2
664 ...
665 CALLSITE_n. */
666
667 function_instance *
668 function_instance::read_function_instance (function_instance_stack *stack,
669 gcov_type head_count)
670 {
671 unsigned name = gcov_read_unsigned ();
672 unsigned num_pos_counts = gcov_read_unsigned ();
673 unsigned num_callsites = gcov_read_unsigned ();
674 function_instance *s = new function_instance (name, head_count);
675 stack->safe_push (s);
676
677 for (unsigned i = 0; i < num_pos_counts; i++)
678 {
679 unsigned offset = gcov_read_unsigned () & 0xffff0000;
680 unsigned num_targets = gcov_read_unsigned ();
681 gcov_type count = gcov_read_counter ();
682 s->pos_counts[offset].count = count;
683 for (unsigned j = 0; j < stack->length (); j++)
684 (*stack)[j]->total_count_ += count;
685 for (unsigned j = 0; j < num_targets; j++)
686 {
687 /* Only indirect call target histogram is supported now. */
688 gcov_read_unsigned ();
689 gcov_type target_idx = gcov_read_counter ();
690 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
691 }
692 }
693 for (unsigned i = 0; i < num_callsites; i++)
694 {
695 unsigned offset = gcov_read_unsigned ();
696 function_instance *callee_function_instance
697 = read_function_instance (stack, 0);
698 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
699 = callee_function_instance;
700 }
701 stack->pop ();
702 return s;
703 }
704
705 /* Sum of counts that is used during annotation. */
706
707 gcov_type
708 function_instance::total_annotated_count () const
709 {
710 gcov_type ret = 0;
711 for (callsite_map::const_iterator iter = callsites.begin ();
712 iter != callsites.end (); ++iter)
713 ret += iter->second->total_annotated_count ();
714 for (position_count_map::const_iterator iter = pos_counts.begin ();
715 iter != pos_counts.end (); ++iter)
716 if (iter->second.annotated)
717 ret += iter->second.count;
718 return ret;
719 }
720
721 /* Member functions for autofdo_source_profile. */
722
723 autofdo_source_profile::~autofdo_source_profile ()
724 {
725 for (name_function_instance_map::const_iterator iter = map_.begin ();
726 iter != map_.end (); ++iter)
727 delete iter->second;
728 }
729
730 /* For a given DECL, returns the top-level function_instance. */
731
732 function_instance *
733 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
734 {
735 int index = afdo_string_table->get_index_by_decl (decl);
736 if (index == -1)
737 return NULL;
738 name_function_instance_map::const_iterator ret = map_.find (index);
739 return ret == map_.end () ? NULL : ret->second;
740 }
741
742 /* Find count_info for a given gimple STMT. If found, store the count_info
743 in INFO and return true; otherwise return false. */
744
745 bool
746 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
747 {
748 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
749 return false;
750
751 inline_stack stack;
752 get_inline_stack (gimple_location (stmt), &stack);
753 if (stack.length () == 0)
754 return false;
755 function_instance *s = get_function_instance_by_inline_stack (stack);
756 if (s == NULL)
757 return false;
758 return s->get_count_info (stack[0].second, info);
759 }
760
761 /* Mark LOC as annotated. */
762
763 void
764 autofdo_source_profile::mark_annotated (location_t loc)
765 {
766 inline_stack stack;
767 get_inline_stack (loc, &stack);
768 if (stack.length () == 0)
769 return;
770 function_instance *s = get_function_instance_by_inline_stack (stack);
771 if (s == NULL)
772 return;
773 s->mark_annotated (stack[0].second);
774 }
775
776 /* Update value profile INFO for STMT from the inlined indirect callsite.
777 Return true if INFO is updated. */
778
779 bool
780 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
781 count_info *info)
782 {
783 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
784 return false;
785
786 count_info old_info;
787 get_count_info (stmt, &old_info);
788 gcov_type total = 0;
789 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
790 iter != old_info.targets.end (); ++iter)
791 total += iter->second;
792
793 /* Program behavior changed, original promoted (and inlined) target is not
794 hot any more. Will avoid promote the original target.
795
796 To check if original promoted target is still hot, we check the total
797 count of the unpromoted targets (stored in old_info). If it is no less
798 than half of the callsite count (stored in INFO), the original promoted
799 target is considered not hot any more. */
800 if (total >= info->count / 2)
801 return false;
802
803 inline_stack stack;
804 get_inline_stack (gimple_location (stmt), &stack);
805 if (stack.length () == 0)
806 return false;
807 function_instance *s = get_function_instance_by_inline_stack (stack);
808 if (s == NULL)
809 return false;
810 icall_target_map map;
811 if (s->find_icall_target_map (stmt, &map) == 0)
812 return false;
813 for (icall_target_map::const_iterator iter = map.begin ();
814 iter != map.end (); ++iter)
815 info->targets[iter->first] = iter->second;
816 return true;
817 }
818
819 /* Find total count of the callee of EDGE. */
820
821 gcov_type
822 autofdo_source_profile::get_callsite_total_count (
823 struct cgraph_edge *edge) const
824 {
825 inline_stack stack;
826 stack.safe_push (std::make_pair (edge->callee->decl, 0));
827 get_inline_stack (gimple_location (edge->call_stmt), &stack);
828
829 function_instance *s = get_function_instance_by_inline_stack (stack);
830 if (s == NULL
831 || afdo_string_table->get_index (IDENTIFIER_POINTER (
832 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
833 return 0;
834
835 return s->total_count ();
836 }
837
838 /* Read AutoFDO profile and returns TRUE on success. */
839
840 /* source profile format:
841
842 GCOV_TAG_AFDO_FUNCTION: 4 bytes
843 LENGTH: 4 bytes
844 NUM_FUNCTIONS: 4 bytes
845 FUNCTION_INSTANCE_1
846 FUNCTION_INSTANCE_2
847 ...
848 FUNCTION_INSTANCE_N. */
849
850 bool
851 autofdo_source_profile::read ()
852 {
853 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
854 {
855 inform (0, "Not expected TAG.");
856 return false;
857 }
858
859 /* Skip the length of the section. */
860 gcov_read_unsigned ();
861
862 /* Read in the function/callsite profile, and store it in local
863 data structure. */
864 unsigned function_num = gcov_read_unsigned ();
865 for (unsigned i = 0; i < function_num; i++)
866 {
867 function_instance::function_instance_stack stack;
868 function_instance *s = function_instance::read_function_instance (
869 &stack, gcov_read_counter ());
870 afdo_profile_info->sum_all += s->total_count ();
871 map_[s->name ()] = s;
872 }
873 return true;
874 }
875
876 /* Return the function_instance in the profile that correspond to the
877 inline STACK. */
878
879 function_instance *
880 autofdo_source_profile::get_function_instance_by_inline_stack (
881 const inline_stack &stack) const
882 {
883 name_function_instance_map::const_iterator iter = map_.find (
884 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
885 if (iter == map_.end())
886 return NULL;
887 function_instance *s = iter->second;
888 for (unsigned i = stack.length() - 1; i > 0; i--)
889 {
890 s = s->get_function_instance_by_decl (
891 stack[i].second, stack[i - 1].first);
892 if (s == NULL)
893 return NULL;
894 }
895 return s;
896 }
897
898 /* Module profile is only used by LIPO. Here we simply ignore it. */
899
900 static void
901 fake_read_autofdo_module_profile ()
902 {
903 /* Read in the module info. */
904 gcov_read_unsigned ();
905
906 /* Skip the length of the section. */
907 gcov_read_unsigned ();
908
909 /* Read in the file name table. */
910 unsigned total_module_num = gcov_read_unsigned ();
911 gcc_assert (total_module_num == 0);
912 }
913
914 /* Read data from profile data file. */
915
916 static void
917 read_profile (void)
918 {
919 if (gcov_open (auto_profile_file, 1) == 0)
920 error ("Cannot open profile file %s.", auto_profile_file);
921
922 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
923 error ("AutoFDO profile magic number does not mathch.");
924
925 /* Skip the version number. */
926 unsigned version = gcov_read_unsigned ();
927 if (version != AUTO_PROFILE_VERSION)
928 error ("AutoFDO profile version %u does match %u.",
929 version, AUTO_PROFILE_VERSION);
930
931 /* Skip the empty integer. */
932 gcov_read_unsigned ();
933
934 /* string_table. */
935 afdo_string_table = new string_table ();
936 if (!afdo_string_table->read())
937 error ("Cannot read string table from %s.", auto_profile_file);
938
939 /* autofdo_source_profile. */
940 afdo_source_profile = autofdo_source_profile::create ();
941 if (afdo_source_profile == NULL)
942 error ("Cannot read function profile from %s.", auto_profile_file);
943
944 /* autofdo_module_profile. */
945 fake_read_autofdo_module_profile ();
946
947 /* Read in the working set. */
948 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
949 error ("Cannot read working set from %s.", auto_profile_file);
950
951 /* Skip the length of the section. */
952 gcov_read_unsigned ();
953 gcov_working_set_t set[128];
954 for (unsigned i = 0; i < 128; i++)
955 {
956 set[i].num_counters = gcov_read_unsigned ();
957 set[i].min_counter = gcov_read_counter ();
958 }
959 add_working_set (set);
960 }
961
962 /* From AutoFDO profiles, find values inside STMT for that we want to measure
963 histograms for indirect-call optimization.
964
965 This function is actually served for 2 purposes:
966 * before annotation, we need to mark histogram, promote and inline
967 * after annotation, we just need to mark, and let follow-up logic to
968 decide if it needs to promote and inline. */
969
970 static void
971 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
972 bool transform)
973 {
974 gimple gs = gsi_stmt (*gsi);
975 tree callee;
976
977 if (map.size () == 0)
978 return;
979 gcall *stmt = dyn_cast <gcall *> (gs);
980 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
981 return;
982
983 callee = gimple_call_fn (stmt);
984
985 histogram_value hist = gimple_alloc_histogram_value (
986 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
987 hist->n_counters = 3;
988 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
989 gimple_add_histogram_value (cfun, stmt, hist);
990
991 gcov_type total = 0;
992 icall_target_map::const_iterator max_iter = map.end ();
993
994 for (icall_target_map::const_iterator iter = map.begin ();
995 iter != map.end (); ++iter)
996 {
997 total += iter->second;
998 if (max_iter == map.end () || max_iter->second < iter->second)
999 max_iter = iter;
1000 }
1001
1002 hist->hvalue.counters[0]
1003 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1004 hist->hvalue.counters[1] = max_iter->second;
1005 hist->hvalue.counters[2] = total;
1006
1007 if (!transform)
1008 return;
1009
1010 struct cgraph_edge *indirect_edge
1011 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1012 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1013 get_identifier ((const char *) hist->hvalue.counters[0]));
1014
1015 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1016 return;
1017 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1018 return;
1019 struct cgraph_edge *new_edge
1020 = indirect_edge->make_speculative (direct_call, 0, 0);
1021 new_edge->redirect_call_stmt_to_callee ();
1022 gimple_remove_histogram_value (cfun, stmt, hist);
1023 inline_call (new_edge, true, NULL, NULL, false);
1024 }
1025
1026 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1027 histograms and adds them to list VALUES. */
1028
1029 static void
1030 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1031 bool transform)
1032 {
1033 afdo_indirect_call (gsi, map, transform);
1034 }
1035
1036 typedef std::set<basic_block> bb_set;
1037 typedef std::set<edge> edge_set;
1038
1039 static bool
1040 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1041 {
1042 return annotated.find (bb) != annotated.end ();
1043 }
1044
1045 static void
1046 set_bb_annotated (basic_block bb, bb_set *annotated)
1047 {
1048 annotated->insert (bb);
1049 }
1050
1051 static bool
1052 is_edge_annotated (const edge e, const edge_set &annotated)
1053 {
1054 return annotated.find (e) != annotated.end ();
1055 }
1056
1057 static void
1058 set_edge_annotated (edge e, edge_set *annotated)
1059 {
1060 annotated->insert (e);
1061 }
1062
1063 /* For a given BB, set its execution count. Attach value profile if a stmt
1064 is not in PROMOTED, because we only want to promote an indirect call once.
1065 Return TRUE if BB is annotated. */
1066
1067 static bool
1068 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1069 {
1070 gimple_stmt_iterator gsi;
1071 edge e;
1072 edge_iterator ei;
1073 gcov_type max_count = 0;
1074 bool has_annotated = false;
1075
1076 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1077 {
1078 count_info info;
1079 gimple stmt = gsi_stmt (gsi);
1080 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1081 continue;
1082 if (afdo_source_profile->get_count_info (stmt, &info))
1083 {
1084 if (info.count > max_count)
1085 max_count = info.count;
1086 has_annotated = true;
1087 if (info.targets.size () > 0
1088 && promoted.find (stmt) == promoted.end ())
1089 afdo_vpt (&gsi, info.targets, false);
1090 }
1091 }
1092
1093 if (!has_annotated)
1094 return false;
1095
1096 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1097 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1098 for (gphi_iterator gpi = gsi_start_phis (bb);
1099 !gsi_end_p (gpi);
1100 gsi_next (&gpi))
1101 {
1102 gphi *phi = gpi.phi ();
1103 size_t i;
1104 for (i = 0; i < gimple_phi_num_args (phi); i++)
1105 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1106 }
1107 FOR_EACH_EDGE (e, ei, bb->succs)
1108 afdo_source_profile->mark_annotated (e->goto_locus);
1109
1110 bb->count = max_count;
1111 return true;
1112 }
1113
1114 /* BB1 and BB2 are in an equivalent class iff:
1115 1. BB1 dominates BB2.
1116 2. BB2 post-dominates BB1.
1117 3. BB1 and BB2 are in the same loop nest.
1118 This function finds the equivalent class for each basic block, and
1119 stores a pointer to the first BB in its equivalent class. Meanwhile,
1120 set bb counts for the same equivalent class to be idenical. Update
1121 ANNOTATED_BB for the first BB in its equivalent class. */
1122
1123 static void
1124 afdo_find_equiv_class (bb_set *annotated_bb)
1125 {
1126 basic_block bb;
1127
1128 FOR_ALL_BB_FN (bb, cfun)
1129 bb->aux = NULL;
1130
1131 FOR_ALL_BB_FN (bb, cfun)
1132 {
1133 vec<basic_block> dom_bbs;
1134 basic_block bb1;
1135 int i;
1136
1137 if (bb->aux != NULL)
1138 continue;
1139 bb->aux = bb;
1140 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1141 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1142 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1143 && bb1->loop_father == bb->loop_father)
1144 {
1145 bb1->aux = bb;
1146 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1147 {
1148 bb->count = bb1->count;
1149 set_bb_annotated (bb, annotated_bb);
1150 }
1151 }
1152 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1153 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1154 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1155 && bb1->loop_father == bb->loop_father)
1156 {
1157 bb1->aux = bb;
1158 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1159 {
1160 bb->count = bb1->count;
1161 set_bb_annotated (bb, annotated_bb);
1162 }
1163 }
1164 }
1165 }
1166
1167 /* If a basic block's count is known, and only one of its in/out edges' count
1168 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1169 edges' counts are known, then the basic block's unknown count can also be
1170 calculated.
1171 IS_SUCC is true if out edges of a basic blocks are examined.
1172 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1173 Return TRUE if any basic block/edge count is changed. */
1174
1175 static bool
1176 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1177 edge_set *annotated_edge)
1178 {
1179 basic_block bb;
1180 bool changed = false;
1181
1182 FOR_EACH_BB_FN (bb, cfun)
1183 {
1184 edge e, unknown_edge = NULL;
1185 edge_iterator ei;
1186 int num_unknown_edge = 0;
1187 gcov_type total_known_count = 0;
1188
1189 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1190 if (!is_edge_annotated (e, *annotated_edge))
1191 num_unknown_edge++, unknown_edge = e;
1192 else
1193 total_known_count += e->count;
1194
1195 if (num_unknown_edge == 0)
1196 {
1197 if (total_known_count > bb->count)
1198 {
1199 bb->count = total_known_count;
1200 changed = true;
1201 }
1202 if (!is_bb_annotated (bb, *annotated_bb))
1203 {
1204 set_bb_annotated (bb, annotated_bb);
1205 changed = true;
1206 }
1207 }
1208 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1209 {
1210 if (bb->count >= total_known_count)
1211 unknown_edge->count = bb->count - total_known_count;
1212 else
1213 unknown_edge->count = 0;
1214 set_edge_annotated (unknown_edge, annotated_edge);
1215 changed = true;
1216 }
1217 }
1218 return changed;
1219 }
1220
1221 /* Special propagation for circuit expressions. Because GCC translates
1222 control flow into data flow for circuit expressions. E.g.
1223 BB1:
1224 if (a && b)
1225 BB2
1226 else
1227 BB3
1228
1229 will be translated into:
1230
1231 BB1:
1232 if (a)
1233 goto BB.t1
1234 else
1235 goto BB.t3
1236 BB.t1:
1237 if (b)
1238 goto BB.t2
1239 else
1240 goto BB.t3
1241 BB.t2:
1242 goto BB.t3
1243 BB.t3:
1244 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1245 if (tmp)
1246 goto BB2
1247 else
1248 goto BB3
1249
1250 In this case, we need to propagate through PHI to determine the edge
1251 count of BB1->BB.t1, BB.t1->BB.t2.
1252 Update ANNOTATED_EDGE accordingly. */
1253
1254 static void
1255 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1256 {
1257 basic_block bb;
1258 FOR_ALL_BB_FN (bb, cfun)
1259 {
1260 gimple def_stmt;
1261 tree cmp_rhs, cmp_lhs;
1262 gimple cmp_stmt = last_stmt (bb);
1263 edge e;
1264 edge_iterator ei;
1265
1266 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1267 continue;
1268 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1269 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1270 if (!TREE_CONSTANT (cmp_rhs)
1271 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1272 continue;
1273 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1274 continue;
1275 if (!is_bb_annotated (bb, annotated_bb))
1276 continue;
1277 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1278 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1279 && gimple_assign_single_p (def_stmt)
1280 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1281 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1282 if (!def_stmt)
1283 continue;
1284 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1285 if (!phi_stmt)
1286 continue;
1287 FOR_EACH_EDGE (e, ei, bb->succs)
1288 {
1289 unsigned i, total = 0;
1290 edge only_one;
1291 bool check_value_one = (((integer_onep (cmp_rhs))
1292 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1293 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1294 if (!is_edge_annotated (e, *annotated_edge))
1295 continue;
1296 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1297 {
1298 tree val = gimple_phi_arg_def (phi_stmt, i);
1299 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1300
1301 if (!TREE_CONSTANT (val)
1302 || !(integer_zerop (val) || integer_onep (val)))
1303 continue;
1304 if (check_value_one ^ integer_onep (val))
1305 continue;
1306 total++;
1307 only_one = ep;
1308 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1309 {
1310 ep->probability = 0;
1311 ep->count = 0;
1312 set_edge_annotated (ep, annotated_edge);
1313 }
1314 }
1315 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1316 {
1317 only_one->probability = e->probability;
1318 only_one->count = e->count;
1319 set_edge_annotated (only_one, annotated_edge);
1320 }
1321 }
1322 }
1323 }
1324
1325 /* Propagate the basic block count and edge count on the control flow
1326 graph. We do the propagation iteratively until stablize. */
1327
1328 static void
1329 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1330 {
1331 basic_block bb;
1332 bool changed = true;
1333 int i = 0;
1334
1335 FOR_ALL_BB_FN (bb, cfun)
1336 {
1337 bb->count = ((basic_block)bb->aux)->count;
1338 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1339 set_bb_annotated (bb, annotated_bb);
1340 }
1341
1342 while (changed && i++ < 10)
1343 {
1344 changed = false;
1345
1346 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1347 changed = true;
1348 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1349 changed = true;
1350 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1351 }
1352 }
1353
1354 /* Propagate counts on control flow graph and calculate branch
1355 probabilities. */
1356
1357 static void
1358 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1359 {
1360 basic_block bb;
1361 bool has_sample = false;
1362
1363 FOR_EACH_BB_FN (bb, cfun)
1364 {
1365 if (bb->count > 0)
1366 {
1367 has_sample = true;
1368 break;
1369 }
1370 }
1371
1372 if (!has_sample)
1373 return;
1374
1375 calculate_dominance_info (CDI_POST_DOMINATORS);
1376 calculate_dominance_info (CDI_DOMINATORS);
1377 loop_optimizer_init (0);
1378
1379 afdo_find_equiv_class (annotated_bb);
1380 afdo_propagate (annotated_bb, annotated_edge);
1381
1382 FOR_EACH_BB_FN (bb, cfun)
1383 {
1384 edge e;
1385 edge_iterator ei;
1386 int num_unknown_succ = 0;
1387 gcov_type total_count = 0;
1388
1389 FOR_EACH_EDGE (e, ei, bb->succs)
1390 {
1391 if (!is_edge_annotated (e, *annotated_edge))
1392 num_unknown_succ++;
1393 else
1394 total_count += e->count;
1395 }
1396 if (num_unknown_succ == 0 && total_count > 0)
1397 {
1398 FOR_EACH_EDGE (e, ei, bb->succs)
1399 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1400 }
1401 }
1402 FOR_ALL_BB_FN (bb, cfun)
1403 {
1404 edge e;
1405 edge_iterator ei;
1406
1407 FOR_EACH_EDGE (e, ei, bb->succs)
1408 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1409 bb->aux = NULL;
1410 }
1411
1412 loop_optimizer_finalize ();
1413 free_dominance_info (CDI_DOMINATORS);
1414 free_dominance_info (CDI_POST_DOMINATORS);
1415 }
1416
1417 /* Perform value profile transformation using AutoFDO profile. Add the
1418 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1419 indirect call promoted. */
1420
1421 static bool
1422 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1423 {
1424 basic_block bb;
1425 if (afdo_source_profile->get_function_instance_by_decl (
1426 current_function_decl) == NULL)
1427 return false;
1428
1429 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1430
1431 bool has_vpt = false;
1432 FOR_EACH_BB_FN (bb, cfun)
1433 {
1434 if (!has_indirect_call (bb))
1435 continue;
1436 gimple_stmt_iterator gsi;
1437
1438 gcov_type bb_count = 0;
1439 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1440 {
1441 count_info info;
1442 gimple stmt = gsi_stmt (gsi);
1443 if (afdo_source_profile->get_count_info (stmt, &info))
1444 bb_count = MAX (bb_count, info.count);
1445 }
1446
1447 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1448 {
1449 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1450 /* IC_promotion and early_inline_2 is done in multiple iterations.
1451 No need to promoted the stmt if its in promoted_stmts (means
1452 it is already been promoted in the previous iterations). */
1453 if ((!stmt) || gimple_call_fn (stmt) == NULL
1454 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1455 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1456 continue;
1457
1458 count_info info;
1459 afdo_source_profile->get_count_info (stmt, &info);
1460 info.count = bb_count;
1461 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1462 {
1463 /* Promote the indirect call and update the promoted_stmts. */
1464 promoted_stmts->insert (stmt);
1465 afdo_vpt (&gsi, info.targets, true);
1466 has_vpt = true;
1467 }
1468 }
1469 }
1470
1471 if (has_vpt)
1472 {
1473 optimize_inline_calls (current_function_decl);
1474 return true;
1475 }
1476
1477 return false;
1478 }
1479
1480 /* Annotate auto profile to the control flow graph. Do not annotate value
1481 profile for stmts in PROMOTED_STMTS. */
1482
1483 static void
1484 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1485 {
1486 basic_block bb;
1487 bb_set annotated_bb;
1488 edge_set annotated_edge;
1489 const function_instance *s
1490 = afdo_source_profile->get_function_instance_by_decl (
1491 current_function_decl);
1492
1493 if (s == NULL)
1494 return;
1495 cgraph_node::get (current_function_decl)->count = s->head_count ();
1496 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1497 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1498
1499 FOR_EACH_BB_FN (bb, cfun)
1500 {
1501 edge e;
1502 edge_iterator ei;
1503
1504 bb->count = 0;
1505 FOR_EACH_EDGE (e, ei, bb->succs)
1506 e->count = 0;
1507
1508 if (afdo_set_bb_count (bb, promoted_stmts))
1509 set_bb_annotated (bb, &annotated_bb);
1510 if (bb->count > max_count)
1511 max_count = bb->count;
1512 }
1513 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1514 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1515 {
1516 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1517 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1518 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1519 }
1520 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1521 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1522 {
1523 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1524 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1525 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1526 }
1527 afdo_source_profile->mark_annotated (
1528 DECL_SOURCE_LOCATION (current_function_decl));
1529 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1530 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1531 if (max_count > 0)
1532 {
1533 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1534 counts_to_freqs ();
1535 profile_status_for_fn (cfun) = PROFILE_READ;
1536 }
1537 if (flag_value_profile_transformations)
1538 {
1539 gimple_value_profile_transformations ();
1540 free_dominance_info (CDI_DOMINATORS);
1541 free_dominance_info (CDI_POST_DOMINATORS);
1542 update_ssa (TODO_update_ssa);
1543 }
1544 }
1545
1546 /* Wrapper function to invoke early inliner. */
1547
1548 static void
1549 early_inline ()
1550 {
1551 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1552 unsigned todo = early_inliner (cfun);
1553 if (todo & TODO_update_ssa_any)
1554 update_ssa (TODO_update_ssa);
1555 }
1556
1557 /* Use AutoFDO profile to annoate the control flow graph.
1558 Return the todo flag. */
1559
1560 static unsigned int
1561 auto_profile (void)
1562 {
1563 struct cgraph_node *node;
1564
1565 if (symtab->state == FINISHED)
1566 return 0;
1567
1568 init_node_map (true);
1569 profile_info = autofdo::afdo_profile_info;
1570
1571 FOR_EACH_FUNCTION (node)
1572 {
1573 if (!gimple_has_body_p (node->decl))
1574 continue;
1575
1576 /* Don't profile functions produced for builtin stuff. */
1577 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1578 continue;
1579
1580 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1581
1582 /* First do indirect call promotion and early inline to make the
1583 IR match the profiled binary before actual annotation.
1584
1585 This is needed because an indirect call might have been promoted
1586 and inlined in the profiled binary. If we do not promote and
1587 inline these indirect calls before annotation, the profile for
1588 these promoted functions will be lost.
1589
1590 e.g. foo() --indirect_call--> bar()
1591 In profiled binary, the callsite is promoted and inlined, making
1592 the profile look like:
1593
1594 foo: {
1595 loc_foo_1: count_1
1596 bar@loc_foo_2: {
1597 loc_bar_1: count_2
1598 loc_bar_2: count_3
1599 }
1600 }
1601
1602 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1603 If we perform annotation on it, the profile inside bar@loc_foo2
1604 will be wasted.
1605
1606 To avoid this, we promote loc_foo_2 and inline the promoted bar
1607 function before annotation, so the profile inside bar@loc_foo2
1608 will be useful. */
1609 autofdo::stmt_set promoted_stmts;
1610 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1611 {
1612 if (!flag_value_profile_transformations
1613 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1614 break;
1615 early_inline ();
1616 }
1617
1618 early_inline ();
1619 autofdo::afdo_annotate_cfg (promoted_stmts);
1620 compute_function_frequency ();
1621
1622 /* Local pure-const may imply need to fixup the cfg. */
1623 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1624 cleanup_tree_cfg ();
1625
1626 free_dominance_info (CDI_DOMINATORS);
1627 free_dominance_info (CDI_POST_DOMINATORS);
1628 cgraph_edge::rebuild_edges ();
1629 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1630 pop_cfun ();
1631 }
1632
1633 return TODO_rebuild_cgraph_edges;
1634 }
1635 } /* namespace autofdo. */
1636
1637 /* Read the profile from the profile data file. */
1638
1639 void
1640 read_autofdo_file (void)
1641 {
1642 if (auto_profile_file == NULL)
1643 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1644
1645 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1646 1, sizeof (struct gcov_ctr_summary));
1647 autofdo::afdo_profile_info->runs = 1;
1648 autofdo::afdo_profile_info->sum_max = 0;
1649 autofdo::afdo_profile_info->sum_all = 0;
1650
1651 /* Read the profile from the profile file. */
1652 autofdo::read_profile ();
1653 }
1654
1655 /* Free the resources. */
1656
1657 void
1658 end_auto_profile (void)
1659 {
1660 delete autofdo::afdo_source_profile;
1661 delete autofdo::afdo_string_table;
1662 profile_info = NULL;
1663 }
1664
1665 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1666
1667 bool
1668 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1669 {
1670 gcov_type count
1671 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1672
1673 if (count > 0)
1674 {
1675 bool is_hot;
1676 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1677 /* At early inline stage, profile_info is not set yet. We need to
1678 temporarily set it to afdo_profile_info to calculate hotness. */
1679 profile_info = autofdo::afdo_profile_info;
1680 is_hot = maybe_hot_count_p (NULL, count);
1681 profile_info = saved_profile_info;
1682 return is_hot;
1683 }
1684
1685 return false;
1686 }
1687
1688 namespace
1689 {
1690
1691 const pass_data pass_data_ipa_auto_profile = {
1692 SIMPLE_IPA_PASS, "afdo", /* name */
1693 OPTGROUP_NONE, /* optinfo_flags */
1694 TV_IPA_AUTOFDO, /* tv_id */
1695 0, /* properties_required */
1696 0, /* properties_provided */
1697 0, /* properties_destroyed */
1698 0, /* todo_flags_start */
1699 0, /* todo_flags_finish */
1700 };
1701
1702 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1703 {
1704 public:
1705 pass_ipa_auto_profile (gcc::context *ctxt)
1706 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1707 {
1708 }
1709
1710 /* opt_pass methods: */
1711 virtual bool
1712 gate (function *)
1713 {
1714 return flag_auto_profile;
1715 }
1716 virtual unsigned int
1717 execute (function *)
1718 {
1719 return autofdo::auto_profile ();
1720 }
1721 }; // class pass_ipa_auto_profile
1722
1723 } // anon namespace
1724
1725 simple_ipa_opt_pass *
1726 make_pass_ipa_auto_profile (gcc::context *ctxt)
1727 {
1728 return new pass_ipa_auto_profile (ctxt);
1729 }