* config/nvptx/nvptx.md (call_operation): Remove unused variables.
[gcc.git] / gcc / auto-profile.c
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23
24 #include <string.h>
25 #include <map>
26 #include <set>
27
28 #include "coretypes.h"
29 #include "alias.h"
30 #include "symtab.h"
31 #include "options.h"
32 #include "tree.h"
33 #include "fold-const.h"
34 #include "tree-pass.h"
35 #include "flags.h"
36 #include "predict.h"
37 #include "tm.h"
38 #include "hard-reg-set.h"
39 #include "function.h"
40 #include "dominance.h"
41 #include "cfg.h"
42 #include "basic-block.h"
43 #include "diagnostic-core.h"
44 #include "gcov-io.h"
45 #include "profile.h"
46 #include "langhooks.h"
47 #include "opts.h"
48 #include "tree-pass.h"
49 #include "cfgloop.h"
50 #include "tree-ssa-alias.h"
51 #include "tree-cfg.h"
52 #include "tree-cfgcleanup.h"
53 #include "tree-ssa-operands.h"
54 #include "tree-into-ssa.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
57 #include "gimple.h"
58 #include "gimple-iterator.h"
59 #include "gimple-ssa.h"
60 #include "cgraph.h"
61 #include "value-prof.h"
62 #include "coverage.h"
63 #include "params.h"
64 #include "alloc-pool.h"
65 #include "symbol-summary.h"
66 #include "ipa-prop.h"
67 #include "ipa-inline.h"
68 #include "tree-inline.h"
69 #include "stringpool.h"
70 #include "auto-profile.h"
71
72 /* The following routines implements AutoFDO optimization.
73
74 This optimization uses sampling profiles to annotate basic block counts
75 and uses heuristics to estimate branch probabilities.
76
77 There are three phases in AutoFDO:
78
79 Phase 1: Read profile from the profile data file.
80 The following info is read from the profile datafile:
81 * string_table: a map between function name and its index.
82 * autofdo_source_profile: a map from function_instance name to
83 function_instance. This is represented as a forest of
84 function_instances.
85 * WorkingSet: a histogram of how many instructions are covered for a
86 given percentage of total cycles. This is describing the binary
87 level information (not source level). This info is used to help
88 decide if we want aggressive optimizations that could increase
89 code footprint (e.g. loop unroll etc.)
90 A function instance is an instance of function that could either be a
91 standalone symbol, or a clone of a function that is inlined into another
92 function.
93
94 Phase 2: Early inline + value profile transformation.
95 Early inline uses autofdo_source_profile to find if a callsite is:
96 * inlined in the profiled binary.
97 * callee body is hot in the profiling run.
98 If both condition satisfies, early inline will inline the callsite
99 regardless of the code growth.
100 Phase 2 is an iterative process. During each iteration, we also check
101 if an indirect callsite is promoted and inlined in the profiling run.
102 If yes, vpt will happen to force promote it and in the next iteration,
103 einline will inline the promoted callsite in the next iteration.
104
105 Phase 3: Annotate control flow graph.
106 AutoFDO uses a separate pass to:
107 * Annotate basic block count
108 * Estimate branch probability
109
110 After the above 3 phases, all profile is readily annotated on the GCC IR.
111 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
112 use of the profile. E.g. it uses existing mechanism to calculate the basic
113 block/edge frequency, as well as the cgraph node/edge count.
114 */
115
116 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
117 #define AUTO_PROFILE_VERSION 1
118
119 namespace autofdo
120 {
121
122 /* Represent a source location: (function_decl, lineno). */
123 typedef std::pair<tree, unsigned> decl_lineno;
124
125 /* Represent an inline stack. vector[0] is the leaf node. */
126 typedef auto_vec<decl_lineno> inline_stack;
127
128 /* String array that stores function names. */
129 typedef auto_vec<char *> string_vector;
130
131 /* Map from function name's index in string_table to target's
132 execution count. */
133 typedef std::map<unsigned, gcov_type> icall_target_map;
134
135 /* Set of gimple stmts. Used to track if the stmt has already been promoted
136 to direct call. */
137 typedef std::set<gimple> stmt_set;
138
139 /* Represent count info of an inline stack. */
140 struct count_info
141 {
142 /* Sampled count of the inline stack. */
143 gcov_type count;
144
145 /* Map from indirect call target to its sample count. */
146 icall_target_map targets;
147
148 /* Whether this inline stack is already used in annotation.
149
150 Each inline stack should only be used to annotate IR once.
151 This will be enforced when instruction-level discriminator
152 is supported. */
153 bool annotated;
154 };
155
156 /* operator< for "const char *". */
157 struct string_compare
158 {
159 bool operator()(const char *a, const char *b) const
160 {
161 return strcmp (a, b) < 0;
162 }
163 };
164
165 /* Store a string array, indexed by string position in the array. */
166 class string_table
167 {
168 public:
169 string_table ()
170 {}
171
172 ~string_table ();
173
174 /* For a given string, returns its index. */
175 int get_index (const char *name) const;
176
177 /* For a given decl, returns the index of the decl name. */
178 int get_index_by_decl (tree decl) const;
179
180 /* For a given index, returns the string. */
181 const char *get_name (int index) const;
182
183 /* Read profile, return TRUE on success. */
184 bool read ();
185
186 private:
187 typedef std::map<const char *, unsigned, string_compare> string_index_map;
188 string_vector vector_;
189 string_index_map map_;
190 };
191
192 /* Profile of a function instance:
193 1. total_count of the function.
194 2. head_count (entry basic block count) of the function (only valid when
195 function is a top-level function_instance, i.e. it is the original copy
196 instead of the inlined copy).
197 3. map from source location (decl_lineno) to profile (count_info).
198 4. map from callsite to callee function_instance. */
199 class function_instance
200 {
201 public:
202 typedef auto_vec<function_instance *> function_instance_stack;
203
204 /* Read the profile and return a function_instance with head count as
205 HEAD_COUNT. Recursively read callsites to create nested function_instances
206 too. STACK is used to track the recursive creation process. */
207 static function_instance *
208 read_function_instance (function_instance_stack *stack,
209 gcov_type head_count);
210
211 /* Recursively deallocate all callsites (nested function_instances). */
212 ~function_instance ();
213
214 /* Accessors. */
215 int
216 name () const
217 {
218 return name_;
219 }
220 gcov_type
221 total_count () const
222 {
223 return total_count_;
224 }
225 gcov_type
226 head_count () const
227 {
228 return head_count_;
229 }
230
231 /* Traverse callsites of the current function_instance to find one at the
232 location of LINENO and callee name represented in DECL. */
233 function_instance *get_function_instance_by_decl (unsigned lineno,
234 tree decl) const;
235
236 /* Store the profile info for LOC in INFO. Return TRUE if profile info
237 is found. */
238 bool get_count_info (location_t loc, count_info *info) const;
239
240 /* Read the inlined indirect call target profile for STMT and store it in
241 MAP, return the total count for all inlined indirect calls. */
242 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
243
244 /* Sum of counts that is used during annotation. */
245 gcov_type total_annotated_count () const;
246
247 /* Mark LOC as annotated. */
248 void mark_annotated (location_t loc);
249
250 private:
251 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
252 typedef std::pair<unsigned, unsigned> callsite;
253
254 /* Map from callsite to callee function_instance. */
255 typedef std::map<callsite, function_instance *> callsite_map;
256
257 function_instance (unsigned name, gcov_type head_count)
258 : name_ (name), total_count_ (0), head_count_ (head_count)
259 {
260 }
261
262 /* Map from source location (decl_lineno) to profile (count_info). */
263 typedef std::map<unsigned, count_info> position_count_map;
264
265 /* function_instance name index in the string_table. */
266 unsigned name_;
267
268 /* Total sample count. */
269 gcov_type total_count_;
270
271 /* Entry BB's sample count. */
272 gcov_type head_count_;
273
274 /* Map from callsite location to callee function_instance. */
275 callsite_map callsites;
276
277 /* Map from source location to count_info. */
278 position_count_map pos_counts;
279 };
280
281 /* Profile for all functions. */
282 class autofdo_source_profile
283 {
284 public:
285 static autofdo_source_profile *
286 create ()
287 {
288 autofdo_source_profile *map = new autofdo_source_profile ();
289
290 if (map->read ())
291 return map;
292 delete map;
293 return NULL;
294 }
295
296 ~autofdo_source_profile ();
297
298 /* For a given DECL, returns the top-level function_instance. */
299 function_instance *get_function_instance_by_decl (tree decl) const;
300
301 /* Find count_info for a given gimple STMT. If found, store the count_info
302 in INFO and return true; otherwise return false. */
303 bool get_count_info (gimple stmt, count_info *info) const;
304
305 /* Find total count of the callee of EDGE. */
306 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
307
308 /* Update value profile INFO for STMT from the inlined indirect callsite.
309 Return true if INFO is updated. */
310 bool update_inlined_ind_target (gcall *stmt, count_info *info);
311
312 /* Mark LOC as annotated. */
313 void mark_annotated (location_t loc);
314
315 private:
316 /* Map from function_instance name index (in string_table) to
317 function_instance. */
318 typedef std::map<unsigned, function_instance *> name_function_instance_map;
319
320 autofdo_source_profile () {}
321
322 /* Read AutoFDO profile and returns TRUE on success. */
323 bool read ();
324
325 /* Return the function_instance in the profile that correspond to the
326 inline STACK. */
327 function_instance *
328 get_function_instance_by_inline_stack (const inline_stack &stack) const;
329
330 name_function_instance_map map_;
331 };
332
333 /* Store the strings read from the profile data file. */
334 static string_table *afdo_string_table;
335
336 /* Store the AutoFDO source profile. */
337 static autofdo_source_profile *afdo_source_profile;
338
339 /* gcov_ctr_summary structure to store the profile_info. */
340 static struct gcov_ctr_summary *afdo_profile_info;
341
342 /* Helper functions. */
343
344 /* Return the original name of NAME: strip the suffix that starts
345 with '.' Caller is responsible for freeing RET. */
346
347 static char *
348 get_original_name (const char *name)
349 {
350 char *ret = xstrdup (name);
351 char *find = strchr (ret, '.');
352 if (find != NULL)
353 *find = 0;
354 return ret;
355 }
356
357 /* Return the combined location, which is a 32bit integer in which
358 higher 16 bits stores the line offset of LOC to the start lineno
359 of DECL, The lower 16 bits stores the discriminator. */
360
361 static unsigned
362 get_combined_location (location_t loc, tree decl)
363 {
364 /* TODO: allow more bits for line and less bits for discriminator. */
365 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
366 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
367 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
368 }
369
370 /* Return the function decl of a given lexical BLOCK. */
371
372 static tree
373 get_function_decl_from_block (tree block)
374 {
375 tree decl;
376
377 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
378 return NULL_TREE;
379
380 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
381 decl && (TREE_CODE (decl) == BLOCK);
382 decl = BLOCK_ABSTRACT_ORIGIN (decl))
383 if (TREE_CODE (decl) == FUNCTION_DECL)
384 break;
385 return decl;
386 }
387
388 /* Store inline stack for STMT in STACK. */
389
390 static void
391 get_inline_stack (location_t locus, inline_stack *stack)
392 {
393 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
394 return;
395
396 tree block = LOCATION_BLOCK (locus);
397 if (block && TREE_CODE (block) == BLOCK)
398 {
399 int level = 0;
400 for (block = BLOCK_SUPERCONTEXT (block);
401 block && (TREE_CODE (block) == BLOCK);
402 block = BLOCK_SUPERCONTEXT (block))
403 {
404 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
405 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
406 continue;
407
408 tree decl = get_function_decl_from_block (block);
409 stack->safe_push (
410 std::make_pair (decl, get_combined_location (locus, decl)));
411 locus = tmp_locus;
412 level++;
413 }
414 }
415 stack->safe_push (
416 std::make_pair (current_function_decl,
417 get_combined_location (locus, current_function_decl)));
418 }
419
420 /* Return STMT's combined location, which is a 32bit integer in which
421 higher 16 bits stores the line offset of LOC to the start lineno
422 of DECL, The lower 16 bits stores the discriminator. */
423
424 static unsigned
425 get_relative_location_for_stmt (gimple stmt)
426 {
427 location_t locus = gimple_location (stmt);
428 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
429 return UNKNOWN_LOCATION;
430
431 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
432 block = BLOCK_SUPERCONTEXT (block))
433 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
434 return get_combined_location (locus,
435 get_function_decl_from_block (block));
436 return get_combined_location (locus, current_function_decl);
437 }
438
439 /* Return true if BB contains indirect call. */
440
441 static bool
442 has_indirect_call (basic_block bb)
443 {
444 gimple_stmt_iterator gsi;
445
446 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
447 {
448 gimple stmt = gsi_stmt (gsi);
449 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
450 && (gimple_call_fn (stmt) == NULL
451 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
452 return true;
453 }
454 return false;
455 }
456
457 /* Member functions for string_table. */
458
459 /* Deconstructor. */
460
461 string_table::~string_table ()
462 {
463 for (unsigned i = 0; i < vector_.length (); i++)
464 free (vector_[i]);
465 }
466
467
468 /* Return the index of a given function NAME. Return -1 if NAME is not
469 found in string table. */
470
471 int
472 string_table::get_index (const char *name) const
473 {
474 if (name == NULL)
475 return -1;
476 string_index_map::const_iterator iter = map_.find (name);
477 if (iter == map_.end ())
478 return -1;
479
480 return iter->second;
481 }
482
483 /* Return the index of a given function DECL. Return -1 if DECL is not
484 found in string table. */
485
486 int
487 string_table::get_index_by_decl (tree decl) const
488 {
489 char *name
490 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
491 int ret = get_index (name);
492 free (name);
493 if (ret != -1)
494 return ret;
495 ret = get_index (lang_hooks.dwarf_name (decl, 0));
496 if (ret != -1)
497 return ret;
498 if (DECL_ABSTRACT_ORIGIN (decl))
499 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
500
501 return -1;
502 }
503
504 /* Return the function name of a given INDEX. */
505
506 const char *
507 string_table::get_name (int index) const
508 {
509 gcc_assert (index > 0 && index < (int)vector_.length ());
510 return vector_[index];
511 }
512
513 /* Read the string table. Return TRUE if reading is successful. */
514
515 bool
516 string_table::read ()
517 {
518 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
519 return false;
520 /* Skip the length of the section. */
521 gcov_read_unsigned ();
522 /* Read in the file name table. */
523 unsigned string_num = gcov_read_unsigned ();
524 for (unsigned i = 0; i < string_num; i++)
525 {
526 vector_.safe_push (get_original_name (gcov_read_string ()));
527 map_[vector_.last ()] = i;
528 }
529 return true;
530 }
531
532 /* Member functions for function_instance. */
533
534 function_instance::~function_instance ()
535 {
536 for (callsite_map::iterator iter = callsites.begin ();
537 iter != callsites.end (); ++iter)
538 delete iter->second;
539 }
540
541 /* Traverse callsites of the current function_instance to find one at the
542 location of LINENO and callee name represented in DECL. */
543
544 function_instance *
545 function_instance::get_function_instance_by_decl (unsigned lineno,
546 tree decl) const
547 {
548 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
549 if (func_name_idx != -1)
550 {
551 callsite_map::const_iterator ret
552 = callsites.find (std::make_pair (lineno, func_name_idx));
553 if (ret != callsites.end ())
554 return ret->second;
555 }
556 func_name_idx
557 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
558 if (func_name_idx != -1)
559 {
560 callsite_map::const_iterator ret
561 = callsites.find (std::make_pair (lineno, func_name_idx));
562 if (ret != callsites.end ())
563 return ret->second;
564 }
565 if (DECL_ABSTRACT_ORIGIN (decl))
566 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
567
568 return NULL;
569 }
570
571 /* Store the profile info for LOC in INFO. Return TRUE if profile info
572 is found. */
573
574 bool
575 function_instance::get_count_info (location_t loc, count_info *info) const
576 {
577 position_count_map::const_iterator iter = pos_counts.find (loc);
578 if (iter == pos_counts.end ())
579 return false;
580 *info = iter->second;
581 return true;
582 }
583
584 /* Mark LOC as annotated. */
585
586 void
587 function_instance::mark_annotated (location_t loc)
588 {
589 position_count_map::iterator iter = pos_counts.find (loc);
590 if (iter == pos_counts.end ())
591 return;
592 iter->second.annotated = true;
593 }
594
595 /* Read the inlined indirect call target profile for STMT and store it in
596 MAP, return the total count for all inlined indirect calls. */
597
598 gcov_type
599 function_instance::find_icall_target_map (gcall *stmt,
600 icall_target_map *map) const
601 {
602 gcov_type ret = 0;
603 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
604
605 for (callsite_map::const_iterator iter = callsites.begin ();
606 iter != callsites.end (); ++iter)
607 {
608 unsigned callee = iter->second->name ();
609 /* Check if callsite location match the stmt. */
610 if (iter->first.first != stmt_offset)
611 continue;
612 struct cgraph_node *node = cgraph_node::get_for_asmname (
613 get_identifier (afdo_string_table->get_name (callee)));
614 if (node == NULL)
615 continue;
616 if (!check_ic_target (stmt, node))
617 continue;
618 (*map)[callee] = iter->second->total_count ();
619 ret += iter->second->total_count ();
620 }
621 return ret;
622 }
623
624 /* Read the profile and create a function_instance with head count as
625 HEAD_COUNT. Recursively read callsites to create nested function_instances
626 too. STACK is used to track the recursive creation process. */
627
628 /* function instance profile format:
629
630 ENTRY_COUNT: 8 bytes
631 NAME_INDEX: 4 bytes
632 NUM_POS_COUNTS: 4 bytes
633 NUM_CALLSITES: 4 byte
634 POS_COUNT_1:
635 POS_1_OFFSET: 4 bytes
636 NUM_TARGETS: 4 bytes
637 COUNT: 8 bytes
638 TARGET_1:
639 VALUE_PROFILE_TYPE: 4 bytes
640 TARGET_IDX: 8 bytes
641 COUNT: 8 bytes
642 TARGET_2
643 ...
644 TARGET_n
645 POS_COUNT_2
646 ...
647 POS_COUNT_N
648 CALLSITE_1:
649 CALLSITE_1_OFFSET: 4 bytes
650 FUNCTION_INSTANCE_PROFILE (nested)
651 CALLSITE_2
652 ...
653 CALLSITE_n. */
654
655 function_instance *
656 function_instance::read_function_instance (function_instance_stack *stack,
657 gcov_type head_count)
658 {
659 unsigned name = gcov_read_unsigned ();
660 unsigned num_pos_counts = gcov_read_unsigned ();
661 unsigned num_callsites = gcov_read_unsigned ();
662 function_instance *s = new function_instance (name, head_count);
663 stack->safe_push (s);
664
665 for (unsigned i = 0; i < num_pos_counts; i++)
666 {
667 unsigned offset = gcov_read_unsigned () & 0xffff0000;
668 unsigned num_targets = gcov_read_unsigned ();
669 gcov_type count = gcov_read_counter ();
670 s->pos_counts[offset].count = count;
671 for (unsigned j = 0; j < stack->length (); j++)
672 (*stack)[j]->total_count_ += count;
673 for (unsigned j = 0; j < num_targets; j++)
674 {
675 /* Only indirect call target histogram is supported now. */
676 gcov_read_unsigned ();
677 gcov_type target_idx = gcov_read_counter ();
678 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
679 }
680 }
681 for (unsigned i = 0; i < num_callsites; i++)
682 {
683 unsigned offset = gcov_read_unsigned ();
684 function_instance *callee_function_instance
685 = read_function_instance (stack, 0);
686 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
687 = callee_function_instance;
688 }
689 stack->pop ();
690 return s;
691 }
692
693 /* Sum of counts that is used during annotation. */
694
695 gcov_type
696 function_instance::total_annotated_count () const
697 {
698 gcov_type ret = 0;
699 for (callsite_map::const_iterator iter = callsites.begin ();
700 iter != callsites.end (); ++iter)
701 ret += iter->second->total_annotated_count ();
702 for (position_count_map::const_iterator iter = pos_counts.begin ();
703 iter != pos_counts.end (); ++iter)
704 if (iter->second.annotated)
705 ret += iter->second.count;
706 return ret;
707 }
708
709 /* Member functions for autofdo_source_profile. */
710
711 autofdo_source_profile::~autofdo_source_profile ()
712 {
713 for (name_function_instance_map::const_iterator iter = map_.begin ();
714 iter != map_.end (); ++iter)
715 delete iter->second;
716 }
717
718 /* For a given DECL, returns the top-level function_instance. */
719
720 function_instance *
721 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
722 {
723 int index = afdo_string_table->get_index_by_decl (decl);
724 if (index == -1)
725 return NULL;
726 name_function_instance_map::const_iterator ret = map_.find (index);
727 return ret == map_.end () ? NULL : ret->second;
728 }
729
730 /* Find count_info for a given gimple STMT. If found, store the count_info
731 in INFO and return true; otherwise return false. */
732
733 bool
734 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
735 {
736 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
737 return false;
738
739 inline_stack stack;
740 get_inline_stack (gimple_location (stmt), &stack);
741 if (stack.length () == 0)
742 return false;
743 function_instance *s = get_function_instance_by_inline_stack (stack);
744 if (s == NULL)
745 return false;
746 return s->get_count_info (stack[0].second, info);
747 }
748
749 /* Mark LOC as annotated. */
750
751 void
752 autofdo_source_profile::mark_annotated (location_t loc)
753 {
754 inline_stack stack;
755 get_inline_stack (loc, &stack);
756 if (stack.length () == 0)
757 return;
758 function_instance *s = get_function_instance_by_inline_stack (stack);
759 if (s == NULL)
760 return;
761 s->mark_annotated (stack[0].second);
762 }
763
764 /* Update value profile INFO for STMT from the inlined indirect callsite.
765 Return true if INFO is updated. */
766
767 bool
768 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
769 count_info *info)
770 {
771 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
772 return false;
773
774 count_info old_info;
775 get_count_info (stmt, &old_info);
776 gcov_type total = 0;
777 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
778 iter != old_info.targets.end (); ++iter)
779 total += iter->second;
780
781 /* Program behavior changed, original promoted (and inlined) target is not
782 hot any more. Will avoid promote the original target.
783
784 To check if original promoted target is still hot, we check the total
785 count of the unpromoted targets (stored in old_info). If it is no less
786 than half of the callsite count (stored in INFO), the original promoted
787 target is considered not hot any more. */
788 if (total >= info->count / 2)
789 return false;
790
791 inline_stack stack;
792 get_inline_stack (gimple_location (stmt), &stack);
793 if (stack.length () == 0)
794 return false;
795 function_instance *s = get_function_instance_by_inline_stack (stack);
796 if (s == NULL)
797 return false;
798 icall_target_map map;
799 if (s->find_icall_target_map (stmt, &map) == 0)
800 return false;
801 for (icall_target_map::const_iterator iter = map.begin ();
802 iter != map.end (); ++iter)
803 info->targets[iter->first] = iter->second;
804 return true;
805 }
806
807 /* Find total count of the callee of EDGE. */
808
809 gcov_type
810 autofdo_source_profile::get_callsite_total_count (
811 struct cgraph_edge *edge) const
812 {
813 inline_stack stack;
814 stack.safe_push (std::make_pair (edge->callee->decl, 0));
815 get_inline_stack (gimple_location (edge->call_stmt), &stack);
816
817 function_instance *s = get_function_instance_by_inline_stack (stack);
818 if (s == NULL
819 || afdo_string_table->get_index (IDENTIFIER_POINTER (
820 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
821 return 0;
822
823 return s->total_count ();
824 }
825
826 /* Read AutoFDO profile and returns TRUE on success. */
827
828 /* source profile format:
829
830 GCOV_TAG_AFDO_FUNCTION: 4 bytes
831 LENGTH: 4 bytes
832 NUM_FUNCTIONS: 4 bytes
833 FUNCTION_INSTANCE_1
834 FUNCTION_INSTANCE_2
835 ...
836 FUNCTION_INSTANCE_N. */
837
838 bool
839 autofdo_source_profile::read ()
840 {
841 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
842 {
843 inform (0, "Not expected TAG.");
844 return false;
845 }
846
847 /* Skip the length of the section. */
848 gcov_read_unsigned ();
849
850 /* Read in the function/callsite profile, and store it in local
851 data structure. */
852 unsigned function_num = gcov_read_unsigned ();
853 for (unsigned i = 0; i < function_num; i++)
854 {
855 function_instance::function_instance_stack stack;
856 function_instance *s = function_instance::read_function_instance (
857 &stack, gcov_read_counter ());
858 afdo_profile_info->sum_all += s->total_count ();
859 map_[s->name ()] = s;
860 }
861 return true;
862 }
863
864 /* Return the function_instance in the profile that correspond to the
865 inline STACK. */
866
867 function_instance *
868 autofdo_source_profile::get_function_instance_by_inline_stack (
869 const inline_stack &stack) const
870 {
871 name_function_instance_map::const_iterator iter = map_.find (
872 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
873 if (iter == map_.end())
874 return NULL;
875 function_instance *s = iter->second;
876 for (unsigned i = stack.length() - 1; i > 0; i--)
877 {
878 s = s->get_function_instance_by_decl (
879 stack[i].second, stack[i - 1].first);
880 if (s == NULL)
881 return NULL;
882 }
883 return s;
884 }
885
886 /* Module profile is only used by LIPO. Here we simply ignore it. */
887
888 static void
889 fake_read_autofdo_module_profile ()
890 {
891 /* Read in the module info. */
892 gcov_read_unsigned ();
893
894 /* Skip the length of the section. */
895 gcov_read_unsigned ();
896
897 /* Read in the file name table. */
898 unsigned total_module_num = gcov_read_unsigned ();
899 gcc_assert (total_module_num == 0);
900 }
901
902 /* Read data from profile data file. */
903
904 static void
905 read_profile (void)
906 {
907 if (gcov_open (auto_profile_file, 1) == 0)
908 error ("Cannot open profile file %s.", auto_profile_file);
909
910 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
911 error ("AutoFDO profile magic number does not mathch.");
912
913 /* Skip the version number. */
914 unsigned version = gcov_read_unsigned ();
915 if (version != AUTO_PROFILE_VERSION)
916 error ("AutoFDO profile version %u does match %u.",
917 version, AUTO_PROFILE_VERSION);
918
919 /* Skip the empty integer. */
920 gcov_read_unsigned ();
921
922 /* string_table. */
923 afdo_string_table = new string_table ();
924 if (!afdo_string_table->read())
925 error ("Cannot read string table from %s.", auto_profile_file);
926
927 /* autofdo_source_profile. */
928 afdo_source_profile = autofdo_source_profile::create ();
929 if (afdo_source_profile == NULL)
930 error ("Cannot read function profile from %s.", auto_profile_file);
931
932 /* autofdo_module_profile. */
933 fake_read_autofdo_module_profile ();
934
935 /* Read in the working set. */
936 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
937 error ("Cannot read working set from %s.", auto_profile_file);
938
939 /* Skip the length of the section. */
940 gcov_read_unsigned ();
941 gcov_working_set_t set[128];
942 for (unsigned i = 0; i < 128; i++)
943 {
944 set[i].num_counters = gcov_read_unsigned ();
945 set[i].min_counter = gcov_read_counter ();
946 }
947 add_working_set (set);
948 }
949
950 /* From AutoFDO profiles, find values inside STMT for that we want to measure
951 histograms for indirect-call optimization.
952
953 This function is actually served for 2 purposes:
954 * before annotation, we need to mark histogram, promote and inline
955 * after annotation, we just need to mark, and let follow-up logic to
956 decide if it needs to promote and inline. */
957
958 static void
959 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
960 bool transform)
961 {
962 gimple gs = gsi_stmt (*gsi);
963 tree callee;
964
965 if (map.size () == 0)
966 return;
967 gcall *stmt = dyn_cast <gcall *> (gs);
968 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
969 return;
970
971 callee = gimple_call_fn (stmt);
972
973 histogram_value hist = gimple_alloc_histogram_value (
974 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
975 hist->n_counters = 3;
976 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
977 gimple_add_histogram_value (cfun, stmt, hist);
978
979 gcov_type total = 0;
980 icall_target_map::const_iterator max_iter = map.end ();
981
982 for (icall_target_map::const_iterator iter = map.begin ();
983 iter != map.end (); ++iter)
984 {
985 total += iter->second;
986 if (max_iter == map.end () || max_iter->second < iter->second)
987 max_iter = iter;
988 }
989
990 hist->hvalue.counters[0]
991 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
992 hist->hvalue.counters[1] = max_iter->second;
993 hist->hvalue.counters[2] = total;
994
995 if (!transform)
996 return;
997
998 struct cgraph_edge *indirect_edge
999 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1000 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1001 get_identifier ((const char *) hist->hvalue.counters[0]));
1002
1003 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1004 return;
1005 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1006 return;
1007 struct cgraph_edge *new_edge
1008 = indirect_edge->make_speculative (direct_call, 0, 0);
1009 new_edge->redirect_call_stmt_to_callee ();
1010 gimple_remove_histogram_value (cfun, stmt, hist);
1011 inline_call (new_edge, true, NULL, NULL, false);
1012 }
1013
1014 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1015 histograms and adds them to list VALUES. */
1016
1017 static void
1018 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1019 bool transform)
1020 {
1021 afdo_indirect_call (gsi, map, transform);
1022 }
1023
1024 typedef std::set<basic_block> bb_set;
1025 typedef std::set<edge> edge_set;
1026
1027 static bool
1028 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1029 {
1030 return annotated.find (bb) != annotated.end ();
1031 }
1032
1033 static void
1034 set_bb_annotated (basic_block bb, bb_set *annotated)
1035 {
1036 annotated->insert (bb);
1037 }
1038
1039 static bool
1040 is_edge_annotated (const edge e, const edge_set &annotated)
1041 {
1042 return annotated.find (e) != annotated.end ();
1043 }
1044
1045 static void
1046 set_edge_annotated (edge e, edge_set *annotated)
1047 {
1048 annotated->insert (e);
1049 }
1050
1051 /* For a given BB, set its execution count. Attach value profile if a stmt
1052 is not in PROMOTED, because we only want to promote an indirect call once.
1053 Return TRUE if BB is annotated. */
1054
1055 static bool
1056 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1057 {
1058 gimple_stmt_iterator gsi;
1059 edge e;
1060 edge_iterator ei;
1061 gcov_type max_count = 0;
1062 bool has_annotated = false;
1063
1064 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1065 {
1066 count_info info;
1067 gimple stmt = gsi_stmt (gsi);
1068 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1069 continue;
1070 if (afdo_source_profile->get_count_info (stmt, &info))
1071 {
1072 if (info.count > max_count)
1073 max_count = info.count;
1074 has_annotated = true;
1075 if (info.targets.size () > 0
1076 && promoted.find (stmt) == promoted.end ())
1077 afdo_vpt (&gsi, info.targets, false);
1078 }
1079 }
1080
1081 if (!has_annotated)
1082 return false;
1083
1084 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1085 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1086 for (gphi_iterator gpi = gsi_start_phis (bb);
1087 !gsi_end_p (gpi);
1088 gsi_next (&gpi))
1089 {
1090 gphi *phi = gpi.phi ();
1091 size_t i;
1092 for (i = 0; i < gimple_phi_num_args (phi); i++)
1093 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1094 }
1095 FOR_EACH_EDGE (e, ei, bb->succs)
1096 afdo_source_profile->mark_annotated (e->goto_locus);
1097
1098 bb->count = max_count;
1099 return true;
1100 }
1101
1102 /* BB1 and BB2 are in an equivalent class iff:
1103 1. BB1 dominates BB2.
1104 2. BB2 post-dominates BB1.
1105 3. BB1 and BB2 are in the same loop nest.
1106 This function finds the equivalent class for each basic block, and
1107 stores a pointer to the first BB in its equivalent class. Meanwhile,
1108 set bb counts for the same equivalent class to be idenical. Update
1109 ANNOTATED_BB for the first BB in its equivalent class. */
1110
1111 static void
1112 afdo_find_equiv_class (bb_set *annotated_bb)
1113 {
1114 basic_block bb;
1115
1116 FOR_ALL_BB_FN (bb, cfun)
1117 bb->aux = NULL;
1118
1119 FOR_ALL_BB_FN (bb, cfun)
1120 {
1121 vec<basic_block> dom_bbs;
1122 basic_block bb1;
1123 int i;
1124
1125 if (bb->aux != NULL)
1126 continue;
1127 bb->aux = bb;
1128 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1129 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1130 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1131 && bb1->loop_father == bb->loop_father)
1132 {
1133 bb1->aux = bb;
1134 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1135 {
1136 bb->count = bb1->count;
1137 set_bb_annotated (bb, annotated_bb);
1138 }
1139 }
1140 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1141 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1142 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1143 && bb1->loop_father == bb->loop_father)
1144 {
1145 bb1->aux = bb;
1146 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1147 {
1148 bb->count = bb1->count;
1149 set_bb_annotated (bb, annotated_bb);
1150 }
1151 }
1152 }
1153 }
1154
1155 /* If a basic block's count is known, and only one of its in/out edges' count
1156 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1157 edges' counts are known, then the basic block's unknown count can also be
1158 calculated.
1159 IS_SUCC is true if out edges of a basic blocks are examined.
1160 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1161 Return TRUE if any basic block/edge count is changed. */
1162
1163 static bool
1164 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1165 edge_set *annotated_edge)
1166 {
1167 basic_block bb;
1168 bool changed = false;
1169
1170 FOR_EACH_BB_FN (bb, cfun)
1171 {
1172 edge e, unknown_edge = NULL;
1173 edge_iterator ei;
1174 int num_unknown_edge = 0;
1175 gcov_type total_known_count = 0;
1176
1177 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1178 if (!is_edge_annotated (e, *annotated_edge))
1179 num_unknown_edge++, unknown_edge = e;
1180 else
1181 total_known_count += e->count;
1182
1183 if (num_unknown_edge == 0)
1184 {
1185 if (total_known_count > bb->count)
1186 {
1187 bb->count = total_known_count;
1188 changed = true;
1189 }
1190 if (!is_bb_annotated (bb, *annotated_bb))
1191 {
1192 set_bb_annotated (bb, annotated_bb);
1193 changed = true;
1194 }
1195 }
1196 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1197 {
1198 if (bb->count >= total_known_count)
1199 unknown_edge->count = bb->count - total_known_count;
1200 else
1201 unknown_edge->count = 0;
1202 set_edge_annotated (unknown_edge, annotated_edge);
1203 changed = true;
1204 }
1205 }
1206 return changed;
1207 }
1208
1209 /* Special propagation for circuit expressions. Because GCC translates
1210 control flow into data flow for circuit expressions. E.g.
1211 BB1:
1212 if (a && b)
1213 BB2
1214 else
1215 BB3
1216
1217 will be translated into:
1218
1219 BB1:
1220 if (a)
1221 goto BB.t1
1222 else
1223 goto BB.t3
1224 BB.t1:
1225 if (b)
1226 goto BB.t2
1227 else
1228 goto BB.t3
1229 BB.t2:
1230 goto BB.t3
1231 BB.t3:
1232 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1233 if (tmp)
1234 goto BB2
1235 else
1236 goto BB3
1237
1238 In this case, we need to propagate through PHI to determine the edge
1239 count of BB1->BB.t1, BB.t1->BB.t2.
1240 Update ANNOTATED_EDGE accordingly. */
1241
1242 static void
1243 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1244 {
1245 basic_block bb;
1246 FOR_ALL_BB_FN (bb, cfun)
1247 {
1248 gimple def_stmt;
1249 tree cmp_rhs, cmp_lhs;
1250 gimple cmp_stmt = last_stmt (bb);
1251 edge e;
1252 edge_iterator ei;
1253
1254 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1255 continue;
1256 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1257 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1258 if (!TREE_CONSTANT (cmp_rhs)
1259 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1260 continue;
1261 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1262 continue;
1263 if (!is_bb_annotated (bb, annotated_bb))
1264 continue;
1265 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1266 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1267 && gimple_assign_single_p (def_stmt)
1268 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1269 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1270 if (!def_stmt)
1271 continue;
1272 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1273 if (!phi_stmt)
1274 continue;
1275 FOR_EACH_EDGE (e, ei, bb->succs)
1276 {
1277 unsigned i, total = 0;
1278 edge only_one;
1279 bool check_value_one = (((integer_onep (cmp_rhs))
1280 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1281 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1282 if (!is_edge_annotated (e, *annotated_edge))
1283 continue;
1284 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1285 {
1286 tree val = gimple_phi_arg_def (phi_stmt, i);
1287 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1288
1289 if (!TREE_CONSTANT (val)
1290 || !(integer_zerop (val) || integer_onep (val)))
1291 continue;
1292 if (check_value_one ^ integer_onep (val))
1293 continue;
1294 total++;
1295 only_one = ep;
1296 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1297 {
1298 ep->probability = 0;
1299 ep->count = 0;
1300 set_edge_annotated (ep, annotated_edge);
1301 }
1302 }
1303 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1304 {
1305 only_one->probability = e->probability;
1306 only_one->count = e->count;
1307 set_edge_annotated (only_one, annotated_edge);
1308 }
1309 }
1310 }
1311 }
1312
1313 /* Propagate the basic block count and edge count on the control flow
1314 graph. We do the propagation iteratively until stablize. */
1315
1316 static void
1317 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1318 {
1319 basic_block bb;
1320 bool changed = true;
1321 int i = 0;
1322
1323 FOR_ALL_BB_FN (bb, cfun)
1324 {
1325 bb->count = ((basic_block)bb->aux)->count;
1326 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1327 set_bb_annotated (bb, annotated_bb);
1328 }
1329
1330 while (changed && i++ < 10)
1331 {
1332 changed = false;
1333
1334 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1335 changed = true;
1336 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1337 changed = true;
1338 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1339 }
1340 }
1341
1342 /* Propagate counts on control flow graph and calculate branch
1343 probabilities. */
1344
1345 static void
1346 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1347 {
1348 basic_block bb;
1349 bool has_sample = false;
1350
1351 FOR_EACH_BB_FN (bb, cfun)
1352 {
1353 if (bb->count > 0)
1354 {
1355 has_sample = true;
1356 break;
1357 }
1358 }
1359
1360 if (!has_sample)
1361 return;
1362
1363 calculate_dominance_info (CDI_POST_DOMINATORS);
1364 calculate_dominance_info (CDI_DOMINATORS);
1365 loop_optimizer_init (0);
1366
1367 afdo_find_equiv_class (annotated_bb);
1368 afdo_propagate (annotated_bb, annotated_edge);
1369
1370 FOR_EACH_BB_FN (bb, cfun)
1371 {
1372 edge e;
1373 edge_iterator ei;
1374 int num_unknown_succ = 0;
1375 gcov_type total_count = 0;
1376
1377 FOR_EACH_EDGE (e, ei, bb->succs)
1378 {
1379 if (!is_edge_annotated (e, *annotated_edge))
1380 num_unknown_succ++;
1381 else
1382 total_count += e->count;
1383 }
1384 if (num_unknown_succ == 0 && total_count > 0)
1385 {
1386 FOR_EACH_EDGE (e, ei, bb->succs)
1387 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1388 }
1389 }
1390 FOR_ALL_BB_FN (bb, cfun)
1391 {
1392 edge e;
1393 edge_iterator ei;
1394
1395 FOR_EACH_EDGE (e, ei, bb->succs)
1396 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1397 bb->aux = NULL;
1398 }
1399
1400 loop_optimizer_finalize ();
1401 free_dominance_info (CDI_DOMINATORS);
1402 free_dominance_info (CDI_POST_DOMINATORS);
1403 }
1404
1405 /* Perform value profile transformation using AutoFDO profile. Add the
1406 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1407 indirect call promoted. */
1408
1409 static bool
1410 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1411 {
1412 basic_block bb;
1413 if (afdo_source_profile->get_function_instance_by_decl (
1414 current_function_decl) == NULL)
1415 return false;
1416
1417 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1418
1419 bool has_vpt = false;
1420 FOR_EACH_BB_FN (bb, cfun)
1421 {
1422 if (!has_indirect_call (bb))
1423 continue;
1424 gimple_stmt_iterator gsi;
1425
1426 gcov_type bb_count = 0;
1427 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1428 {
1429 count_info info;
1430 gimple stmt = gsi_stmt (gsi);
1431 if (afdo_source_profile->get_count_info (stmt, &info))
1432 bb_count = MAX (bb_count, info.count);
1433 }
1434
1435 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1436 {
1437 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1438 /* IC_promotion and early_inline_2 is done in multiple iterations.
1439 No need to promoted the stmt if its in promoted_stmts (means
1440 it is already been promoted in the previous iterations). */
1441 if ((!stmt) || gimple_call_fn (stmt) == NULL
1442 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1443 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1444 continue;
1445
1446 count_info info;
1447 afdo_source_profile->get_count_info (stmt, &info);
1448 info.count = bb_count;
1449 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1450 {
1451 /* Promote the indirect call and update the promoted_stmts. */
1452 promoted_stmts->insert (stmt);
1453 afdo_vpt (&gsi, info.targets, true);
1454 has_vpt = true;
1455 }
1456 }
1457 }
1458
1459 if (has_vpt)
1460 {
1461 optimize_inline_calls (current_function_decl);
1462 return true;
1463 }
1464
1465 return false;
1466 }
1467
1468 /* Annotate auto profile to the control flow graph. Do not annotate value
1469 profile for stmts in PROMOTED_STMTS. */
1470
1471 static void
1472 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1473 {
1474 basic_block bb;
1475 bb_set annotated_bb;
1476 edge_set annotated_edge;
1477 const function_instance *s
1478 = afdo_source_profile->get_function_instance_by_decl (
1479 current_function_decl);
1480
1481 if (s == NULL)
1482 return;
1483 cgraph_node::get (current_function_decl)->count = s->head_count ();
1484 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1485 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1486
1487 FOR_EACH_BB_FN (bb, cfun)
1488 {
1489 edge e;
1490 edge_iterator ei;
1491
1492 bb->count = 0;
1493 FOR_EACH_EDGE (e, ei, bb->succs)
1494 e->count = 0;
1495
1496 if (afdo_set_bb_count (bb, promoted_stmts))
1497 set_bb_annotated (bb, &annotated_bb);
1498 if (bb->count > max_count)
1499 max_count = bb->count;
1500 }
1501 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1502 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1503 {
1504 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1505 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1506 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1507 }
1508 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1509 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1510 {
1511 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1512 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1513 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1514 }
1515 afdo_source_profile->mark_annotated (
1516 DECL_SOURCE_LOCATION (current_function_decl));
1517 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1518 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1519 if (max_count > 0)
1520 {
1521 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1522 counts_to_freqs ();
1523 profile_status_for_fn (cfun) = PROFILE_READ;
1524 }
1525 if (flag_value_profile_transformations)
1526 {
1527 gimple_value_profile_transformations ();
1528 free_dominance_info (CDI_DOMINATORS);
1529 free_dominance_info (CDI_POST_DOMINATORS);
1530 update_ssa (TODO_update_ssa);
1531 }
1532 }
1533
1534 /* Wrapper function to invoke early inliner. */
1535
1536 static void
1537 early_inline ()
1538 {
1539 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1540 unsigned todo = early_inliner (cfun);
1541 if (todo & TODO_update_ssa_any)
1542 update_ssa (TODO_update_ssa);
1543 }
1544
1545 /* Use AutoFDO profile to annoate the control flow graph.
1546 Return the todo flag. */
1547
1548 static unsigned int
1549 auto_profile (void)
1550 {
1551 struct cgraph_node *node;
1552
1553 if (symtab->state == FINISHED)
1554 return 0;
1555
1556 init_node_map (true);
1557 profile_info = autofdo::afdo_profile_info;
1558
1559 FOR_EACH_FUNCTION (node)
1560 {
1561 if (!gimple_has_body_p (node->decl))
1562 continue;
1563
1564 /* Don't profile functions produced for builtin stuff. */
1565 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1566 continue;
1567
1568 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1569
1570 /* First do indirect call promotion and early inline to make the
1571 IR match the profiled binary before actual annotation.
1572
1573 This is needed because an indirect call might have been promoted
1574 and inlined in the profiled binary. If we do not promote and
1575 inline these indirect calls before annotation, the profile for
1576 these promoted functions will be lost.
1577
1578 e.g. foo() --indirect_call--> bar()
1579 In profiled binary, the callsite is promoted and inlined, making
1580 the profile look like:
1581
1582 foo: {
1583 loc_foo_1: count_1
1584 bar@loc_foo_2: {
1585 loc_bar_1: count_2
1586 loc_bar_2: count_3
1587 }
1588 }
1589
1590 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1591 If we perform annotation on it, the profile inside bar@loc_foo2
1592 will be wasted.
1593
1594 To avoid this, we promote loc_foo_2 and inline the promoted bar
1595 function before annotation, so the profile inside bar@loc_foo2
1596 will be useful. */
1597 autofdo::stmt_set promoted_stmts;
1598 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1599 {
1600 if (!flag_value_profile_transformations
1601 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1602 break;
1603 early_inline ();
1604 }
1605
1606 early_inline ();
1607 autofdo::afdo_annotate_cfg (promoted_stmts);
1608 compute_function_frequency ();
1609
1610 /* Local pure-const may imply need to fixup the cfg. */
1611 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1612 cleanup_tree_cfg ();
1613
1614 free_dominance_info (CDI_DOMINATORS);
1615 free_dominance_info (CDI_POST_DOMINATORS);
1616 cgraph_edge::rebuild_edges ();
1617 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1618 pop_cfun ();
1619 }
1620
1621 return TODO_rebuild_cgraph_edges;
1622 }
1623 } /* namespace autofdo. */
1624
1625 /* Read the profile from the profile data file. */
1626
1627 void
1628 read_autofdo_file (void)
1629 {
1630 if (auto_profile_file == NULL)
1631 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1632
1633 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1634 1, sizeof (struct gcov_ctr_summary));
1635 autofdo::afdo_profile_info->runs = 1;
1636 autofdo::afdo_profile_info->sum_max = 0;
1637 autofdo::afdo_profile_info->sum_all = 0;
1638
1639 /* Read the profile from the profile file. */
1640 autofdo::read_profile ();
1641 }
1642
1643 /* Free the resources. */
1644
1645 void
1646 end_auto_profile (void)
1647 {
1648 delete autofdo::afdo_source_profile;
1649 delete autofdo::afdo_string_table;
1650 profile_info = NULL;
1651 }
1652
1653 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1654
1655 bool
1656 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1657 {
1658 gcov_type count
1659 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1660
1661 if (count > 0)
1662 {
1663 bool is_hot;
1664 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1665 /* At early inline stage, profile_info is not set yet. We need to
1666 temporarily set it to afdo_profile_info to calculate hotness. */
1667 profile_info = autofdo::afdo_profile_info;
1668 is_hot = maybe_hot_count_p (NULL, count);
1669 profile_info = saved_profile_info;
1670 return is_hot;
1671 }
1672
1673 return false;
1674 }
1675
1676 namespace
1677 {
1678
1679 const pass_data pass_data_ipa_auto_profile = {
1680 SIMPLE_IPA_PASS, "afdo", /* name */
1681 OPTGROUP_NONE, /* optinfo_flags */
1682 TV_IPA_AUTOFDO, /* tv_id */
1683 0, /* properties_required */
1684 0, /* properties_provided */
1685 0, /* properties_destroyed */
1686 0, /* todo_flags_start */
1687 0, /* todo_flags_finish */
1688 };
1689
1690 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1691 {
1692 public:
1693 pass_ipa_auto_profile (gcc::context *ctxt)
1694 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1695 {
1696 }
1697
1698 /* opt_pass methods: */
1699 virtual bool
1700 gate (function *)
1701 {
1702 return flag_auto_profile;
1703 }
1704 virtual unsigned int
1705 execute (function *)
1706 {
1707 return autofdo::auto_profile ();
1708 }
1709 }; // class pass_ipa_auto_profile
1710
1711 } // anon namespace
1712
1713 simple_ipa_opt_pass *
1714 make_pass_ipa_auto_profile (gcc::context *ctxt)
1715 {
1716 return new pass_ipa_auto_profile (ctxt);
1717 }