profile.c: Add file comment describing the overall algorithm and structures.
[gcc.git] / gcc / profile.c
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
24
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
41
42 The two auxiliary files generated are <dumpbase>.bb and
43 <dumpbase>.bbg. The former contains the BB->linenumber
44 mappings, and the latter describes the BB graph.
45
46 The BB file contains line numbers for each block. For each basic
47 block, a zero count is output (to mark the start of a block), then
48 the line numbers of that block are listed. A zero ends the file
49 too.
50
51 The BBG file contains a count of the blocks, followed by edge
52 information, for every edge in the graph. The edge information
53 lists the source and target block numbers, and a bit mask
54 describing the type of edge.
55
56 The BB and BBG file formats are fully described in the gcov
57 documentation. */
58
59 /* ??? Register allocation should use basic block execution counts to
60 give preference to the most commonly executed blocks. */
61
62 /* ??? The .da files are not safe. Changing the program after creating .da
63 files or using different options when compiling with -fbranch-probabilities
64 can result the arc data not matching the program. Maybe add instrumented
65 arc count to .bbg file? Maybe check whether PFG matches the .bbg file? */
66
67 /* ??? Should calculate branch probabilities before instrumenting code, since
68 then we can use arc counts to help decide which arcs to instrument. */
69
70 #include "config.h"
71 #include "system.h"
72 #include "rtl.h"
73 #include "tree.h"
74 #include "flags.h"
75 #include "insn-config.h"
76 #include "output.h"
77 #include "regs.h"
78 #include "expr.h"
79 #include "function.h"
80 #include "toplev.h"
81 #include "ggc.h"
82 #include "hard-reg-set.h"
83 #include "basic-block.h"
84 #include "gcov-io.h"
85 #include "target.h"
86 #include "profile.h"
87 #include "libfuncs.h"
88 #include "langhooks.h"
89
90 /* Additional information about the edges we need. */
91 struct edge_info {
92 unsigned int count_valid : 1;
93
94 /* Is on the spanning tree. */
95 unsigned int on_tree : 1;
96
97 /* Pretend this edge does not exist (it is abnormal and we've
98 inserted a fake to compensate). */
99 unsigned int ignore : 1;
100 };
101
102 struct bb_info {
103 unsigned int count_valid : 1;
104
105 /* Number of successor and predecessor edges. */
106 gcov_type succ_count;
107 gcov_type pred_count;
108 };
109
110 #define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
111 #define BB_INFO(b) ((struct bb_info *) (b)->aux)
112
113 /* Keep all basic block indexes nonnegative in the gcov output. Index 0
114 is used for entry block, last block exit block. */
115 #define BB_TO_GCOV_INDEX(bb) ((bb) == ENTRY_BLOCK_PTR ? 0 \
116 : ((bb) == EXIT_BLOCK_PTR \
117 ? last_basic_block + 1 : (bb)->index + 1))
118
119 /* Instantiate the profile info structure. */
120
121 struct profile_info profile_info;
122
123 /* Name and file pointer of the output file for the basic block graph. */
124
125 static FILE *bbg_file;
126
127 /* Name and file pointer of the input file for the arc count data. */
128
129 static FILE *da_file;
130
131 /* Pointer of the output file for the basic block/line number map. */
132 static FILE *bb_file;
133
134 /* Last source file name written to bb_file. */
135
136 static char *last_bb_file_name;
137
138 /* Collect statistics on the performance of this pass for the entire source
139 file. */
140
141 static int total_num_blocks;
142 static int total_num_edges;
143 static int total_num_edges_ignored;
144 static int total_num_edges_instrumented;
145 static int total_num_blocks_created;
146 static int total_num_passes;
147 static int total_num_times_called;
148 static int total_hist_br_prob[20];
149 static int total_num_never_executed;
150 static int total_num_branches;
151
152 /* Forward declarations. */
153 static void find_spanning_tree PARAMS ((struct edge_list *));
154 static void init_edge_profiler PARAMS ((void));
155 static rtx gen_edge_profiler PARAMS ((int));
156 static void instrument_edges PARAMS ((struct edge_list *));
157 static void output_gcov_string PARAMS ((const char *, long));
158 static void compute_branch_probabilities PARAMS ((void));
159 static gcov_type * get_exec_counts PARAMS ((void));
160 static long compute_checksum PARAMS ((void));
161 static basic_block find_group PARAMS ((basic_block));
162 static void union_groups PARAMS ((basic_block, basic_block));
163
164 /* If non-zero, we need to output a constructor to set up the
165 per-object-file data. */
166 static int need_func_profiler = 0;
167 \f
168 /* Add edge instrumentation code to the entire insn chain.
169
170 F is the first insn of the chain.
171 NUM_BLOCKS is the number of basic blocks found in F. */
172
173 static void
174 instrument_edges (el)
175 struct edge_list *el;
176 {
177 int num_instr_edges = 0;
178 int num_edges = NUM_EDGES (el);
179 basic_block bb;
180 remove_fake_edges ();
181
182 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
183 {
184 edge e = bb->succ;
185 while (e)
186 {
187 struct edge_info *inf = EDGE_INFO (e);
188 if (!inf->ignore && !inf->on_tree)
189 {
190 if (e->flags & EDGE_ABNORMAL)
191 abort ();
192 if (rtl_dump_file)
193 fprintf (rtl_dump_file, "Edge %d to %d instrumented%s\n",
194 e->src->index, e->dest->index,
195 EDGE_CRITICAL_P (e) ? " (and split)" : "");
196 need_func_profiler = 1;
197 insert_insn_on_edge (
198 gen_edge_profiler (total_num_edges_instrumented
199 + num_instr_edges++), e);
200 }
201 e = e->succ_next;
202 }
203 }
204
205 profile_info.count_edges_instrumented_now = num_instr_edges;
206 total_num_edges_instrumented += num_instr_edges;
207 profile_info.count_instrumented_edges = total_num_edges_instrumented;
208
209 total_num_blocks_created += num_edges;
210 if (rtl_dump_file)
211 fprintf (rtl_dump_file, "%d edges instrumented\n", num_instr_edges);
212
213 commit_edge_insertions_watch_calls ();
214 }
215
216 /* Output STRING to bb_file, surrounded by DELIMITER. */
217
218 static void
219 output_gcov_string (string, delimiter)
220 const char *string;
221 long delimiter;
222 {
223 long temp;
224
225 /* Write a delimiter to indicate that a file name follows. */
226 __write_long (delimiter, bb_file, 4);
227
228 /* Write the string. */
229 temp = strlen (string) + 1;
230 fwrite (string, temp, 1, bb_file);
231
232 /* Append a few zeros, to align the output to a 4 byte boundary. */
233 temp = temp & 0x3;
234 if (temp)
235 {
236 char c[4];
237
238 c[0] = c[1] = c[2] = c[3] = 0;
239 fwrite (c, sizeof (char), 4 - temp, bb_file);
240 }
241
242 /* Store another delimiter in the .bb file, just to make it easy to find
243 the end of the file name. */
244 __write_long (delimiter, bb_file, 4);
245 }
246 \f
247
248 /* Computes hybrid profile for all matching entries in da_file.
249 Sets max_counter_in_program as a side effect. */
250
251 static gcov_type *
252 get_exec_counts ()
253 {
254 int num_edges = 0;
255 basic_block bb;
256 int okay = 1, i;
257 int mismatch = 0;
258 gcov_type *profile;
259 char *function_name_buffer;
260 int function_name_buffer_len;
261 gcov_type max_counter_in_run;
262
263 profile_info.max_counter_in_program = 0;
264 profile_info.count_profiles_merged = 0;
265
266 /* No .da file, no execution counts. */
267 if (!da_file)
268 return 0;
269
270 /* Count the edges to be (possibly) instrumented. */
271
272 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
273 {
274 edge e;
275 for (e = bb->succ; e; e = e->succ_next)
276 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
277 num_edges++;
278 }
279
280 /* now read and combine all matching profiles. */
281
282 profile = xmalloc (sizeof (gcov_type) * num_edges);
283 rewind (da_file);
284 function_name_buffer_len = strlen (current_function_name) + 1;
285 function_name_buffer = xmalloc (function_name_buffer_len + 1);
286
287 for (i = 0; i < num_edges; i++)
288 profile[i] = 0;
289
290 while (1)
291 {
292 long magic, extra_bytes;
293 long func_count;
294 int i;
295
296 if (__read_long (&magic, da_file, 4) != 0)
297 break;
298
299 if (magic != -123)
300 {
301 okay = 0;
302 break;
303 }
304
305 if (__read_long (&func_count, da_file, 4) != 0)
306 {
307 okay = 0;
308 break;
309 }
310
311 if (__read_long (&extra_bytes, da_file, 4) != 0)
312 {
313 okay = 0;
314 break;
315 }
316
317 fseek (da_file, 4 + 8, SEEK_CUR);
318
319 /* read the maximal counter. */
320 __read_gcov_type (&max_counter_in_run, da_file, 8);
321
322 /* skip the rest of "statistics" emited by __bb_exit_func. */
323 fseek (da_file, extra_bytes - (4 + 8 + 8), SEEK_CUR);
324
325 for (i = 0; i < func_count; i++)
326 {
327 long arc_count;
328 long chksum;
329 int j;
330
331 if (__read_gcov_string
332 (function_name_buffer, function_name_buffer_len, da_file,
333 -1) != 0)
334 {
335 okay = 0;
336 break;
337 }
338
339 if (__read_long (&chksum, da_file, 4) != 0)
340 {
341 okay = 0;
342 break;
343 }
344
345 if (__read_long (&arc_count, da_file, 4) != 0)
346 {
347 okay = 0;
348 break;
349 }
350
351 if (strcmp (function_name_buffer, current_function_name) != 0)
352 {
353 /* skip */
354 if (fseek (da_file, arc_count * 8, SEEK_CUR) < 0)
355 {
356 okay = 0;
357 break;
358 }
359 }
360 else if (arc_count != num_edges
361 || chksum != profile_info.current_function_cfg_checksum)
362 okay = 0, mismatch = 1;
363 else
364 {
365 gcov_type tmp;
366
367 profile_info.max_counter_in_program += max_counter_in_run;
368 profile_info.count_profiles_merged++;
369
370 for (j = 0; j < arc_count; j++)
371 if (__read_gcov_type (&tmp, da_file, 8) != 0)
372 {
373 okay = 0;
374 break;
375 }
376 else
377 {
378 profile[j] += tmp;
379 }
380 }
381 }
382
383 if (!okay)
384 break;
385
386 }
387
388 free (function_name_buffer);
389
390 if (!okay)
391 {
392 if (mismatch)
393 error
394 ("Profile does not match flowgraph of function %s (out of date?)",
395 current_function_name);
396 else
397 error (".da file corrupted");
398 free (profile);
399 return 0;
400 }
401 if (rtl_dump_file)
402 {
403 fprintf(rtl_dump_file, "Merged %i profiles with maximal count %i.\n",
404 profile_info.count_profiles_merged,
405 (int)profile_info.max_counter_in_program);
406 }
407
408 return profile;
409 }
410 \f
411
412 /* Compute the branch probabilities for the various branches.
413 Annotate them accordingly. */
414
415 static void
416 compute_branch_probabilities ()
417 {
418 basic_block bb;
419 int i;
420 int num_edges = 0;
421 int changes;
422 int passes;
423 int hist_br_prob[20];
424 int num_never_executed;
425 int num_branches;
426 gcov_type *exec_counts = get_exec_counts ();
427 int exec_counts_pos = 0;
428
429 /* Attach extra info block to each bb. */
430
431 alloc_aux_for_blocks (sizeof (struct bb_info));
432 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
433 {
434 edge e;
435
436 for (e = bb->succ; e; e = e->succ_next)
437 if (!EDGE_INFO (e)->ignore)
438 BB_INFO (bb)->succ_count++;
439 for (e = bb->pred; e; e = e->pred_next)
440 if (!EDGE_INFO (e)->ignore)
441 BB_INFO (bb)->pred_count++;
442 }
443
444 /* Avoid predicting entry on exit nodes. */
445 BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
446 BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
447
448 /* For each edge not on the spanning tree, set its execution count from
449 the .da file. */
450
451 /* The first count in the .da file is the number of times that the function
452 was entered. This is the exec_count for block zero. */
453
454 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
455 {
456 edge e;
457 for (e = bb->succ; e; e = e->succ_next)
458 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
459 {
460 num_edges++;
461 if (exec_counts)
462 {
463 e->count = exec_counts[exec_counts_pos++];
464 }
465 else
466 e->count = 0;
467
468 EDGE_INFO (e)->count_valid = 1;
469 BB_INFO (bb)->succ_count--;
470 BB_INFO (e->dest)->pred_count--;
471 if (rtl_dump_file)
472 {
473 fprintf (rtl_dump_file, "\nRead edge from %i to %i, count:",
474 bb->index, e->dest->index);
475 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
476 (HOST_WIDEST_INT) e->count);
477 }
478 }
479 }
480
481 if (rtl_dump_file)
482 fprintf (rtl_dump_file, "\n%d edge counts read\n", num_edges);
483
484 /* For every block in the file,
485 - if every exit/entrance edge has a known count, then set the block count
486 - if the block count is known, and every exit/entrance edge but one has
487 a known execution count, then set the count of the remaining edge
488
489 As edge counts are set, decrement the succ/pred count, but don't delete
490 the edge, that way we can easily tell when all edges are known, or only
491 one edge is unknown. */
492
493 /* The order that the basic blocks are iterated through is important.
494 Since the code that finds spanning trees starts with block 0, low numbered
495 edges are put on the spanning tree in preference to high numbered edges.
496 Hence, most instrumented edges are at the end. Graph solving works much
497 faster if we propagate numbers from the end to the start.
498
499 This takes an average of slightly more than 3 passes. */
500
501 changes = 1;
502 passes = 0;
503 while (changes)
504 {
505 passes++;
506 changes = 0;
507 FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
508 {
509 struct bb_info *bi = BB_INFO (bb);
510 if (! bi->count_valid)
511 {
512 if (bi->succ_count == 0)
513 {
514 edge e;
515 gcov_type total = 0;
516
517 for (e = bb->succ; e; e = e->succ_next)
518 total += e->count;
519 bb->count = total;
520 bi->count_valid = 1;
521 changes = 1;
522 }
523 else if (bi->pred_count == 0)
524 {
525 edge e;
526 gcov_type total = 0;
527
528 for (e = bb->pred; e; e = e->pred_next)
529 total += e->count;
530 bb->count = total;
531 bi->count_valid = 1;
532 changes = 1;
533 }
534 }
535 if (bi->count_valid)
536 {
537 if (bi->succ_count == 1)
538 {
539 edge e;
540 gcov_type total = 0;
541
542 /* One of the counts will be invalid, but it is zero,
543 so adding it in also doesn't hurt. */
544 for (e = bb->succ; e; e = e->succ_next)
545 total += e->count;
546
547 /* Seedgeh for the invalid edge, and set its count. */
548 for (e = bb->succ; e; e = e->succ_next)
549 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
550 break;
551
552 /* Calculate count for remaining edge by conservation. */
553 total = bb->count - total;
554
555 if (! e)
556 abort ();
557 EDGE_INFO (e)->count_valid = 1;
558 e->count = total;
559 bi->succ_count--;
560
561 BB_INFO (e->dest)->pred_count--;
562 changes = 1;
563 }
564 if (bi->pred_count == 1)
565 {
566 edge e;
567 gcov_type total = 0;
568
569 /* One of the counts will be invalid, but it is zero,
570 so adding it in also doesn't hurt. */
571 for (e = bb->pred; e; e = e->pred_next)
572 total += e->count;
573
574 /* Seedgeh for the invalid edge, and set its count. */
575 for (e = bb->pred; e; e = e->pred_next)
576 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
577 break;
578
579 /* Calculate count for remaining edge by conservation. */
580 total = bb->count - total + e->count;
581
582 if (! e)
583 abort ();
584 EDGE_INFO (e)->count_valid = 1;
585 e->count = total;
586 bi->pred_count--;
587
588 BB_INFO (e->src)->succ_count--;
589 changes = 1;
590 }
591 }
592 }
593 }
594 if (rtl_dump_file)
595 dump_flow_info (rtl_dump_file);
596
597 total_num_passes += passes;
598 if (rtl_dump_file)
599 fprintf (rtl_dump_file, "Graph solving took %d passes.\n\n", passes);
600
601 /* If the graph has been correctly solved, every block will have a
602 succ and pred count of zero. */
603 FOR_EACH_BB (bb)
604 {
605 if (BB_INFO (bb)->succ_count || BB_INFO (bb)->pred_count)
606 abort ();
607 }
608
609 /* For every edge, calculate its branch probability and add a reg_note
610 to the branch insn to indicate this. */
611
612 for (i = 0; i < 20; i++)
613 hist_br_prob[i] = 0;
614 num_never_executed = 0;
615 num_branches = 0;
616
617 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
618 {
619 edge e;
620 gcov_type total;
621 rtx note;
622
623 total = bb->count;
624 if (total)
625 {
626 for (e = bb->succ; e; e = e->succ_next)
627 {
628 e->probability = (e->count * REG_BR_PROB_BASE + total / 2) / total;
629 if (e->probability < 0 || e->probability > REG_BR_PROB_BASE)
630 {
631 error ("corrupted profile info: prob for %d-%d thought to be %d",
632 e->src->index, e->dest->index, e->probability);
633 e->probability = REG_BR_PROB_BASE / 2;
634 }
635 }
636 if (bb->index >= 0
637 && any_condjump_p (bb->end)
638 && bb->succ->succ_next)
639 {
640 int prob;
641 edge e;
642 int index;
643
644 /* Find the branch edge. It is possible that we do have fake
645 edges here. */
646 for (e = bb->succ; e->flags & (EDGE_FAKE | EDGE_FALLTHRU);
647 e = e->succ_next)
648 continue; /* Loop body has been intentionally left blank. */
649
650 prob = e->probability;
651 index = prob * 20 / REG_BR_PROB_BASE;
652
653 if (index == 20)
654 index = 19;
655 hist_br_prob[index]++;
656
657 note = find_reg_note (bb->end, REG_BR_PROB, 0);
658 /* There may be already note put by some other pass, such
659 as builtin_expect expander. */
660 if (note)
661 XEXP (note, 0) = GEN_INT (prob);
662 else
663 REG_NOTES (bb->end)
664 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
665 REG_NOTES (bb->end));
666 num_branches++;
667 }
668 }
669 /* Otherwise distribute the probabilities evenly so we get sane sum.
670 Use simple heuristics that if there are normal edges, give all abnormals
671 frequency of 0, otherwise distribute the frequency over abnormals
672 (this is the case of noreturn calls). */
673 else
674 {
675 for (e = bb->succ; e; e = e->succ_next)
676 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
677 total ++;
678 if (total)
679 {
680 for (e = bb->succ; e; e = e->succ_next)
681 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
682 e->probability = REG_BR_PROB_BASE / total;
683 else
684 e->probability = 0;
685 }
686 else
687 {
688 for (e = bb->succ; e; e = e->succ_next)
689 total ++;
690 for (e = bb->succ; e; e = e->succ_next)
691 e->probability = REG_BR_PROB_BASE / total;
692 }
693 if (bb->index >= 0
694 && any_condjump_p (bb->end)
695 && bb->succ->succ_next)
696 num_branches++, num_never_executed;
697 }
698 }
699
700 if (rtl_dump_file)
701 {
702 fprintf (rtl_dump_file, "%d branches\n", num_branches);
703 fprintf (rtl_dump_file, "%d branches never executed\n",
704 num_never_executed);
705 if (num_branches)
706 for (i = 0; i < 10; i++)
707 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
708 (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
709 5 * i, 5 * i + 5);
710
711 total_num_branches += num_branches;
712 total_num_never_executed += num_never_executed;
713 for (i = 0; i < 20; i++)
714 total_hist_br_prob[i] += hist_br_prob[i];
715
716 fputc ('\n', rtl_dump_file);
717 fputc ('\n', rtl_dump_file);
718 }
719
720 free_aux_for_blocks ();
721 if (exec_counts)
722 free (exec_counts);
723 }
724
725 /* Compute checksum for the current function. */
726
727 #define CHSUM_HASH 500000003
728 #define CHSUM_SHIFT 2
729
730 static long
731 compute_checksum ()
732 {
733 long chsum = 0;
734 basic_block bb;
735
736 FOR_EACH_BB (bb)
737 {
738 edge e;
739
740 for (e = bb->succ; e; e = e->succ_next)
741 {
742 chsum = ((chsum << CHSUM_SHIFT) + (BB_TO_GCOV_INDEX (e->dest) + 1)) % CHSUM_HASH;
743 }
744
745 chsum = (chsum << CHSUM_SHIFT) % CHSUM_HASH;
746 }
747
748 return chsum;
749 }
750
751 /* Instrument and/or analyze program behavior based on program flow graph.
752 In either case, this function builds a flow graph for the function being
753 compiled. The flow graph is stored in BB_GRAPH.
754
755 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
756 the flow graph that are needed to reconstruct the dynamic behavior of the
757 flow graph.
758
759 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
760 information from a data file containing edge count information from previous
761 executions of the function being compiled. In this case, the flow graph is
762 annotated with actual execution counts, which are later propagated into the
763 rtl for optimization purposes.
764
765 Main entry point of this file. */
766
767 void
768 branch_prob ()
769 {
770 basic_block bb;
771 int i;
772 int num_edges, ignored_edges;
773 struct edge_list *el;
774
775 profile_info.current_function_cfg_checksum = compute_checksum ();
776
777 if (rtl_dump_file)
778 fprintf (rtl_dump_file, "CFG checksum is %ld\n",
779 profile_info.current_function_cfg_checksum);
780
781 /* Start of a function. */
782 if (flag_test_coverage)
783 output_gcov_string (current_function_name, (long) -2);
784
785 total_num_times_called++;
786
787 flow_call_edges_add (NULL);
788 add_noreturn_fake_exit_edges ();
789
790 /* We can't handle cyclic regions constructed using abnormal edges.
791 To avoid these we replace every source of abnormal edge by a fake
792 edge from entry node and every destination by fake edge to exit.
793 This keeps graph acyclic and our calculation exact for all normal
794 edges except for exit and entrance ones.
795
796 We also add fake exit edges for each call and asm statement in the
797 basic, since it may not return. */
798
799 FOR_EACH_BB (bb)
800 {
801 int need_exit_edge = 0, need_entry_edge = 0;
802 int have_exit_edge = 0, have_entry_edge = 0;
803 rtx insn;
804 edge e;
805
806 /* Add fake edges from entry block to the call insns that may return
807 twice. The CFG is not quite correct then, as call insn plays more
808 role of CODE_LABEL, but for our purposes, everything should be OK,
809 as we never insert code to the beggining of basic block. */
810 for (insn = bb->head; insn != NEXT_INSN (bb->end);
811 insn = NEXT_INSN (insn))
812 {
813 if (GET_CODE (insn) == CALL_INSN
814 && find_reg_note (insn, REG_SETJMP, NULL))
815 {
816 if (GET_CODE (bb->head) == CODE_LABEL
817 || insn != NEXT_INSN (bb->head))
818 {
819 e = split_block (bb, PREV_INSN (insn));
820 make_edge (ENTRY_BLOCK_PTR, e->dest, EDGE_FAKE);
821 break;
822 }
823 else
824 {
825 /* We should not get abort here, as call to setjmp should not
826 be the very first instruction of function. */
827 if (bb == ENTRY_BLOCK_PTR)
828 abort ();
829 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
830 }
831 }
832 }
833
834 for (e = bb->succ; e; e = e->succ_next)
835 {
836 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
837 && e->dest != EXIT_BLOCK_PTR)
838 need_exit_edge = 1;
839 if (e->dest == EXIT_BLOCK_PTR)
840 have_exit_edge = 1;
841 }
842 for (e = bb->pred; e; e = e->pred_next)
843 {
844 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
845 && e->src != ENTRY_BLOCK_PTR)
846 need_entry_edge = 1;
847 if (e->src == ENTRY_BLOCK_PTR)
848 have_entry_edge = 1;
849 }
850
851 if (need_exit_edge && !have_exit_edge)
852 {
853 if (rtl_dump_file)
854 fprintf (rtl_dump_file, "Adding fake exit edge to bb %i\n",
855 bb->index);
856 make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
857 }
858 if (need_entry_edge && !have_entry_edge)
859 {
860 if (rtl_dump_file)
861 fprintf (rtl_dump_file, "Adding fake entry edge to bb %i\n",
862 bb->index);
863 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
864 }
865 }
866
867 el = create_edge_list ();
868 num_edges = NUM_EDGES (el);
869 alloc_aux_for_edges (sizeof (struct edge_info));
870
871 /* The basic blocks are expected to be numbered sequentially. */
872 compact_blocks ();
873
874 ignored_edges = 0;
875 for (i = 0 ; i < num_edges ; i++)
876 {
877 edge e = INDEX_EDGE (el, i);
878 e->count = 0;
879
880 /* Mark edges we've replaced by fake edges above as ignored. */
881 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
882 && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
883 {
884 EDGE_INFO (e)->ignore = 1;
885 ignored_edges++;
886 }
887 }
888
889 #ifdef ENABLE_CHECKING
890 verify_flow_info ();
891 #endif
892
893 /* Output line number information about each basic block for
894 GCOV utility. */
895 if (flag_test_coverage)
896 {
897 FOR_EACH_BB (bb)
898 {
899 rtx insn = bb->head;
900 static int ignore_next_note = 0;
901
902 /* We are looking for line number notes. Search backward before
903 basic block to find correct ones. */
904 insn = prev_nonnote_insn (insn);
905 if (!insn)
906 insn = get_insns ();
907 else
908 insn = NEXT_INSN (insn);
909
910 /* Output a zero to the .bb file to indicate that a new
911 block list is starting. */
912 __write_long (0, bb_file, 4);
913
914 while (insn != bb->end)
915 {
916 if (GET_CODE (insn) == NOTE)
917 {
918 /* Must ignore the line number notes that immediately
919 follow the end of an inline function to avoid counting
920 it twice. There is a note before the call, and one
921 after the call. */
922 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_REPEATED_LINE_NUMBER)
923 ignore_next_note = 1;
924 else if (NOTE_LINE_NUMBER (insn) > 0)
925 {
926 if (ignore_next_note)
927 ignore_next_note = 0;
928 else
929 {
930 /* If this is a new source file, then output the
931 file's name to the .bb file. */
932 if (! last_bb_file_name
933 || strcmp (NOTE_SOURCE_FILE (insn),
934 last_bb_file_name))
935 {
936 if (last_bb_file_name)
937 free (last_bb_file_name);
938 last_bb_file_name
939 = xstrdup (NOTE_SOURCE_FILE (insn));
940 output_gcov_string (NOTE_SOURCE_FILE (insn),
941 (long)-1);
942 }
943 /* Output the line number to the .bb file. Must be
944 done after the output_bb_profile_data() call, and
945 after the file name is written, to ensure that it
946 is correctly handled by gcov. */
947 __write_long (NOTE_LINE_NUMBER (insn), bb_file, 4);
948 }
949 }
950 }
951 insn = NEXT_INSN (insn);
952 }
953 }
954 __write_long (0, bb_file, 4);
955 }
956
957 /* Create spanning tree from basic block graph, mark each edge that is
958 on the spanning tree. We insert as many abnormal and critical edges
959 as possible to minimize number of edge splits necessary. */
960
961 find_spanning_tree (el);
962
963 /* Fake edges that are not on the tree will not be instrumented, so
964 mark them ignored. */
965 for (i = 0; i < num_edges; i++)
966 {
967 edge e = INDEX_EDGE (el, i);
968 struct edge_info *inf = EDGE_INFO (e);
969 if ((e->flags & EDGE_FAKE) && !inf->ignore && !inf->on_tree)
970 {
971 inf->ignore = 1;
972 ignored_edges++;
973 }
974 }
975
976 total_num_blocks += n_basic_blocks + 2;
977 if (rtl_dump_file)
978 fprintf (rtl_dump_file, "%d basic blocks\n", n_basic_blocks);
979
980 total_num_edges += num_edges;
981 if (rtl_dump_file)
982 fprintf (rtl_dump_file, "%d edges\n", num_edges);
983
984 total_num_edges_ignored += ignored_edges;
985 if (rtl_dump_file)
986 fprintf (rtl_dump_file, "%d ignored edges\n", ignored_edges);
987
988 /* Create a .bbg file from which gcov can reconstruct the basic block
989 graph. First output the number of basic blocks, and then for every
990 edge output the source and target basic block numbers.
991 NOTE: The format of this file must be compatible with gcov. */
992
993 if (flag_test_coverage)
994 {
995 int flag_bits;
996
997 __write_gcov_string (current_function_name,
998 strlen (current_function_name), bbg_file, -1);
999
1000 /* write checksum. */
1001 __write_long (profile_info.current_function_cfg_checksum, bbg_file, 4);
1002
1003 /* The plus 2 stands for entry and exit block. */
1004 __write_long (n_basic_blocks + 2, bbg_file, 4);
1005 __write_long (num_edges - ignored_edges + 1, bbg_file, 4);
1006
1007 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1008 {
1009 edge e;
1010 long count = 0;
1011
1012 for (e = bb->succ; e; e = e->succ_next)
1013 if (!EDGE_INFO (e)->ignore)
1014 count++;
1015 __write_long (count, bbg_file, 4);
1016
1017 for (e = bb->succ; e; e = e->succ_next)
1018 {
1019 struct edge_info *i = EDGE_INFO (e);
1020 if (!i->ignore)
1021 {
1022 flag_bits = 0;
1023 if (i->on_tree)
1024 flag_bits |= 0x1;
1025 if (e->flags & EDGE_FAKE)
1026 flag_bits |= 0x2;
1027 if (e->flags & EDGE_FALLTHRU)
1028 flag_bits |= 0x4;
1029
1030 __write_long (BB_TO_GCOV_INDEX (e->dest), bbg_file, 4);
1031 __write_long (flag_bits, bbg_file, 4);
1032 }
1033 }
1034 }
1035 /* Emit fake loopback edge for EXIT block to maintain compatibility with
1036 old gcov format. */
1037 __write_long (1, bbg_file, 4);
1038 __write_long (0, bbg_file, 4);
1039 __write_long (0x1, bbg_file, 4);
1040
1041 /* Emit a -1 to separate the list of all edges from the list of
1042 loop back edges that follows. */
1043 __write_long (-1, bbg_file, 4);
1044 }
1045
1046 if (flag_branch_probabilities)
1047 compute_branch_probabilities ();
1048
1049 /* For each edge not on the spanning tree, add counting code as rtl. */
1050
1051 if (profile_arc_flag)
1052 {
1053 instrument_edges (el);
1054 allocate_reg_info (max_reg_num (), FALSE, FALSE);
1055 }
1056
1057 remove_fake_edges ();
1058 /* Re-merge split basic blocks and the mess introduced by
1059 insert_insn_on_edge. */
1060 cleanup_cfg (profile_arc_flag ? CLEANUP_EXPENSIVE : 0);
1061 if (rtl_dump_file)
1062 dump_flow_info (rtl_dump_file);
1063
1064 free_aux_for_edges ();
1065 free_edge_list (el);
1066 }
1067 \f
1068 /* Union find algorithm implementation for the basic blocks using
1069 aux fields. */
1070
1071 static basic_block
1072 find_group (bb)
1073 basic_block bb;
1074 {
1075 basic_block group = bb, bb1;
1076
1077 while ((basic_block) group->aux != group)
1078 group = (basic_block) group->aux;
1079
1080 /* Compress path. */
1081 while ((basic_block) bb->aux != group)
1082 {
1083 bb1 = (basic_block) bb->aux;
1084 bb->aux = (void *) group;
1085 bb = bb1;
1086 }
1087 return group;
1088 }
1089
1090 static void
1091 union_groups (bb1, bb2)
1092 basic_block bb1, bb2;
1093 {
1094 basic_block bb1g = find_group (bb1);
1095 basic_block bb2g = find_group (bb2);
1096
1097 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1098 this code is unlikely going to be performance problem anyway. */
1099 if (bb1g == bb2g)
1100 abort ();
1101
1102 bb1g->aux = bb2g;
1103 }
1104 \f
1105 /* This function searches all of the edges in the program flow graph, and puts
1106 as many bad edges as possible onto the spanning tree. Bad edges include
1107 abnormals edges, which can't be instrumented at the moment. Since it is
1108 possible for fake edges to form an cycle, we will have to develop some
1109 better way in the future. Also put critical edges to the tree, since they
1110 are more expensive to instrument. */
1111
1112 static void
1113 find_spanning_tree (el)
1114 struct edge_list *el;
1115 {
1116 int i;
1117 int num_edges = NUM_EDGES (el);
1118 basic_block bb;
1119
1120 /* We use aux field for standard union-find algorithm. */
1121 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1122 bb->aux = bb;
1123
1124 /* Add fake edge exit to entry we can't instrument. */
1125 union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
1126
1127 /* First add all abnormal edges to the tree unless they form an cycle. Also
1128 add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
1129 setting return value from function. */
1130 for (i = 0; i < num_edges; i++)
1131 {
1132 edge e = INDEX_EDGE (el, i);
1133 if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1134 || e->dest == EXIT_BLOCK_PTR
1135 )
1136 && !EDGE_INFO (e)->ignore
1137 && (find_group (e->src) != find_group (e->dest)))
1138 {
1139 if (rtl_dump_file)
1140 fprintf (rtl_dump_file, "Abnormal edge %d to %d put to tree\n",
1141 e->src->index, e->dest->index);
1142 EDGE_INFO (e)->on_tree = 1;
1143 union_groups (e->src, e->dest);
1144 }
1145 }
1146
1147 /* Now insert all critical edges to the tree unless they form an cycle. */
1148 for (i = 0; i < num_edges; i++)
1149 {
1150 edge e = INDEX_EDGE (el, i);
1151 if ((EDGE_CRITICAL_P (e))
1152 && !EDGE_INFO (e)->ignore
1153 && (find_group (e->src) != find_group (e->dest)))
1154 {
1155 if (rtl_dump_file)
1156 fprintf (rtl_dump_file, "Critical edge %d to %d put to tree\n",
1157 e->src->index, e->dest->index);
1158 EDGE_INFO (e)->on_tree = 1;
1159 union_groups (e->src, e->dest);
1160 }
1161 }
1162
1163 /* And now the rest. */
1164 for (i = 0; i < num_edges; i++)
1165 {
1166 edge e = INDEX_EDGE (el, i);
1167 if (find_group (e->src) != find_group (e->dest)
1168 && !EDGE_INFO (e)->ignore)
1169 {
1170 if (rtl_dump_file)
1171 fprintf (rtl_dump_file, "Normal edge %d to %d put to tree\n",
1172 e->src->index, e->dest->index);
1173 EDGE_INFO (e)->on_tree = 1;
1174 union_groups (e->src, e->dest);
1175 }
1176 }
1177
1178 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1179 bb->aux = NULL;
1180 }
1181 \f
1182 /* Perform file-level initialization for branch-prob processing. */
1183
1184 void
1185 init_branch_prob (filename)
1186 const char *filename;
1187 {
1188 long len;
1189 int i;
1190
1191 if (flag_test_coverage)
1192 {
1193 int len = strlen (filename);
1194 char *data_file, *bbg_file_name;
1195
1196 /* Open an output file for the basic block/line number map. */
1197 data_file = (char *) alloca (len + 4);
1198 strcpy (data_file, filename);
1199 strip_off_ending (data_file, len);
1200 strcat (data_file, ".bb");
1201 if ((bb_file = fopen (data_file, "wb")) == 0)
1202 fatal_io_error ("can't open %s", data_file);
1203
1204 /* Open an output file for the program flow graph. */
1205 bbg_file_name = (char *) alloca (len + 5);
1206 strcpy (bbg_file_name, filename);
1207 strip_off_ending (bbg_file_name, len);
1208 strcat (bbg_file_name, ".bbg");
1209 if ((bbg_file = fopen (bbg_file_name, "wb")) == 0)
1210 fatal_io_error ("can't open %s", bbg_file_name);
1211
1212 /* Initialize to zero, to ensure that the first file name will be
1213 written to the .bb file. */
1214 last_bb_file_name = 0;
1215 }
1216
1217 if (flag_branch_probabilities)
1218 {
1219 char *da_file_name;
1220
1221 len = strlen (filename);
1222 da_file_name = (char *) alloca (len + 4);
1223 strcpy (da_file_name, filename);
1224 strip_off_ending (da_file_name, len);
1225 strcat (da_file_name, ".da");
1226 if ((da_file = fopen (da_file_name, "rb")) == 0)
1227 warning ("file %s not found, execution counts assumed to be zero",
1228 da_file_name);
1229 }
1230
1231 if (profile_arc_flag)
1232 init_edge_profiler ();
1233
1234 total_num_blocks = 0;
1235 total_num_edges = 0;
1236 total_num_edges_ignored = 0;
1237 total_num_edges_instrumented = 0;
1238 total_num_blocks_created = 0;
1239 total_num_passes = 0;
1240 total_num_times_called = 0;
1241 total_num_branches = 0;
1242 total_num_never_executed = 0;
1243 for (i = 0; i < 20; i++)
1244 total_hist_br_prob[i] = 0;
1245 }
1246
1247 /* Performs file-level cleanup after branch-prob processing
1248 is completed. */
1249
1250 void
1251 end_branch_prob ()
1252 {
1253 if (flag_test_coverage)
1254 {
1255 fclose (bb_file);
1256 fclose (bbg_file);
1257 }
1258
1259 if (flag_branch_probabilities && da_file)
1260 fclose (da_file);
1261
1262 if (rtl_dump_file)
1263 {
1264 fprintf (rtl_dump_file, "\n");
1265 fprintf (rtl_dump_file, "Total number of blocks: %d\n",
1266 total_num_blocks);
1267 fprintf (rtl_dump_file, "Total number of edges: %d\n", total_num_edges);
1268 fprintf (rtl_dump_file, "Total number of ignored edges: %d\n",
1269 total_num_edges_ignored);
1270 fprintf (rtl_dump_file, "Total number of instrumented edges: %d\n",
1271 total_num_edges_instrumented);
1272 fprintf (rtl_dump_file, "Total number of blocks created: %d\n",
1273 total_num_blocks_created);
1274 fprintf (rtl_dump_file, "Total number of graph solution passes: %d\n",
1275 total_num_passes);
1276 if (total_num_times_called != 0)
1277 fprintf (rtl_dump_file, "Average number of graph solution passes: %d\n",
1278 (total_num_passes + (total_num_times_called >> 1))
1279 / total_num_times_called);
1280 fprintf (rtl_dump_file, "Total number of branches: %d\n",
1281 total_num_branches);
1282 fprintf (rtl_dump_file, "Total number of branches never executed: %d\n",
1283 total_num_never_executed);
1284 if (total_num_branches)
1285 {
1286 int i;
1287
1288 for (i = 0; i < 10; i++)
1289 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
1290 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1291 / total_num_branches, 5*i, 5*i+5);
1292 }
1293 }
1294 }
1295 \f
1296 /* The label used by the edge profiling code. */
1297
1298 static GTY(()) rtx profiler_label;
1299
1300 /* Initialize the profiler_label. */
1301
1302 static void
1303 init_edge_profiler ()
1304 {
1305 /* Generate and save a copy of this so it can be shared. */
1306 char buf[20];
1307 ASM_GENERATE_INTERNAL_LABEL (buf, "LPBX", 2);
1308 profiler_label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
1309 }
1310
1311 /* Output instructions as RTL to increment the edge execution count. */
1312
1313 static rtx
1314 gen_edge_profiler (edgeno)
1315 int edgeno;
1316 {
1317 enum machine_mode mode = mode_for_size (GCOV_TYPE_SIZE, MODE_INT, 0);
1318 rtx mem_ref, tmp;
1319 rtx sequence;
1320
1321 start_sequence ();
1322
1323 tmp = force_reg (Pmode, profiler_label);
1324 tmp = plus_constant (tmp, GCOV_TYPE_SIZE / BITS_PER_UNIT * edgeno);
1325 mem_ref = validize_mem (gen_rtx_MEM (mode, tmp));
1326
1327 set_mem_alias_set (mem_ref, new_alias_set ());
1328
1329 tmp = expand_simple_binop (mode, PLUS, mem_ref, const1_rtx,
1330 mem_ref, 0, OPTAB_WIDEN);
1331
1332 if (tmp != mem_ref)
1333 emit_move_insn (copy_rtx (mem_ref), tmp);
1334
1335 sequence = get_insns ();
1336 end_sequence ();
1337 return sequence;
1338 }
1339
1340 /* Output code for a constructor that will invoke __bb_init_func, if
1341 this has not already been done. */
1342
1343 void
1344 output_func_start_profiler ()
1345 {
1346 tree fnname, fndecl;
1347 char *name;
1348 char buf[20];
1349 const char *cfnname;
1350 rtx table_address;
1351 enum machine_mode mode = mode_for_size (GCOV_TYPE_SIZE, MODE_INT, 0);
1352 int save_flag_inline_functions = flag_inline_functions;
1353
1354 /* It's either already been output, or we don't need it because we're
1355 not doing profile-edges. */
1356 if (! need_func_profiler)
1357 return;
1358
1359 need_func_profiler = 0;
1360
1361 /* Synthesize a constructor function to invoke __bb_init_func with a
1362 pointer to this object file's profile block. */
1363
1364 /* Try and make a unique name given the "file function name".
1365
1366 And no, I don't like this either. */
1367
1368 fnname = get_file_function_name ('I');
1369 cfnname = IDENTIFIER_POINTER (fnname);
1370 name = concat (cfnname, "GCOV", NULL);
1371 fnname = get_identifier (name);
1372 free (name);
1373
1374 fndecl = build_decl (FUNCTION_DECL, fnname,
1375 build_function_type (void_type_node, NULL_TREE));
1376 DECL_EXTERNAL (fndecl) = 0;
1377
1378 /* It can be a static function as long as collect2 does not have
1379 to scan the object file to find its ctor/dtor routine. */
1380 TREE_PUBLIC (fndecl) = ! targetm.have_ctors_dtors;
1381
1382 TREE_USED (fndecl) = 1;
1383
1384 DECL_RESULT (fndecl) = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1385
1386 fndecl = (*lang_hooks.decls.pushdecl) (fndecl);
1387 rest_of_decl_compilation (fndecl, 0, 1, 0);
1388 announce_function (fndecl);
1389 current_function_decl = fndecl;
1390 DECL_INITIAL (fndecl) = error_mark_node;
1391 make_decl_rtl (fndecl, NULL);
1392 init_function_start (fndecl, input_filename, lineno);
1393 (*lang_hooks.decls.pushlevel) (0);
1394 expand_function_start (fndecl, 0);
1395 cfun->arc_profile = 0;
1396
1397 /* Actually generate the code to call __bb_init_func. */
1398 ASM_GENERATE_INTERNAL_LABEL (buf, "LPBX", 0);
1399 table_address = force_reg (Pmode,
1400 gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)));
1401 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_init_func"), LCT_NORMAL,
1402 mode, 1, table_address, Pmode);
1403
1404 expand_function_end (input_filename, lineno, 0);
1405 (*lang_hooks.decls.poplevel) (1, 0, 1);
1406
1407 /* Since fndecl isn't in the list of globals, it would never be emitted
1408 when it's considered to be 'safe' for inlining, so turn off
1409 flag_inline_functions. */
1410 flag_inline_functions = 0;
1411
1412 rest_of_compilation (fndecl);
1413
1414 /* Reset flag_inline_functions to its original value. */
1415 flag_inline_functions = save_flag_inline_functions;
1416
1417 if (! quiet_flag)
1418 fflush (asm_out_file);
1419 current_function_decl = NULL_TREE;
1420
1421 if (targetm.have_ctors_dtors)
1422 (* targetm.asm_out.constructor) (XEXP (DECL_RTL (fndecl), 0),
1423 DEFAULT_INIT_PRIORITY);
1424 }
1425
1426 #include "gt-profile.h"