profile.c (da_file_name): New static var.
[gcc.git] / gcc / profile.c
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3 2000, 2001 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
5 based on some ideas from Dain Samples of UC Berkeley.
6 Further mangling by Bob Manson, Cygnus Support.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 02111-1307, USA. */
24
25 /* Generate basic block profile instrumentation and auxiliary files.
26 Profile generation is optimized, so that not all arcs in the basic
27 block graph need instrumenting. First, the BB graph is closed with
28 one entry (function start), and one exit (function exit). Any
29 ABNORMAL_EDGE cannot be instrumented (because there is no control
30 path to place the code). We close the graph by inserting fake
31 EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32 edges that do not go to the exit_block. We ignore such abnormal
33 edges. Naturally these fake edges are never directly traversed,
34 and so *cannot* be directly instrumented. Some other graph
35 massaging is done. To optimize the instrumentation we generate the
36 BB minimal span tree, only edges that are not on the span tree
37 (plus the entry point) need instrumenting. From that information
38 all other edge counts can be deduced. By construction all fake
39 edges must be on the spanning tree. We also attempt to place
40 EDGE_CRITICAL edges on the spanning tree.
41
42 The two auxiliary files generated are <dumpbase>.bb and
43 <dumpbase>.bbg. The former contains the BB->linenumber
44 mappings, and the latter describes the BB graph.
45
46 The BB file contains line numbers for each block. For each basic
47 block, a zero count is output (to mark the start of a block), then
48 the line numbers of that block are listed. A zero ends the file
49 too.
50
51 The BBG file contains a count of the blocks, followed by edge
52 information, for every edge in the graph. The edge information
53 lists the source and target block numbers, and a bit mask
54 describing the type of edge.
55
56 The BB and BBG file formats are fully described in the gcov
57 documentation. */
58
59 /* ??? Register allocation should use basic block execution counts to
60 give preference to the most commonly executed blocks. */
61
62 /* ??? The .da files are not safe. Changing the program after creating .da
63 files or using different options when compiling with -fbranch-probabilities
64 can result the arc data not matching the program. Maybe add instrumented
65 arc count to .bbg file? Maybe check whether PFG matches the .bbg file? */
66
67 /* ??? Should calculate branch probabilities before instrumenting code, since
68 then we can use arc counts to help decide which arcs to instrument. */
69
70 #include "config.h"
71 #include "system.h"
72 #include "rtl.h"
73 #include "tree.h"
74 #include "flags.h"
75 #include "insn-config.h"
76 #include "output.h"
77 #include "regs.h"
78 #include "expr.h"
79 #include "function.h"
80 #include "toplev.h"
81 #include "ggc.h"
82 #include "hard-reg-set.h"
83 #include "basic-block.h"
84 #include "gcov-io.h"
85 #include "target.h"
86 #include "profile.h"
87 #include "libfuncs.h"
88 #include "langhooks.h"
89
90 /* Additional information about the edges we need. */
91 struct edge_info {
92 unsigned int count_valid : 1;
93
94 /* Is on the spanning tree. */
95 unsigned int on_tree : 1;
96
97 /* Pretend this edge does not exist (it is abnormal and we've
98 inserted a fake to compensate). */
99 unsigned int ignore : 1;
100 };
101
102 struct bb_info {
103 unsigned int count_valid : 1;
104
105 /* Number of successor and predecessor edges. */
106 gcov_type succ_count;
107 gcov_type pred_count;
108 };
109
110 #define EDGE_INFO(e) ((struct edge_info *) (e)->aux)
111 #define BB_INFO(b) ((struct bb_info *) (b)->aux)
112
113 /* Keep all basic block indexes nonnegative in the gcov output. Index 0
114 is used for entry block, last block exit block. */
115 #define BB_TO_GCOV_INDEX(bb) ((bb) == ENTRY_BLOCK_PTR ? 0 \
116 : ((bb) == EXIT_BLOCK_PTR \
117 ? last_basic_block + 1 : (bb)->index + 1))
118
119 /* Instantiate the profile info structure. */
120
121 struct profile_info profile_info;
122
123 /* Name and file pointer of the output file for the basic block graph. */
124
125 static FILE *bbg_file;
126
127 /* Name and file pointer of the input file for the arc count data. */
128
129 static FILE *da_file;
130 static char *da_file_name;
131
132 /* Pointer of the output file for the basic block/line number map. */
133 static FILE *bb_file;
134
135 /* Last source file name written to bb_file. */
136
137 static char *last_bb_file_name;
138
139 /* Collect statistics on the performance of this pass for the entire source
140 file. */
141
142 static int total_num_blocks;
143 static int total_num_edges;
144 static int total_num_edges_ignored;
145 static int total_num_edges_instrumented;
146 static int total_num_blocks_created;
147 static int total_num_passes;
148 static int total_num_times_called;
149 static int total_hist_br_prob[20];
150 static int total_num_never_executed;
151 static int total_num_branches;
152
153 /* Forward declarations. */
154 static void find_spanning_tree PARAMS ((struct edge_list *));
155 static void init_edge_profiler PARAMS ((void));
156 static rtx gen_edge_profiler PARAMS ((int));
157 static void instrument_edges PARAMS ((struct edge_list *));
158 static void output_gcov_string PARAMS ((const char *, long));
159 static void compute_branch_probabilities PARAMS ((void));
160 static gcov_type * get_exec_counts PARAMS ((void));
161 static long compute_checksum PARAMS ((void));
162 static basic_block find_group PARAMS ((basic_block));
163 static void union_groups PARAMS ((basic_block, basic_block));
164
165 /* If non-zero, we need to output a constructor to set up the
166 per-object-file data. */
167 static int need_func_profiler = 0;
168 \f
169 /* Add edge instrumentation code to the entire insn chain.
170
171 F is the first insn of the chain.
172 NUM_BLOCKS is the number of basic blocks found in F. */
173
174 static void
175 instrument_edges (el)
176 struct edge_list *el;
177 {
178 int num_instr_edges = 0;
179 int num_edges = NUM_EDGES (el);
180 basic_block bb;
181 remove_fake_edges ();
182
183 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
184 {
185 edge e = bb->succ;
186 while (e)
187 {
188 struct edge_info *inf = EDGE_INFO (e);
189 if (!inf->ignore && !inf->on_tree)
190 {
191 if (e->flags & EDGE_ABNORMAL)
192 abort ();
193 if (rtl_dump_file)
194 fprintf (rtl_dump_file, "Edge %d to %d instrumented%s\n",
195 e->src->index, e->dest->index,
196 EDGE_CRITICAL_P (e) ? " (and split)" : "");
197 need_func_profiler = 1;
198 insert_insn_on_edge (
199 gen_edge_profiler (total_num_edges_instrumented
200 + num_instr_edges++), e);
201 }
202 e = e->succ_next;
203 }
204 }
205
206 profile_info.count_edges_instrumented_now = num_instr_edges;
207 total_num_edges_instrumented += num_instr_edges;
208 profile_info.count_instrumented_edges = total_num_edges_instrumented;
209
210 total_num_blocks_created += num_edges;
211 if (rtl_dump_file)
212 fprintf (rtl_dump_file, "%d edges instrumented\n", num_instr_edges);
213
214 commit_edge_insertions_watch_calls ();
215 }
216
217 /* Output STRING to bb_file, surrounded by DELIMITER. */
218
219 static void
220 output_gcov_string (string, delimiter)
221 const char *string;
222 long delimiter;
223 {
224 long temp;
225
226 /* Write a delimiter to indicate that a file name follows. */
227 __write_long (delimiter, bb_file, 4);
228
229 /* Write the string. */
230 temp = strlen (string) + 1;
231 fwrite (string, temp, 1, bb_file);
232
233 /* Append a few zeros, to align the output to a 4 byte boundary. */
234 temp = temp & 0x3;
235 if (temp)
236 {
237 char c[4];
238
239 c[0] = c[1] = c[2] = c[3] = 0;
240 fwrite (c, sizeof (char), 4 - temp, bb_file);
241 }
242
243 /* Store another delimiter in the .bb file, just to make it easy to find
244 the end of the file name. */
245 __write_long (delimiter, bb_file, 4);
246 }
247 \f
248
249 /* Computes hybrid profile for all matching entries in da_file.
250 Sets max_counter_in_program as a side effect. */
251
252 static gcov_type *
253 get_exec_counts ()
254 {
255 int num_edges = 0;
256 basic_block bb;
257 int okay = 1, i;
258 int mismatch = 0;
259 gcov_type *profile;
260 char *function_name_buffer;
261 int function_name_buffer_len;
262 gcov_type max_counter_in_run;
263
264 profile_info.max_counter_in_program = 0;
265 profile_info.count_profiles_merged = 0;
266
267 /* No .da file, no execution counts. */
268 if (!da_file)
269 return 0;
270
271 /* Count the edges to be (possibly) instrumented. */
272
273 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
274 {
275 edge e;
276 for (e = bb->succ; e; e = e->succ_next)
277 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
278 num_edges++;
279 }
280
281 /* now read and combine all matching profiles. */
282
283 profile = xmalloc (sizeof (gcov_type) * num_edges);
284 rewind (da_file);
285 function_name_buffer_len = strlen (current_function_name) + 1;
286 function_name_buffer = xmalloc (function_name_buffer_len + 1);
287
288 for (i = 0; i < num_edges; i++)
289 profile[i] = 0;
290
291 while (1)
292 {
293 long magic, extra_bytes;
294 long func_count;
295 int i;
296
297 if (__read_long (&magic, da_file, 4) != 0)
298 break;
299
300 if (magic != -123)
301 {
302 okay = 0;
303 break;
304 }
305
306 if (__read_long (&func_count, da_file, 4) != 0)
307 {
308 okay = 0;
309 break;
310 }
311
312 if (__read_long (&extra_bytes, da_file, 4) != 0)
313 {
314 okay = 0;
315 break;
316 }
317
318 fseek (da_file, 4 + 8, SEEK_CUR);
319
320 /* read the maximal counter. */
321 __read_gcov_type (&max_counter_in_run, da_file, 8);
322
323 /* skip the rest of "statistics" emited by __bb_exit_func. */
324 fseek (da_file, extra_bytes - (4 + 8 + 8), SEEK_CUR);
325
326 for (i = 0; i < func_count; i++)
327 {
328 long arc_count;
329 long chksum;
330 int j;
331
332 if (__read_gcov_string
333 (function_name_buffer, function_name_buffer_len, da_file,
334 -1) != 0)
335 {
336 okay = 0;
337 break;
338 }
339
340 if (__read_long (&chksum, da_file, 4) != 0)
341 {
342 okay = 0;
343 break;
344 }
345
346 if (__read_long (&arc_count, da_file, 4) != 0)
347 {
348 okay = 0;
349 break;
350 }
351
352 if (strcmp (function_name_buffer, current_function_name) != 0)
353 {
354 /* skip */
355 if (fseek (da_file, arc_count * 8, SEEK_CUR) < 0)
356 {
357 okay = 0;
358 break;
359 }
360 }
361 else if (arc_count != num_edges
362 || chksum != profile_info.current_function_cfg_checksum)
363 okay = 0, mismatch = 1;
364 else
365 {
366 gcov_type tmp;
367
368 profile_info.max_counter_in_program += max_counter_in_run;
369 profile_info.count_profiles_merged++;
370
371 for (j = 0; j < arc_count; j++)
372 if (__read_gcov_type (&tmp, da_file, 8) != 0)
373 {
374 okay = 0;
375 break;
376 }
377 else
378 {
379 profile[j] += tmp;
380 }
381 }
382 }
383
384 if (!okay)
385 break;
386
387 }
388
389 free (function_name_buffer);
390
391 if (!okay)
392 {
393 if (mismatch)
394 error
395 ("Profile does not match flowgraph of function %s (out of date?)",
396 current_function_name);
397 else
398 error (".da file corrupted");
399 free (profile);
400 return 0;
401 }
402 if (rtl_dump_file)
403 {
404 fprintf(rtl_dump_file, "Merged %i profiles with maximal count %i.\n",
405 profile_info.count_profiles_merged,
406 (int)profile_info.max_counter_in_program);
407 }
408
409 return profile;
410 }
411 \f
412
413 /* Compute the branch probabilities for the various branches.
414 Annotate them accordingly. */
415
416 static void
417 compute_branch_probabilities ()
418 {
419 basic_block bb;
420 int i;
421 int num_edges = 0;
422 int changes;
423 int passes;
424 int hist_br_prob[20];
425 int num_never_executed;
426 int num_branches;
427 gcov_type *exec_counts = get_exec_counts ();
428 int exec_counts_pos = 0;
429
430 /* Attach extra info block to each bb. */
431
432 alloc_aux_for_blocks (sizeof (struct bb_info));
433 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
434 {
435 edge e;
436
437 for (e = bb->succ; e; e = e->succ_next)
438 if (!EDGE_INFO (e)->ignore)
439 BB_INFO (bb)->succ_count++;
440 for (e = bb->pred; e; e = e->pred_next)
441 if (!EDGE_INFO (e)->ignore)
442 BB_INFO (bb)->pred_count++;
443 }
444
445 /* Avoid predicting entry on exit nodes. */
446 BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
447 BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
448
449 /* For each edge not on the spanning tree, set its execution count from
450 the .da file. */
451
452 /* The first count in the .da file is the number of times that the function
453 was entered. This is the exec_count for block zero. */
454
455 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
456 {
457 edge e;
458 for (e = bb->succ; e; e = e->succ_next)
459 if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
460 {
461 num_edges++;
462 if (exec_counts)
463 {
464 e->count = exec_counts[exec_counts_pos++];
465 }
466 else
467 e->count = 0;
468
469 EDGE_INFO (e)->count_valid = 1;
470 BB_INFO (bb)->succ_count--;
471 BB_INFO (e->dest)->pred_count--;
472 if (rtl_dump_file)
473 {
474 fprintf (rtl_dump_file, "\nRead edge from %i to %i, count:",
475 bb->index, e->dest->index);
476 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
477 (HOST_WIDEST_INT) e->count);
478 }
479 }
480 }
481
482 if (rtl_dump_file)
483 fprintf (rtl_dump_file, "\n%d edge counts read\n", num_edges);
484
485 /* For every block in the file,
486 - if every exit/entrance edge has a known count, then set the block count
487 - if the block count is known, and every exit/entrance edge but one has
488 a known execution count, then set the count of the remaining edge
489
490 As edge counts are set, decrement the succ/pred count, but don't delete
491 the edge, that way we can easily tell when all edges are known, or only
492 one edge is unknown. */
493
494 /* The order that the basic blocks are iterated through is important.
495 Since the code that finds spanning trees starts with block 0, low numbered
496 edges are put on the spanning tree in preference to high numbered edges.
497 Hence, most instrumented edges are at the end. Graph solving works much
498 faster if we propagate numbers from the end to the start.
499
500 This takes an average of slightly more than 3 passes. */
501
502 changes = 1;
503 passes = 0;
504 while (changes)
505 {
506 passes++;
507 changes = 0;
508 FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
509 {
510 struct bb_info *bi = BB_INFO (bb);
511 if (! bi->count_valid)
512 {
513 if (bi->succ_count == 0)
514 {
515 edge e;
516 gcov_type total = 0;
517
518 for (e = bb->succ; e; e = e->succ_next)
519 total += e->count;
520 bb->count = total;
521 bi->count_valid = 1;
522 changes = 1;
523 }
524 else if (bi->pred_count == 0)
525 {
526 edge e;
527 gcov_type total = 0;
528
529 for (e = bb->pred; e; e = e->pred_next)
530 total += e->count;
531 bb->count = total;
532 bi->count_valid = 1;
533 changes = 1;
534 }
535 }
536 if (bi->count_valid)
537 {
538 if (bi->succ_count == 1)
539 {
540 edge e;
541 gcov_type total = 0;
542
543 /* One of the counts will be invalid, but it is zero,
544 so adding it in also doesn't hurt. */
545 for (e = bb->succ; e; e = e->succ_next)
546 total += e->count;
547
548 /* Seedgeh for the invalid edge, and set its count. */
549 for (e = bb->succ; e; e = e->succ_next)
550 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
551 break;
552
553 /* Calculate count for remaining edge by conservation. */
554 total = bb->count - total;
555
556 if (! e)
557 abort ();
558 EDGE_INFO (e)->count_valid = 1;
559 e->count = total;
560 bi->succ_count--;
561
562 BB_INFO (e->dest)->pred_count--;
563 changes = 1;
564 }
565 if (bi->pred_count == 1)
566 {
567 edge e;
568 gcov_type total = 0;
569
570 /* One of the counts will be invalid, but it is zero,
571 so adding it in also doesn't hurt. */
572 for (e = bb->pred; e; e = e->pred_next)
573 total += e->count;
574
575 /* Seedgeh for the invalid edge, and set its count. */
576 for (e = bb->pred; e; e = e->pred_next)
577 if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
578 break;
579
580 /* Calculate count for remaining edge by conservation. */
581 total = bb->count - total + e->count;
582
583 if (! e)
584 abort ();
585 EDGE_INFO (e)->count_valid = 1;
586 e->count = total;
587 bi->pred_count--;
588
589 BB_INFO (e->src)->succ_count--;
590 changes = 1;
591 }
592 }
593 }
594 }
595 if (rtl_dump_file)
596 dump_flow_info (rtl_dump_file);
597
598 total_num_passes += passes;
599 if (rtl_dump_file)
600 fprintf (rtl_dump_file, "Graph solving took %d passes.\n\n", passes);
601
602 /* If the graph has been correctly solved, every block will have a
603 succ and pred count of zero. */
604 FOR_EACH_BB (bb)
605 {
606 if (BB_INFO (bb)->succ_count || BB_INFO (bb)->pred_count)
607 abort ();
608 }
609
610 /* For every edge, calculate its branch probability and add a reg_note
611 to the branch insn to indicate this. */
612
613 for (i = 0; i < 20; i++)
614 hist_br_prob[i] = 0;
615 num_never_executed = 0;
616 num_branches = 0;
617
618 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
619 {
620 edge e;
621 gcov_type total;
622 rtx note;
623
624 total = bb->count;
625 if (total)
626 {
627 for (e = bb->succ; e; e = e->succ_next)
628 {
629 e->probability = (e->count * REG_BR_PROB_BASE + total / 2) / total;
630 if (e->probability < 0 || e->probability > REG_BR_PROB_BASE)
631 {
632 error ("corrupted profile info: prob for %d-%d thought to be %d",
633 e->src->index, e->dest->index, e->probability);
634 e->probability = REG_BR_PROB_BASE / 2;
635 }
636 }
637 if (bb->index >= 0
638 && any_condjump_p (bb->end)
639 && bb->succ->succ_next)
640 {
641 int prob;
642 edge e;
643 int index;
644
645 /* Find the branch edge. It is possible that we do have fake
646 edges here. */
647 for (e = bb->succ; e->flags & (EDGE_FAKE | EDGE_FALLTHRU);
648 e = e->succ_next)
649 continue; /* Loop body has been intentionally left blank. */
650
651 prob = e->probability;
652 index = prob * 20 / REG_BR_PROB_BASE;
653
654 if (index == 20)
655 index = 19;
656 hist_br_prob[index]++;
657
658 note = find_reg_note (bb->end, REG_BR_PROB, 0);
659 /* There may be already note put by some other pass, such
660 as builtin_expect expander. */
661 if (note)
662 XEXP (note, 0) = GEN_INT (prob);
663 else
664 REG_NOTES (bb->end)
665 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
666 REG_NOTES (bb->end));
667 num_branches++;
668 }
669 }
670 /* Otherwise distribute the probabilities evenly so we get sane sum.
671 Use simple heuristics that if there are normal edges, give all abnormals
672 frequency of 0, otherwise distribute the frequency over abnormals
673 (this is the case of noreturn calls). */
674 else
675 {
676 for (e = bb->succ; e; e = e->succ_next)
677 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
678 total ++;
679 if (total)
680 {
681 for (e = bb->succ; e; e = e->succ_next)
682 if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
683 e->probability = REG_BR_PROB_BASE / total;
684 else
685 e->probability = 0;
686 }
687 else
688 {
689 for (e = bb->succ; e; e = e->succ_next)
690 total ++;
691 for (e = bb->succ; e; e = e->succ_next)
692 e->probability = REG_BR_PROB_BASE / total;
693 }
694 if (bb->index >= 0
695 && any_condjump_p (bb->end)
696 && bb->succ->succ_next)
697 num_branches++, num_never_executed;
698 }
699 }
700
701 if (rtl_dump_file)
702 {
703 fprintf (rtl_dump_file, "%d branches\n", num_branches);
704 fprintf (rtl_dump_file, "%d branches never executed\n",
705 num_never_executed);
706 if (num_branches)
707 for (i = 0; i < 10; i++)
708 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
709 (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
710 5 * i, 5 * i + 5);
711
712 total_num_branches += num_branches;
713 total_num_never_executed += num_never_executed;
714 for (i = 0; i < 20; i++)
715 total_hist_br_prob[i] += hist_br_prob[i];
716
717 fputc ('\n', rtl_dump_file);
718 fputc ('\n', rtl_dump_file);
719 }
720
721 free_aux_for_blocks ();
722 if (exec_counts)
723 free (exec_counts);
724 }
725
726 /* Compute checksum for the current function. */
727
728 #define CHSUM_HASH 500000003
729 #define CHSUM_SHIFT 2
730
731 static long
732 compute_checksum ()
733 {
734 long chsum = 0;
735 basic_block bb;
736
737 FOR_EACH_BB (bb)
738 {
739 edge e;
740
741 for (e = bb->succ; e; e = e->succ_next)
742 {
743 chsum = ((chsum << CHSUM_SHIFT) + (BB_TO_GCOV_INDEX (e->dest) + 1)) % CHSUM_HASH;
744 }
745
746 chsum = (chsum << CHSUM_SHIFT) % CHSUM_HASH;
747 }
748
749 return chsum;
750 }
751
752 /* Instrument and/or analyze program behavior based on program flow graph.
753 In either case, this function builds a flow graph for the function being
754 compiled. The flow graph is stored in BB_GRAPH.
755
756 When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
757 the flow graph that are needed to reconstruct the dynamic behavior of the
758 flow graph.
759
760 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
761 information from a data file containing edge count information from previous
762 executions of the function being compiled. In this case, the flow graph is
763 annotated with actual execution counts, which are later propagated into the
764 rtl for optimization purposes.
765
766 Main entry point of this file. */
767
768 void
769 branch_prob ()
770 {
771 basic_block bb;
772 int i;
773 int num_edges, ignored_edges;
774 struct edge_list *el;
775
776 profile_info.current_function_cfg_checksum = compute_checksum ();
777
778 if (rtl_dump_file)
779 fprintf (rtl_dump_file, "CFG checksum is %ld\n",
780 profile_info.current_function_cfg_checksum);
781
782 /* Start of a function. */
783 if (flag_test_coverage)
784 output_gcov_string (current_function_name, (long) -2);
785
786 total_num_times_called++;
787
788 flow_call_edges_add (NULL);
789 add_noreturn_fake_exit_edges ();
790
791 /* We can't handle cyclic regions constructed using abnormal edges.
792 To avoid these we replace every source of abnormal edge by a fake
793 edge from entry node and every destination by fake edge to exit.
794 This keeps graph acyclic and our calculation exact for all normal
795 edges except for exit and entrance ones.
796
797 We also add fake exit edges for each call and asm statement in the
798 basic, since it may not return. */
799
800 FOR_EACH_BB (bb)
801 {
802 int need_exit_edge = 0, need_entry_edge = 0;
803 int have_exit_edge = 0, have_entry_edge = 0;
804 rtx insn;
805 edge e;
806
807 /* Add fake edges from entry block to the call insns that may return
808 twice. The CFG is not quite correct then, as call insn plays more
809 role of CODE_LABEL, but for our purposes, everything should be OK,
810 as we never insert code to the beggining of basic block. */
811 for (insn = bb->head; insn != NEXT_INSN (bb->end);
812 insn = NEXT_INSN (insn))
813 {
814 if (GET_CODE (insn) == CALL_INSN
815 && find_reg_note (insn, REG_SETJMP, NULL))
816 {
817 if (GET_CODE (bb->head) == CODE_LABEL
818 || insn != NEXT_INSN (bb->head))
819 {
820 e = split_block (bb, PREV_INSN (insn));
821 make_edge (ENTRY_BLOCK_PTR, e->dest, EDGE_FAKE);
822 break;
823 }
824 else
825 {
826 /* We should not get abort here, as call to setjmp should not
827 be the very first instruction of function. */
828 if (bb == ENTRY_BLOCK_PTR)
829 abort ();
830 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
831 }
832 }
833 }
834
835 for (e = bb->succ; e; e = e->succ_next)
836 {
837 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
838 && e->dest != EXIT_BLOCK_PTR)
839 need_exit_edge = 1;
840 if (e->dest == EXIT_BLOCK_PTR)
841 have_exit_edge = 1;
842 }
843 for (e = bb->pred; e; e = e->pred_next)
844 {
845 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
846 && e->src != ENTRY_BLOCK_PTR)
847 need_entry_edge = 1;
848 if (e->src == ENTRY_BLOCK_PTR)
849 have_entry_edge = 1;
850 }
851
852 if (need_exit_edge && !have_exit_edge)
853 {
854 if (rtl_dump_file)
855 fprintf (rtl_dump_file, "Adding fake exit edge to bb %i\n",
856 bb->index);
857 make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
858 }
859 if (need_entry_edge && !have_entry_edge)
860 {
861 if (rtl_dump_file)
862 fprintf (rtl_dump_file, "Adding fake entry edge to bb %i\n",
863 bb->index);
864 make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
865 }
866 }
867
868 el = create_edge_list ();
869 num_edges = NUM_EDGES (el);
870 alloc_aux_for_edges (sizeof (struct edge_info));
871
872 /* The basic blocks are expected to be numbered sequentially. */
873 compact_blocks ();
874
875 ignored_edges = 0;
876 for (i = 0 ; i < num_edges ; i++)
877 {
878 edge e = INDEX_EDGE (el, i);
879 e->count = 0;
880
881 /* Mark edges we've replaced by fake edges above as ignored. */
882 if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
883 && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
884 {
885 EDGE_INFO (e)->ignore = 1;
886 ignored_edges++;
887 }
888 }
889
890 #ifdef ENABLE_CHECKING
891 verify_flow_info ();
892 #endif
893
894 /* Output line number information about each basic block for
895 GCOV utility. */
896 if (flag_test_coverage)
897 {
898 FOR_EACH_BB (bb)
899 {
900 rtx insn = bb->head;
901 static int ignore_next_note = 0;
902
903 /* We are looking for line number notes. Search backward before
904 basic block to find correct ones. */
905 insn = prev_nonnote_insn (insn);
906 if (!insn)
907 insn = get_insns ();
908 else
909 insn = NEXT_INSN (insn);
910
911 /* Output a zero to the .bb file to indicate that a new
912 block list is starting. */
913 __write_long (0, bb_file, 4);
914
915 while (insn != bb->end)
916 {
917 if (GET_CODE (insn) == NOTE)
918 {
919 /* Must ignore the line number notes that immediately
920 follow the end of an inline function to avoid counting
921 it twice. There is a note before the call, and one
922 after the call. */
923 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_REPEATED_LINE_NUMBER)
924 ignore_next_note = 1;
925 else if (NOTE_LINE_NUMBER (insn) > 0)
926 {
927 if (ignore_next_note)
928 ignore_next_note = 0;
929 else
930 {
931 /* If this is a new source file, then output the
932 file's name to the .bb file. */
933 if (! last_bb_file_name
934 || strcmp (NOTE_SOURCE_FILE (insn),
935 last_bb_file_name))
936 {
937 if (last_bb_file_name)
938 free (last_bb_file_name);
939 last_bb_file_name
940 = xstrdup (NOTE_SOURCE_FILE (insn));
941 output_gcov_string (NOTE_SOURCE_FILE (insn),
942 (long)-1);
943 }
944 /* Output the line number to the .bb file. Must be
945 done after the output_bb_profile_data() call, and
946 after the file name is written, to ensure that it
947 is correctly handled by gcov. */
948 __write_long (NOTE_LINE_NUMBER (insn), bb_file, 4);
949 }
950 }
951 }
952 insn = NEXT_INSN (insn);
953 }
954 }
955 __write_long (0, bb_file, 4);
956 }
957
958 /* Create spanning tree from basic block graph, mark each edge that is
959 on the spanning tree. We insert as many abnormal and critical edges
960 as possible to minimize number of edge splits necessary. */
961
962 find_spanning_tree (el);
963
964 /* Fake edges that are not on the tree will not be instrumented, so
965 mark them ignored. */
966 for (i = 0; i < num_edges; i++)
967 {
968 edge e = INDEX_EDGE (el, i);
969 struct edge_info *inf = EDGE_INFO (e);
970 if ((e->flags & EDGE_FAKE) && !inf->ignore && !inf->on_tree)
971 {
972 inf->ignore = 1;
973 ignored_edges++;
974 }
975 }
976
977 total_num_blocks += n_basic_blocks + 2;
978 if (rtl_dump_file)
979 fprintf (rtl_dump_file, "%d basic blocks\n", n_basic_blocks);
980
981 total_num_edges += num_edges;
982 if (rtl_dump_file)
983 fprintf (rtl_dump_file, "%d edges\n", num_edges);
984
985 total_num_edges_ignored += ignored_edges;
986 if (rtl_dump_file)
987 fprintf (rtl_dump_file, "%d ignored edges\n", ignored_edges);
988
989 /* Create a .bbg file from which gcov can reconstruct the basic block
990 graph. First output the number of basic blocks, and then for every
991 edge output the source and target basic block numbers.
992 NOTE: The format of this file must be compatible with gcov. */
993
994 if (flag_test_coverage)
995 {
996 int flag_bits;
997
998 __write_gcov_string (current_function_name,
999 strlen (current_function_name), bbg_file, -1);
1000
1001 /* write checksum. */
1002 __write_long (profile_info.current_function_cfg_checksum, bbg_file, 4);
1003
1004 /* The plus 2 stands for entry and exit block. */
1005 __write_long (n_basic_blocks + 2, bbg_file, 4);
1006 __write_long (num_edges - ignored_edges + 1, bbg_file, 4);
1007
1008 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1009 {
1010 edge e;
1011 long count = 0;
1012
1013 for (e = bb->succ; e; e = e->succ_next)
1014 if (!EDGE_INFO (e)->ignore)
1015 count++;
1016 __write_long (count, bbg_file, 4);
1017
1018 for (e = bb->succ; e; e = e->succ_next)
1019 {
1020 struct edge_info *i = EDGE_INFO (e);
1021 if (!i->ignore)
1022 {
1023 flag_bits = 0;
1024 if (i->on_tree)
1025 flag_bits |= 0x1;
1026 if (e->flags & EDGE_FAKE)
1027 flag_bits |= 0x2;
1028 if (e->flags & EDGE_FALLTHRU)
1029 flag_bits |= 0x4;
1030
1031 __write_long (BB_TO_GCOV_INDEX (e->dest), bbg_file, 4);
1032 __write_long (flag_bits, bbg_file, 4);
1033 }
1034 }
1035 }
1036 /* Emit fake loopback edge for EXIT block to maintain compatibility with
1037 old gcov format. */
1038 __write_long (1, bbg_file, 4);
1039 __write_long (0, bbg_file, 4);
1040 __write_long (0x1, bbg_file, 4);
1041
1042 /* Emit a -1 to separate the list of all edges from the list of
1043 loop back edges that follows. */
1044 __write_long (-1, bbg_file, 4);
1045 }
1046
1047 if (flag_branch_probabilities)
1048 compute_branch_probabilities ();
1049
1050 /* For each edge not on the spanning tree, add counting code as rtl. */
1051
1052 if (profile_arc_flag)
1053 {
1054 instrument_edges (el);
1055 allocate_reg_info (max_reg_num (), FALSE, FALSE);
1056 }
1057
1058 remove_fake_edges ();
1059 /* Re-merge split basic blocks and the mess introduced by
1060 insert_insn_on_edge. */
1061 cleanup_cfg (profile_arc_flag ? CLEANUP_EXPENSIVE : 0);
1062 if (rtl_dump_file)
1063 dump_flow_info (rtl_dump_file);
1064
1065 free_aux_for_edges ();
1066 free_edge_list (el);
1067 }
1068 \f
1069 /* Union find algorithm implementation for the basic blocks using
1070 aux fields. */
1071
1072 static basic_block
1073 find_group (bb)
1074 basic_block bb;
1075 {
1076 basic_block group = bb, bb1;
1077
1078 while ((basic_block) group->aux != group)
1079 group = (basic_block) group->aux;
1080
1081 /* Compress path. */
1082 while ((basic_block) bb->aux != group)
1083 {
1084 bb1 = (basic_block) bb->aux;
1085 bb->aux = (void *) group;
1086 bb = bb1;
1087 }
1088 return group;
1089 }
1090
1091 static void
1092 union_groups (bb1, bb2)
1093 basic_block bb1, bb2;
1094 {
1095 basic_block bb1g = find_group (bb1);
1096 basic_block bb2g = find_group (bb2);
1097
1098 /* ??? I don't have a place for the rank field. OK. Lets go w/o it,
1099 this code is unlikely going to be performance problem anyway. */
1100 if (bb1g == bb2g)
1101 abort ();
1102
1103 bb1g->aux = bb2g;
1104 }
1105 \f
1106 /* This function searches all of the edges in the program flow graph, and puts
1107 as many bad edges as possible onto the spanning tree. Bad edges include
1108 abnormals edges, which can't be instrumented at the moment. Since it is
1109 possible for fake edges to form an cycle, we will have to develop some
1110 better way in the future. Also put critical edges to the tree, since they
1111 are more expensive to instrument. */
1112
1113 static void
1114 find_spanning_tree (el)
1115 struct edge_list *el;
1116 {
1117 int i;
1118 int num_edges = NUM_EDGES (el);
1119 basic_block bb;
1120
1121 /* We use aux field for standard union-find algorithm. */
1122 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1123 bb->aux = bb;
1124
1125 /* Add fake edge exit to entry we can't instrument. */
1126 union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
1127
1128 /* First add all abnormal edges to the tree unless they form an cycle. Also
1129 add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
1130 setting return value from function. */
1131 for (i = 0; i < num_edges; i++)
1132 {
1133 edge e = INDEX_EDGE (el, i);
1134 if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1135 || e->dest == EXIT_BLOCK_PTR
1136 )
1137 && !EDGE_INFO (e)->ignore
1138 && (find_group (e->src) != find_group (e->dest)))
1139 {
1140 if (rtl_dump_file)
1141 fprintf (rtl_dump_file, "Abnormal edge %d to %d put to tree\n",
1142 e->src->index, e->dest->index);
1143 EDGE_INFO (e)->on_tree = 1;
1144 union_groups (e->src, e->dest);
1145 }
1146 }
1147
1148 /* Now insert all critical edges to the tree unless they form an cycle. */
1149 for (i = 0; i < num_edges; i++)
1150 {
1151 edge e = INDEX_EDGE (el, i);
1152 if ((EDGE_CRITICAL_P (e))
1153 && !EDGE_INFO (e)->ignore
1154 && (find_group (e->src) != find_group (e->dest)))
1155 {
1156 if (rtl_dump_file)
1157 fprintf (rtl_dump_file, "Critical edge %d to %d put to tree\n",
1158 e->src->index, e->dest->index);
1159 EDGE_INFO (e)->on_tree = 1;
1160 union_groups (e->src, e->dest);
1161 }
1162 }
1163
1164 /* And now the rest. */
1165 for (i = 0; i < num_edges; i++)
1166 {
1167 edge e = INDEX_EDGE (el, i);
1168 if (find_group (e->src) != find_group (e->dest)
1169 && !EDGE_INFO (e)->ignore)
1170 {
1171 if (rtl_dump_file)
1172 fprintf (rtl_dump_file, "Normal edge %d to %d put to tree\n",
1173 e->src->index, e->dest->index);
1174 EDGE_INFO (e)->on_tree = 1;
1175 union_groups (e->src, e->dest);
1176 }
1177 }
1178
1179 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1180 bb->aux = NULL;
1181 }
1182 \f
1183 /* Perform file-level initialization for branch-prob processing. */
1184
1185 void
1186 init_branch_prob (filename)
1187 const char *filename;
1188 {
1189 int len = strlen (filename);
1190 int i;
1191
1192 if (flag_test_coverage)
1193 {
1194 char *data_file, *bbg_file_name;
1195
1196 /* Open an output file for the basic block/line number map. */
1197 data_file = (char *) alloca (len + 4);
1198 strcpy (data_file, filename);
1199 strcat (data_file, ".bb");
1200 if ((bb_file = fopen (data_file, "wb")) == 0)
1201 fatal_io_error ("can't open %s", data_file);
1202
1203 /* Open an output file for the program flow graph. */
1204 bbg_file_name = (char *) alloca (len + 5);
1205 strcpy (bbg_file_name, filename);
1206 strcat (bbg_file_name, ".bbg");
1207 if ((bbg_file = fopen (bbg_file_name, "wb")) == 0)
1208 fatal_io_error ("can't open %s", bbg_file_name);
1209
1210 /* Initialize to zero, to ensure that the first file name will be
1211 written to the .bb file. */
1212 last_bb_file_name = 0;
1213 }
1214
1215 da_file_name = (char *) xmalloc (len + 4);
1216 strcpy (da_file_name, filename);
1217 strcat (da_file_name, ".da");
1218
1219 if (flag_branch_probabilities)
1220 {
1221 da_file = fopen (da_file_name, "rb");
1222 if (!da_file)
1223 warning ("file %s not found, execution counts assumed to be zero",
1224 da_file_name);
1225 }
1226
1227 if (profile_arc_flag)
1228 init_edge_profiler ();
1229
1230 total_num_blocks = 0;
1231 total_num_edges = 0;
1232 total_num_edges_ignored = 0;
1233 total_num_edges_instrumented = 0;
1234 total_num_blocks_created = 0;
1235 total_num_passes = 0;
1236 total_num_times_called = 0;
1237 total_num_branches = 0;
1238 total_num_never_executed = 0;
1239 for (i = 0; i < 20; i++)
1240 total_hist_br_prob[i] = 0;
1241 }
1242
1243 /* Performs file-level cleanup after branch-prob processing
1244 is completed. */
1245
1246 void
1247 end_branch_prob ()
1248 {
1249 if (flag_test_coverage)
1250 {
1251 fclose (bb_file);
1252 fclose (bbg_file);
1253 unlink (da_file_name);
1254 }
1255
1256 if (flag_branch_probabilities && da_file)
1257 fclose (da_file);
1258
1259 if (rtl_dump_file)
1260 {
1261 fprintf (rtl_dump_file, "\n");
1262 fprintf (rtl_dump_file, "Total number of blocks: %d\n",
1263 total_num_blocks);
1264 fprintf (rtl_dump_file, "Total number of edges: %d\n", total_num_edges);
1265 fprintf (rtl_dump_file, "Total number of ignored edges: %d\n",
1266 total_num_edges_ignored);
1267 fprintf (rtl_dump_file, "Total number of instrumented edges: %d\n",
1268 total_num_edges_instrumented);
1269 fprintf (rtl_dump_file, "Total number of blocks created: %d\n",
1270 total_num_blocks_created);
1271 fprintf (rtl_dump_file, "Total number of graph solution passes: %d\n",
1272 total_num_passes);
1273 if (total_num_times_called != 0)
1274 fprintf (rtl_dump_file, "Average number of graph solution passes: %d\n",
1275 (total_num_passes + (total_num_times_called >> 1))
1276 / total_num_times_called);
1277 fprintf (rtl_dump_file, "Total number of branches: %d\n",
1278 total_num_branches);
1279 fprintf (rtl_dump_file, "Total number of branches never executed: %d\n",
1280 total_num_never_executed);
1281 if (total_num_branches)
1282 {
1283 int i;
1284
1285 for (i = 0; i < 10; i++)
1286 fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
1287 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1288 / total_num_branches, 5*i, 5*i+5);
1289 }
1290 }
1291 }
1292 \f
1293 /* The label used by the edge profiling code. */
1294
1295 static GTY(()) rtx profiler_label;
1296
1297 /* Initialize the profiler_label. */
1298
1299 static void
1300 init_edge_profiler ()
1301 {
1302 /* Generate and save a copy of this so it can be shared. */
1303 char buf[20];
1304 ASM_GENERATE_INTERNAL_LABEL (buf, "LPBX", 2);
1305 profiler_label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
1306 }
1307
1308 /* Output instructions as RTL to increment the edge execution count. */
1309
1310 static rtx
1311 gen_edge_profiler (edgeno)
1312 int edgeno;
1313 {
1314 enum machine_mode mode = mode_for_size (GCOV_TYPE_SIZE, MODE_INT, 0);
1315 rtx mem_ref, tmp;
1316 rtx sequence;
1317
1318 start_sequence ();
1319
1320 tmp = force_reg (Pmode, profiler_label);
1321 tmp = plus_constant (tmp, GCOV_TYPE_SIZE / BITS_PER_UNIT * edgeno);
1322 mem_ref = validize_mem (gen_rtx_MEM (mode, tmp));
1323
1324 set_mem_alias_set (mem_ref, new_alias_set ());
1325
1326 tmp = expand_simple_binop (mode, PLUS, mem_ref, const1_rtx,
1327 mem_ref, 0, OPTAB_WIDEN);
1328
1329 if (tmp != mem_ref)
1330 emit_move_insn (copy_rtx (mem_ref), tmp);
1331
1332 sequence = get_insns ();
1333 end_sequence ();
1334 return sequence;
1335 }
1336
1337 /* Output code for a constructor that will invoke __bb_init_func, if
1338 this has not already been done. */
1339
1340 void
1341 output_func_start_profiler ()
1342 {
1343 tree fnname, fndecl;
1344 char *name;
1345 char buf[20];
1346 const char *cfnname;
1347 rtx table_address;
1348 enum machine_mode mode = mode_for_size (GCOV_TYPE_SIZE, MODE_INT, 0);
1349 int save_flag_inline_functions = flag_inline_functions;
1350
1351 /* It's either already been output, or we don't need it because we're
1352 not doing profile-edges. */
1353 if (! need_func_profiler)
1354 return;
1355
1356 need_func_profiler = 0;
1357
1358 /* Synthesize a constructor function to invoke __bb_init_func with a
1359 pointer to this object file's profile block. */
1360
1361 /* Try and make a unique name given the "file function name".
1362
1363 And no, I don't like this either. */
1364
1365 fnname = get_file_function_name ('I');
1366 cfnname = IDENTIFIER_POINTER (fnname);
1367 name = concat (cfnname, "GCOV", NULL);
1368 fnname = get_identifier (name);
1369 free (name);
1370
1371 fndecl = build_decl (FUNCTION_DECL, fnname,
1372 build_function_type (void_type_node, NULL_TREE));
1373 DECL_EXTERNAL (fndecl) = 0;
1374
1375 /* It can be a static function as long as collect2 does not have
1376 to scan the object file to find its ctor/dtor routine. */
1377 TREE_PUBLIC (fndecl) = ! targetm.have_ctors_dtors;
1378
1379 TREE_USED (fndecl) = 1;
1380
1381 DECL_RESULT (fndecl) = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1382
1383 fndecl = (*lang_hooks.decls.pushdecl) (fndecl);
1384 rest_of_decl_compilation (fndecl, 0, 1, 0);
1385 announce_function (fndecl);
1386 current_function_decl = fndecl;
1387 DECL_INITIAL (fndecl) = error_mark_node;
1388 make_decl_rtl (fndecl, NULL);
1389 init_function_start (fndecl, input_filename, lineno);
1390 (*lang_hooks.decls.pushlevel) (0);
1391 expand_function_start (fndecl, 0);
1392 cfun->arc_profile = 0;
1393
1394 /* Actually generate the code to call __bb_init_func. */
1395 ASM_GENERATE_INTERNAL_LABEL (buf, "LPBX", 0);
1396 table_address = force_reg (Pmode,
1397 gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)));
1398 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__bb_init_func"), LCT_NORMAL,
1399 mode, 1, table_address, Pmode);
1400
1401 expand_function_end (input_filename, lineno, 0);
1402 (*lang_hooks.decls.poplevel) (1, 0, 1);
1403
1404 /* Since fndecl isn't in the list of globals, it would never be emitted
1405 when it's considered to be 'safe' for inlining, so turn off
1406 flag_inline_functions. */
1407 flag_inline_functions = 0;
1408
1409 rest_of_compilation (fndecl);
1410
1411 /* Reset flag_inline_functions to its original value. */
1412 flag_inline_functions = save_flag_inline_functions;
1413
1414 if (! quiet_flag)
1415 fflush (asm_out_file);
1416 current_function_decl = NULL_TREE;
1417
1418 if (targetm.have_ctors_dtors)
1419 (* targetm.asm_out.constructor) (XEXP (DECL_RTL (fndecl), 0),
1420 DEFAULT_INIT_PRIORITY);
1421 }
1422
1423 #include "gt-profile.h"