Update copyrights
[gcc.git] / gcc / profile.c
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990, 91-94, 96-99, 2000 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
6
7 This file is part of GNU CC.
8
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 /* ??? Really should not put insns inside of LIBCALL sequences, when putting
25 insns after a call, should look for the insn setting the retval, and
26 insert the insns after that one. */
27
28 /* ??? Register allocation should use basic block execution counts to
29 give preference to the most commonly executed blocks. */
30
31 /* ??? The .da files are not safe. Changing the program after creating .da
32 files or using different options when compiling with -fbranch-probabilities
33 can result the arc data not matching the program. Maybe add instrumented
34 arc count to .bbg file? Maybe check whether PFG matches the .bbg file? */
35
36 /* ??? Should calculate branch probabilities before instrumenting code, since
37 then we can use arc counts to help decide which arcs to instrument. */
38
39 /* ??? Rearrange code so that the most frequently executed arcs become from
40 one block to the next block (i.e. a fall through), move seldom executed
41 code outside of loops even at the expense of adding a few branches to
42 achieve this, see Dain Sample's UC Berkeley thesis. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "rtl.h"
47 #include "tree.h"
48 #include "flags.h"
49 #include "insn-flags.h"
50 #include "insn-config.h"
51 #include "output.h"
52 #include "regs.h"
53 #include "function.h"
54 #include "gcov-io.h"
55 #include "toplev.h"
56 #include "ggc.h"
57
/* One of these is dynamically created whenever we identify an arc in the
   function.  */

struct adj_list
{
  /* Index of the basic block this arc leaves.  */
  int source;
  /* Index of the basic block this arc enters.  */
  int target;
  /* Number of times the arc was traversed; read from the .da file for
     instrumented arcs, or solved for by flow conservation otherwise.  */
  int arc_count;
  /* Nonzero once arc_count holds a trustworthy value.  */
  unsigned int count_valid : 1;
  /* Nonzero if this arc was placed on the spanning tree, and hence was
     not instrumented.  */
  unsigned int on_tree : 1;
  /* Nonzero for arcs that do not correspond to real control flow,
     e.g. the arcs added to keep the graph solvable around calls.  */
  unsigned int fake : 1;
  /* Nonzero if control falls through from source to target with no
     branch.  */
  unsigned int fall_through : 1;
  /* The branch insn that creates this arc, or zero for fall-through
     and fake arcs.  */
  rtx branch_insn;
  /* Next arc entering the same target block (predecessor list).  */
  struct adj_list *pred_next;
  /* Next arc leaving the same source block (successor list).  */
  struct adj_list *succ_next;
};

/* Accessors for the endpoints and traversal count of an arc.  */
#define ARC_TARGET(ARCPTR) (ARCPTR->target)
#define ARC_SOURCE(ARCPTR) (ARCPTR->source)
#define ARC_COUNT(ARCPTR) (ARCPTR->arc_count)
78
/* Count the number of basic blocks, and create an array of these structures,
   one for each bb in the function.  */

struct bb_info
{
  /* Head of the list of arcs leaving this block.  */
  struct adj_list *succ;
  /* Head of the list of arcs entering this block.  */
  struct adj_list *pred;
  /* Number of successor (resp. predecessor) arcs whose counts are still
     unknown; decremented as arc counts become valid during graph
     solving.  */
  int succ_count;
  int pred_count;
  /* Number of times this block was executed.  */
  int exec_count;
  /* Nonzero once exec_count holds a trustworthy value.  */
  unsigned int count_valid : 1;
  /* Nonzero if this block was placed on the spanning tree.  */
  unsigned int on_tree : 1;
  /* First insn of the block; used to attach REG_EXEC_COUNT notes and to
     position instrumentation code.  */
  rtx first_insn;
};
93
/* Indexed by label number, gives the basic block number containing that
   label.  */

static int *label_to_bb;

/* Number of valid entries in the label_to_bb array.  */

static int label_to_bb_size;

/* Indexed by block index, holds the basic block graph.  */

static struct bb_info *bb_graph;

/* Name and file pointer of the output file for the basic block graph.  */

static char *bbg_file_name;
static FILE *bbg_file;

/* Name and file pointer of the input file for the arc count data.  */

static char *da_file_name;
static FILE *da_file;

/* Pointer of the output file for the basic block/line number map.  */
static FILE *bb_file;

/* Last source file name written to bb_file.  */

static char *last_bb_file_name;

/* Indicates whether the next line number note should be output to
   bb_file or not.  Used to eliminate a redundant note after an
   expanded inline function call.  */

static int ignore_next_note;

/* Used by final, for allocating the proper amount of storage for the
   instrumented arc execution counts.  */

int count_instrumented_arcs;

/* Number of executions for the return label.  */

int return_label_execution_count;

/* Collect statistics on the performance of this pass for the entire source
   file.  */

static int total_num_blocks;		/* Basic blocks seen so far.  */
static int total_num_arcs;		/* Arcs seen so far.  */
static int total_num_arcs_instrumented;	/* Arcs given counting code.  */
static int total_num_blocks_created;	/* Blocks emitted just to count arcs.  */
static int total_num_passes;		/* Graph-solver pass total.  */
static int total_num_times_called;	/* Number of functions processed.  */
static int total_hist_br_prob[20];	/* Histogram of branch probabilities,
					   in 5% buckets.  */
static int total_num_never_executed;	/* Branches with zero executions.  */
static int total_num_branches;		/* Conditional branches seen.  */

/* Forward declarations.  */
static void init_arc PARAMS ((struct adj_list *, int, int, rtx));
static void find_spanning_tree PARAMS ((int));
static void expand_spanning_tree PARAMS ((int));
static void fill_spanning_tree PARAMS ((int));
static void init_arc_profiler PARAMS ((void));
static void output_arc_profiler PARAMS ((int, rtx));
static void instrument_arcs PARAMS ((rtx, int, FILE *));
static void output_gcov_string PARAMS ((const char *, long));
static int tablejump_entry_p PARAMS ((rtx, rtx));
static void compute_branch_probabilities PARAMS ((int, FILE *));

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

/* If non-zero, we need to output a constructor to set up the
   per-object-file data.  */
static int need_func_profiler = 0;
171
172 \f
/* Add arc instrumentation code to the entire insn chain.

   F is the first insn of the chain.
   NUM_BLOCKS is the number of basic blocks found in F.
   DUMP_FILE, if nonzero, is an rtl dump file we can write to.

   Every arc not placed on the spanning tree gets a counter
   (via output_arc_profiler).  When the counter cannot be placed in the
   arc's source or target block, a new basic block is emitted to hold it.
   Updates the file-wide statistics total_num_arcs_instrumented,
   count_instrumented_arcs and total_num_blocks_created.  */

static void
instrument_arcs (f, num_blocks, dump_file)
     rtx f;
     int num_blocks;
     FILE *dump_file;
{
  register int i;
  register struct adj_list *arcptr, *backptr;
  int num_arcs = 0;		/* New blocks created for counters.  */
  int num_instr_arcs = 0;	/* Counters emitted for this function.  */
  rtx insn;

  /* Instrument the program start.  */
  /* Handle block 0 specially, since it will always be instrumented,
     but it doesn't have a valid first_insn or branch_insn.  We must
     put the instructions before the NOTE_INSN_FUNCTION_BEG note, so
     that they don't clobber any of the parameters of the current
     function.  */
  /* NOTE(review): this search assumes a NOTE_INSN_FUNCTION_BEG note is
     always present; if it were missing, INSN would be null and the
     PREV_INSN below would dereference it — confirm the invariant.  */
  for (insn = f; insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == NOTE
	&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_FUNCTION_BEG)
      break;
  insn = PREV_INSN (insn);
  need_func_profiler = 1;
  output_arc_profiler (total_num_arcs_instrumented + num_instr_arcs++, insn);

  for (i = 1; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree)
	{
	  if (dump_file)
	    fprintf (dump_file, "Arc %d to %d instrumented\n", i,
		     ARC_TARGET (arcptr));

	  /* Check to see if this arc is the only exit from its source block,
	     or the only entrance to its target block.  In either case,
	     we don't need to create a new block to instrument the arc.  */

	  if (bb_graph[i].succ == arcptr && arcptr->succ_next == 0)
	    {
	      /* Instrument the source block.  */
	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   PREV_INSN (bb_graph[i].first_insn));
	    }
	  else if (arcptr == bb_graph[ARC_TARGET (arcptr)].pred
		   && arcptr->pred_next == 0)
	    {
	      /* Instrument the target block.  */
	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   PREV_INSN (bb_graph[ARC_TARGET (arcptr)].first_insn));
	    }
	  else if (arcptr->fall_through)
	    {
	      /* This is a fall-through; put the instrumentation code after
		 the branch that ends this block.  */

	      /* Find the (other) arc out of this block that carries the
		 branch insn; the counter goes after that insn, i.e. on
		 the fall-through path.  */
	      for (backptr = bb_graph[i].succ; backptr;
		   backptr = backptr->succ_next)
		if (backptr != arcptr)
		  break;

	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   backptr->branch_insn);
	    }
	  else
	    {
	      /* Must emit a new basic block to hold the arc counting code.  */
	      enum rtx_code code = GET_CODE (PATTERN (arcptr->branch_insn));

	      if (code == SET)
		{
		  /* Create the new basic block right after the branch.
		     Invert the branch so that it jumps past the end of the new
		     block.  The new block will consist of the instrumentation
		     code, and a jump to the target of this arc.  */
		  int this_is_simplejump = simplejump_p (arcptr->branch_insn);
		  rtx new_label = gen_label_rtx ();
		  rtx old_label, set_src;
		  rtx after = arcptr->branch_insn;

		  /* Simplejumps can't reach here.  */
		  if (this_is_simplejump)
		    abort ();

		  /* NOTE(review): because of the abort above,
		     this_is_simplejump is always zero past this point, so
		     both `if (this_is_simplejump)' branches below are dead
		     code — presumably leftovers from an earlier version.  */

		  /* We can't use JUMP_LABEL, because it won't be set if we
		     are compiling without optimization.  */

		  set_src = SET_SRC (single_set (arcptr->branch_insn));
		  if (GET_CODE (set_src) == LABEL_REF)
		    old_label = set_src;
		  else if (GET_CODE (set_src) != IF_THEN_ELSE)
		    abort ();
		  else if (XEXP (set_src, 1) == pc_rtx)
		    old_label = XEXP (XEXP (set_src, 2), 0);
		  else
		    old_label = XEXP (XEXP (set_src, 1), 0);

		  /* Set the JUMP_LABEL so that redirect_jump will work.  */
		  JUMP_LABEL (arcptr->branch_insn) = old_label;

		  /* Add a use for OLD_LABEL that will be needed when we emit
		     the JUMP_INSN below.  If we don't do this here,
		     `invert_jump' might delete it for us.  We must add two
		     when not optimizing, because the NUSES is zero now,
		     but must be at least two to prevent the label from being
		     deleted.  */
		  LABEL_NUSES (old_label) += 2;

		  /* Emit the insns for the new block in reverse order,
		     since that is most convenient.  */

		  if (this_is_simplejump)
		    {
		      after = NEXT_INSN (arcptr->branch_insn);
		      if (! redirect_jump (arcptr->branch_insn, new_label))
			/* Don't know what to do if this branch won't
			   redirect.  */
			abort ();
		    }
		  else
		    {
		      if (! invert_jump (arcptr->branch_insn, new_label))
			/* Don't know what to do if this branch won't invert.  */
			abort ();

		      emit_label_after (new_label, after);
		      LABEL_NUSES (new_label)++;
		    }
		  emit_barrier_after (after);
		  emit_jump_insn_after (gen_jump (old_label), after);
		  JUMP_LABEL (NEXT_INSN (after)) = old_label;

		  /* Instrument the source arc.  */
		  output_arc_profiler (total_num_arcs_instrumented
				       + num_instr_arcs++,
				       after);
		  if (this_is_simplejump)
		    {
		      emit_label_after (new_label, after);
		      LABEL_NUSES (new_label)++;
		    }
		}
	      else if (code == ADDR_VEC || code == ADDR_DIFF_VEC)
		{
		  /* A table jump.  Create a new basic block immediately
		     after the table, by emitting a barrier, a label, a
		     counting note, and a jump to the old label.  Put the
		     new label in the table.  */

		  rtx new_label = gen_label_rtx ();
		  rtx old_lref, new_lref;
		  int index;

		  /* Must determine the old_label reference, do this
		     by counting the arcs after this one, which will
		     give the index of our label in the table.  */

		  index = 0;
		  for (backptr = arcptr->succ_next; backptr;
		       backptr = backptr->succ_next)
		    index++;

		  /* In an ADDR_DIFF_VEC the label vector is operand 1;
		     in an ADDR_VEC it is operand 0.  */
		  old_lref = XVECEXP (PATTERN (arcptr->branch_insn),
				      (code == ADDR_DIFF_VEC), index);

		  /* Emit the insns for the new block in reverse order,
		     since that is most convenient.  */
		  emit_jump_insn_after (gen_jump (XEXP (old_lref, 0)),
					arcptr->branch_insn);
		  JUMP_LABEL (NEXT_INSN (arcptr->branch_insn))
		    = XEXP (old_lref, 0);

		  /* Instrument the source arc.  */
		  output_arc_profiler (total_num_arcs_instrumented
				       + num_instr_arcs++,
				       arcptr->branch_insn);

		  emit_label_after (new_label, arcptr->branch_insn);
		  LABEL_NUSES (NEXT_INSN (arcptr->branch_insn))++;
		  emit_barrier_after (arcptr->branch_insn);

		  /* Fix up the table jump.  */
		  new_lref = gen_rtx_LABEL_REF (Pmode, new_label);
		  XVECEXP (PATTERN (arcptr->branch_insn),
			   (code == ADDR_DIFF_VEC), index) = new_lref;
		}
	      else
		abort ();

	      num_arcs += 1;
	      if (dump_file)
		fprintf (dump_file,
			 "Arc %d to %d needed new basic block\n", i,
			 ARC_TARGET (arcptr));
	    }
	}

  total_num_arcs_instrumented += num_instr_arcs;
  count_instrumented_arcs = total_num_arcs_instrumented;

  total_num_blocks_created += num_arcs;
  if (dump_file)
    {
      fprintf (dump_file, "%d arcs instrumented\n", num_instr_arcs);
      fprintf (dump_file, "%d extra basic blocks created\n", num_arcs);
    }
}
389
390 /* Output STRING to bb_file, surrounded by DELIMITER. */
391
392 static void
393 output_gcov_string (string, delimiter)
394 const char *string;
395 long delimiter;
396 {
397 long temp;
398
399 /* Write a delimiter to indicate that a file name follows. */
400 __write_long (delimiter, bb_file, 4);
401
402 /* Write the string. */
403 temp = strlen (string) + 1;
404 fwrite (string, temp, 1, bb_file);
405
406 /* Append a few zeros, to align the output to a 4 byte boundary. */
407 temp = temp & 0x3;
408 if (temp)
409 {
410 char c[4];
411
412 c[0] = c[1] = c[2] = c[3] = 0;
413 fwrite (c, sizeof (char), 4 - temp, bb_file);
414 }
415
416 /* Store another delimiter in the .bb file, just to make it easy to find the
417 end of the file name. */
418 __write_long (delimiter, bb_file, 4);
419 }
420 \f
421 /* Return TRUE if this insn must be a tablejump entry insn. This works for
422 the MIPS port, but may give false negatives for some targets. */
423
424 static int
425 tablejump_entry_p (insn, label)
426 rtx insn, label;
427 {
428 rtx next = next_active_insn (insn);
429 enum rtx_code code = GET_CODE (PATTERN (next));
430
431 if (code != ADDR_DIFF_VEC && code != ADDR_VEC)
432 return 0;
433
434 if (PREV_INSN (next) == XEXP (label, 0))
435 return 1;
436
437 return 0;
438 }
439
/* Compute the branch probabilities for the various branches.
   Annotate them accordingly.

   NUM_BLOCKS is the number of blocks in the global bb_graph; DUMP_FILE,
   if nonzero, receives diagnostics.  Reads the measured counts of the
   instrumented (non-spanning-tree) arcs from da_file, then solves for
   all remaining arc and block counts by flow conservation.  Each branch
   insn gets a REG_BR_PROB note and each block's first insn a
   REG_EXEC_COUNT note.  Also sets return_label_execution_count.  */

static void
compute_branch_probabilities (num_blocks, dump_file)
     int num_blocks;
     FILE *dump_file;
{
  int i;
  int bad_counts = 0;
  int num_arcs;
  int changes;
  int passes;
  int prob;
  int total;
  int num_branches;
  int num_never_executed;
  int hist_br_prob[20];
  struct adj_list *arcptr;

  /* For each arc not on the spanning tree, set its execution count from
     the .da file.  */

  /* The first count in the .da file is the number of times that the function
     was entered.  This is the exec_count for block zero.  */

  num_arcs = 0;
  for (i = 0; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree)
	{
	  num_arcs++;
	  if (da_file)
	    {
	      long value;
	      /* NOTE(review): the return value of __read_long is not
		 checked; a short or corrupt .da file would silently
		 produce bogus counts here — confirm whether callers
		 validate the file beforehand.  */
	      __read_long (&value, da_file, 8);
	      ARC_COUNT (arcptr) = value;
	    }
	  else
	    ARC_COUNT (arcptr) = 0;
	  arcptr->count_valid = 1;
	  /* This arc's count is now known, so it no longer blocks
	     solving either endpoint block.  */
	  bb_graph[i].succ_count--;
	  bb_graph[ARC_TARGET (arcptr)].pred_count--;
	}

  if (dump_file)
    fprintf (dump_file, "%d arc counts read\n", num_arcs);

  /* For every block in the file,
     - if every exit/entrance arc has a known count, then set the block count
     - if the block count is known, and every exit/entrance arc but one has
       a known execution count, then set the count of the remaining arc

     As arc counts are set, decrement the succ/pred count, but don't delete
     the arc, that way we can easily tell when all arcs are known, or only
     one arc is unknown.  */

  /* The order that the basic blocks are iterated through is important.
     Since the code that finds spanning trees starts with block 0, low numbered
     arcs are put on the spanning tree in preference to high numbered arcs.
     Hence, most instrumented arcs are at the end.  Graph solving works much
     faster if we propagate numbers from the end to the start.

     This takes an average of slightly more than 3 passes.  */

  changes = 1;
  passes = 0;
  while (changes)
    {
      passes++;
      changes = 0;

      for (i = num_blocks - 1; i >= 0; i--)
	{
	  struct bb_info *binfo = &bb_graph[i];
	  if (! binfo->count_valid)
	    {
	      /* All outgoing arc counts known: the block count is
		 their sum.  */
	      if (binfo->succ_count == 0)
		{
		  total = 0;
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    total += ARC_COUNT (arcptr);
		  binfo->exec_count = total;
		  binfo->count_valid = 1;
		  changes = 1;
		}
	      /* Likewise from the incoming arcs.  */
	      else if (binfo->pred_count == 0)
		{
		  total = 0;
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    total += ARC_COUNT (arcptr);
		  binfo->exec_count = total;
		  binfo->count_valid = 1;
		  changes = 1;
		}
	    }
	  if (binfo->count_valid)
	    {
	      if (binfo->succ_count == 1)
		{
		  total = 0;
		  /* One of the counts will be invalid, but it is zero,
		     so adding it in also doesn't hurt.  */
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    total += ARC_COUNT (arcptr);
		  /* Calculate count for remaining arc by conservation.  */
		  total = binfo->exec_count - total;
		  /* Search for the invalid arc, and set its count.  */
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    if (! arcptr->count_valid)
		      break;
		  if (! arcptr)
		    abort ();
		  arcptr->count_valid = 1;
		  ARC_COUNT (arcptr) = total;
		  binfo->succ_count--;

		  bb_graph[ARC_TARGET (arcptr)].pred_count--;
		  changes = 1;
		}
	      if (binfo->pred_count == 1)
		{
		  total = 0;
		  /* One of the counts will be invalid, but it is zero,
		     so adding it in also doesn't hurt.  */
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    total += ARC_COUNT (arcptr);
		  /* Calculate count for remaining arc by conservation.  */
		  total = binfo->exec_count - total;
		  /* Search for the invalid arc, and set its count.  */
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    if (! arcptr->count_valid)
		      break;
		  if (! arcptr)
		    abort ();
		  arcptr->count_valid = 1;
		  ARC_COUNT (arcptr) = total;
		  binfo->pred_count--;

		  bb_graph[ARC_SOURCE (arcptr)].succ_count--;
		  changes = 1;
		}
	    }
	}
    }

  total_num_passes += passes;
  if (dump_file)
    fprintf (dump_file, "Graph solving took %d passes.\n\n", passes);

  /* If the graph has been correctly solved, every block will have a
     succ and pred count of zero.  */
  for (i = 0; i < num_blocks; i++)
    {
      struct bb_info *binfo = &bb_graph[i];
      if (binfo->succ_count || binfo->pred_count)
	abort ();
    }

  /* For every arc, calculate its branch probability and add a reg_note
     to the branch insn to indicate this.  */

  for (i = 0; i < 20; i++)
    hist_br_prob[i] = 0;
  num_never_executed = 0;
  num_branches = 0;

  for (i = 0; i < num_blocks; i++)
    {
      struct bb_info *binfo = &bb_graph[i];

      total = binfo->exec_count;
      for (arcptr = binfo->succ; arcptr; arcptr = arcptr->succ_next)
	{
	  if (arcptr->branch_insn)
	    {
	      /* This calculates the branch probability as an integer between
		 0 and REG_BR_PROB_BASE, properly rounded to the nearest
		 integer.  Perform the arithmetic in double to avoid
		 overflowing the range of ints.  */

	      /* A block that never ran gets the sentinel -1 rather than
		 a probability.  */
	      if (total == 0)
		prob = -1;
	      else
		{
		  rtx pat = PATTERN (arcptr->branch_insn);

		  prob = (((double)ARC_COUNT (arcptr) * REG_BR_PROB_BASE)
			  + (total >> 1)) / total;
		  if (prob < 0 || prob > REG_BR_PROB_BASE)
		    {
		      if (dump_file)
			fprintf (dump_file, "bad count: prob for %d-%d thought to be %d (forcibly normalized)\n",
				 ARC_SOURCE (arcptr), ARC_TARGET (arcptr),
				 prob);

		      bad_counts = 1;
		      prob = REG_BR_PROB_BASE / 2;
		    }

		  /* Match up probability with JUMP pattern.  */

		  if (GET_CODE (pat) == SET
		      && GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
		    {
		      if (ARC_TARGET (arcptr) == ARC_SOURCE (arcptr) + 1)
			{
			  /* A fall through arc should never have a
			     branch insn.  */
			  abort ();
			}
		      else
			{
			  /* This is the arc for the taken branch.  */
			  if (GET_CODE (XEXP (SET_SRC (pat), 2)) != PC)
			    prob = REG_BR_PROB_BASE - prob;
			}
		    }
		}

	      if (prob == -1)
		num_never_executed++;
	      else
		{
		  /* Bucket the probability into one of twenty 5% bins.  */
		  int index = prob * 20 / REG_BR_PROB_BASE;
		  if (index == 20)
		    index = 19;
		  hist_br_prob[index]++;
		}
	      num_branches++;

	      REG_NOTES (arcptr->branch_insn)
		= gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
				     REG_NOTES (arcptr->branch_insn));
	    }
	}

      /* Add a REG_EXEC_COUNT note to the first instruction of this block.  */
      if (! binfo->first_insn
	  || GET_RTX_CLASS (GET_CODE (binfo->first_insn)) != 'i')
	{
	  /* Block 0 is a fake block representing function entry, and does
	     not have a real first insn.  The second last block might not
	     begin with a real insn.  */
	  if (i == num_blocks - 1)
	    return_label_execution_count = total;
	  else if (i != 0 && i != num_blocks - 2)
	    abort ();
	}
      else
	{
	  REG_NOTES (binfo->first_insn)
	    = gen_rtx_EXPR_LIST (REG_EXEC_COUNT, GEN_INT (total),
				 REG_NOTES (binfo->first_insn));
	  if (i == num_blocks - 1)
	    return_label_execution_count = total;
	}
    }

  /* This should never happen.  */
  if (bad_counts)
    warning ("Arc profiling: some arc counts were bad.");

  if (dump_file)
    {
      fprintf (dump_file, "%d branches\n", num_branches);
      fprintf (dump_file, "%d branches never executed\n",
	       num_never_executed);
      if (num_branches)
	for (i = 0; i < 10; i++)
	  fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
		   (hist_br_prob[i]+hist_br_prob[19-i])*100/num_branches,
		   5*i, 5*i+5);

      /* NOTE(review): the file-wide statistics are only accumulated when
	 a dump file is requested; presumably intentional since they are
	 only ever reported to the dump — confirm.  */
      total_num_branches += num_branches;
      total_num_never_executed += num_never_executed;
      for (i = 0; i < 20; i++)
	total_hist_br_prob[i] += hist_br_prob[i];
    }
}
726
727 /* Instrument and/or analyze program behavior based on program flow graph.
728 In either case, this function builds a flow graph for the function being
729 compiled. The flow graph is stored in BB_GRAPH.
730
731 When FLAG_PROFILE_ARCS is nonzero, this function instruments the arcs in
732 the flow graph that are needed to reconstruct the dynamic behavior of the
733 flow graph.
734
735 When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
736 information from a data file containing arc count information from previous
737 executions of the function being compiled. In this case, the flow graph is
738 annotated with actual execution counts, which are later propagated into the
739 rtl for optimization purposes.
740
741 Main entry point of this file. */
742
743 void
744 branch_prob (f, dump_file)
745 rtx f;
746 FILE *dump_file;
747 {
748 int i, num_blocks;
749 struct adj_list *arcptr;
750 int num_arcs;
751
752 /* start of a function. */
753 if (flag_test_coverage)
754 output_gcov_string (current_function_name, (long) -2);
755
756 /* Execute this only if doing arc profiling or branch probabilities. */
757 if (! profile_arc_flag && ! flag_branch_probabilities
758 && ! flag_test_coverage)
759 abort ();
760
761 total_num_times_called++;
762
763 /* Create an array label_to_bb of ints of size max_label_num. */
764 label_to_bb_size = max_label_num ();
765 label_to_bb = (int *) oballoc (label_to_bb_size * sizeof (int));
766 bzero ((char *) label_to_bb, label_to_bb_size * sizeof (int));
767
768 /* Scan the insns in the function, count the number of basic blocks
769 present. When a code label is passed, set label_to_bb[label] = bb
770 number. */
771
772 /* The first block found will be block 1, so that function entry can be
773 block 0. */
774
775 {
776 register RTX_CODE prev_code = JUMP_INSN;
777 register RTX_CODE code;
778 register rtx insn;
779 register int i;
780 int block_separator_emitted = 0;
781
782 ignore_next_note = 0;
783
784 for (insn = NEXT_INSN (f), i = 0; insn; insn = NEXT_INSN (insn))
785 {
786 code = GET_CODE (insn);
787
788 if (code == BARRIER)
789 ;
790 else if (code == CODE_LABEL)
791 /* This label is part of the next block, but we can't increment
792 block number yet since there might be multiple labels. */
793 label_to_bb[CODE_LABEL_NUMBER (insn)] = i + 1;
794 /* We make NOTE_INSN_SETJMP notes into a block of their own, so that
795 they can be the target of the fake arc for the setjmp call.
796 This avoids creating cycles of fake arcs, which would happen if
797 the block after the setjmp call contained a call insn. */
798 else if ((prev_code == JUMP_INSN || prev_code == CALL_INSN
799 || prev_code == CODE_LABEL || prev_code == BARRIER)
800 && (GET_RTX_CLASS (code) == 'i'
801 || (code == NOTE
802 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)))
803 {
804 i += 1;
805
806 /* Emit the block separator if it hasn't already been emitted. */
807 if (flag_test_coverage && ! block_separator_emitted)
808 {
809 /* Output a zero to the .bb file to indicate that a new
810 block list is starting. */
811 __write_long (0, bb_file, 4);
812 }
813 block_separator_emitted = 0;
814 }
815 /* If flag_test_coverage is true, then we must add an entry to the
816 .bb file for every note. */
817 else if (code == NOTE && flag_test_coverage)
818 {
819 /* Must ignore the line number notes that immediately follow the
820 end of an inline function to avoid counting it twice. There
821 is a note before the call, and one after the call. */
822 if (NOTE_LINE_NUMBER (insn) == NOTE_REPEATED_LINE_NUMBER)
823 ignore_next_note = 1;
824 else if (NOTE_LINE_NUMBER (insn) > 0)
825 {
826 if (ignore_next_note)
827 ignore_next_note = 0;
828 else
829 {
830 /* Emit a block separator here to ensure that a NOTE
831 immediately following a JUMP_INSN or CALL_INSN will end
832 up in the right basic block list. */
833 if ((prev_code == JUMP_INSN || prev_code == CALL_INSN
834 || prev_code == CODE_LABEL || prev_code == BARRIER)
835 && ! block_separator_emitted)
836 {
837 /* Output a zero to the .bb file to indicate that
838 a new block list is starting. */
839 __write_long (0, bb_file, 4);
840
841 block_separator_emitted = 1;
842 }
843
844 /* If this is a new source file, then output the file's
845 name to the .bb file. */
846 if (! last_bb_file_name
847 || strcmp (NOTE_SOURCE_FILE (insn),
848 last_bb_file_name))
849 {
850 if (last_bb_file_name)
851 free (last_bb_file_name);
852 last_bb_file_name = xstrdup (NOTE_SOURCE_FILE (insn));
853 output_gcov_string (NOTE_SOURCE_FILE (insn), (long)-1);
854 }
855
856 /* Output the line number to the .bb file. Must be done
857 after the output_bb_profile_data() call, and after the
858 file name is written, to ensure that it is correctly
859 handled by gcov. */
860 __write_long (NOTE_LINE_NUMBER (insn), bb_file, 4);
861 }
862 }
863 }
864
865 if (code != NOTE)
866 prev_code = code;
867 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
868 prev_code = CALL_INSN;
869 }
870
871 /* Allocate last `normal' entry for bb_graph. */
872
873 /* The last insn was a jump, call, or label. In that case we have
874 a block at the end of the function with no insns. */
875 if (prev_code == JUMP_INSN || prev_code == CALL_INSN
876 || prev_code == CODE_LABEL || prev_code == BARRIER)
877 {
878 i++;
879
880 /* Emit the block separator if it hasn't already been emitted. */
881 if (flag_test_coverage && ! block_separator_emitted)
882 {
883 /* Output a zero to the .bb file to indicate that a new
884 block list is starting. */
885 __write_long (0, bb_file, 4);
886 }
887 }
888
889 /* Create another block to stand for EXIT, and make all return insns, and
890 the last basic block point here. Add one more to account for block
891 zero. */
892 num_blocks = i + 2;
893 }
894
895 total_num_blocks += num_blocks;
896 if (dump_file)
897 fprintf (dump_file, "%d basic blocks\n", num_blocks);
898
899 /* If we are only doing test coverage here, then return now. */
900 if (! profile_arc_flag && ! flag_branch_probabilities)
901 return;
902
903 /* Create and initialize the arrays that will hold bb_graph
904 and execution count info. */
905
906 bb_graph = (struct bb_info *) xcalloc (num_blocks,
907 sizeof (struct bb_info));
908
909 {
910 /* Scan the insns again:
911 - at the entry to each basic block, increment the predecessor count
912 (and successor of previous block) if it is a fall through entry,
913 create adj_list entries for this and the previous block
914 - at each jump insn, increment predecessor/successor counts for
915 target/source basic blocks, add this insn to pred/succ lists.
916
917 This also cannot be broken out as a separate subroutine
918 because it uses `alloca'. */
919
920 register RTX_CODE prev_code = JUMP_INSN;
921 register RTX_CODE code;
922 register rtx insn;
923 register int i;
924 int fall_through = 0;
925 struct adj_list *arcptr;
926 int dest = 0;
927
928 /* Block 0 always falls through to block 1. */
929 num_arcs = 0;
930 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
931 init_arc (arcptr, 0, 1, 0);
932 arcptr->fall_through = 1;
933 num_arcs++;
934
935 /* Add a fake fall through arc from the last block to block 0, to make the
936 graph complete. */
937 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
938 init_arc (arcptr, num_blocks - 1, 0, 0);
939 arcptr->fake = 1;
940 num_arcs++;
941
942 /* Exit must be one node of the graph, and all exits from the function
943 must point there. When see a return branch, must point the arc to the
944 exit node. */
945
946 /* Must start scan with second insn in function as above. */
947 for (insn = NEXT_INSN (f), i = 0; insn; insn = NEXT_INSN (insn))
948 {
949 code = GET_CODE (insn);
950
951 if (code == BARRIER)
952 fall_through = 0;
953 else if (code == CODE_LABEL)
954 ;
955 /* We make NOTE_INSN_SETJMP notes into a block of their own, so that
956 they can be the target of the fake arc for the setjmp call.
957 This avoids creating cycles of fake arcs, which would happen if
958 the block after the setjmp call ended with a call. */
959 else if ((prev_code == JUMP_INSN || prev_code == CALL_INSN
960 || prev_code == CODE_LABEL || prev_code == BARRIER)
961 && (GET_RTX_CLASS (code) == 'i'
962 || (code == NOTE
963 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)))
964 {
965 /* This is the first insn of the block. */
966 i += 1;
967 if (fall_through)
968 {
969 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
970 init_arc (arcptr, i - 1, i, 0);
971 arcptr->fall_through = 1;
972
973 num_arcs++;
974 }
975 fall_through = 1;
976 bb_graph[i].first_insn = insn;
977 }
978 else if (code == NOTE)
979 {;}
980
981 if (code == CALL_INSN)
982 {
983 /* In the normal case, the call returns, and this is just like
984 a branch fall through. */
985 fall_through = 1;
986
987 /* Setjmp may return more times than called, so to make the graph
988 solvable, add a fake arc from the function entrance to the
989 next block.
990
991 All other functions may return fewer times than called (if
992 a descendent call longjmp or exit), so to make the graph
993 solvable, add a fake arc to the function exit from the
994 current block.
995
996 Distinguish the cases by checking for a SETJUMP note.
997 A call_insn can be the last ins of a function, so must check
998 to see if next insn actually exists. */
999 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
1000 if (NEXT_INSN (insn)
1001 && GET_CODE (NEXT_INSN (insn)) == NOTE
1002 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
1003 init_arc (arcptr, 0, i+1, insn);
1004 else
1005 init_arc (arcptr, i, num_blocks-1, insn);
1006 arcptr->fake = 1;
1007 num_arcs++;
1008 }
1009 else if (code == JUMP_INSN)
1010 {
1011 rtx tem, pattern = PATTERN (insn);
1012 rtx tablejump = 0;
1013
1014 /* If running without optimization, then jump label won't be valid,
1015 so we must search for the destination label in that case.
1016 We have to handle tablejumps and returns specially anyways, so
1017 we don't check the JUMP_LABEL at all here. */
1018
1019 /* ??? This code should be rewritten. We need a more elegant way
1020 to find the LABEL_REF. We need a more elegant way to
1021 differentiate tablejump entries from computed gotos.
1022 We should perhaps reuse code from flow to compute the CFG
1023 instead of trying to compute it here.
1024
1025 We can't use current_function_has_computed_jump, because that
1026 is calculated later by flow. We can't use computed_jump_p,
1027 because that returns true for tablejump entry insns for some
1028 targets, e.g. HPPA and MIPS. */
1029
1030 if (GET_CODE (pattern) == PARALLEL)
1031 {
1032 /* This assumes that PARALLEL jumps with a USE are
1033 tablejump entry jumps. The same assumption can be found
1034 in computed_jump_p. */
1035 /* Make an arc from this jump to the label of the
1036 jump table. This will instrument the number of
1037 times the switch statement is executed. */
1038 if (GET_CODE (XVECEXP (pattern, 0, 1)) == USE)
1039 {
1040 tem = XEXP (XVECEXP (pattern, 0, 1), 0);
1041 if (GET_CODE (tem) != LABEL_REF)
1042 abort ();
1043 dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (tem, 0))];
1044 }
1045 else if (GET_CODE (XVECEXP (pattern, 0, 0)) == SET
1046 && SET_DEST (XVECEXP (pattern, 0, 0)) == pc_rtx)
1047 {
1048 tem = SET_SRC (XVECEXP (pattern, 0, 0));
1049 if (GET_CODE (tem) == PLUS
1050 && GET_CODE (XEXP (tem, 1)) == LABEL_REF)
1051 {
1052 tem = XEXP (tem, 1);
1053 dest = label_to_bb [CODE_LABEL_NUMBER (XEXP (tem, 0))];
1054 }
1055 }
1056 else
1057 abort ();
1058 }
1059 else if (GET_CODE (pattern) == ADDR_VEC
1060 || GET_CODE (pattern) == ADDR_DIFF_VEC)
1061 tablejump = pattern;
1062 else if (GET_CODE (pattern) == RETURN)
1063 dest = num_blocks - 1;
1064 else if (GET_CODE (pattern) != SET)
1065 abort ();
1066 else if ((tem = SET_SRC (pattern))
1067 && GET_CODE (tem) == LABEL_REF)
1068 dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (tem, 0))];
1069 /* Recognize HPPA table jump entry. This code is similar to
1070 the code above in the PARALLEL case. */
1071 else if (GET_CODE (tem) == PLUS
1072 && GET_CODE (XEXP (tem, 0)) == MEM
1073 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == PLUS
1074 && GET_CODE (XEXP (XEXP (XEXP (tem, 0), 0), 0)) == PC
1075 && GET_CODE (XEXP (tem, 1)) == LABEL_REF
1076 && tablejump_entry_p (insn, XEXP (tem, 1)))
1077 dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (XEXP (tem, 1), 0))];
1078 /* Recognize the MIPS table jump entry. */
1079 else if (GET_CODE (tem) == PLUS
1080 && GET_CODE (XEXP (tem, 0)) == REG
1081 && GET_CODE (XEXP (tem, 1)) == LABEL_REF
1082 && tablejump_entry_p (insn, XEXP (tem, 1)))
1083 dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (XEXP (tem, 1), 0))];
1084 else
1085 {
1086 rtx label_ref;
1087
1088 /* Must be an IF_THEN_ELSE branch. If it isn't, assume it
1089 is a computed goto, which aren't supported yet. */
1090 if (GET_CODE (tem) != IF_THEN_ELSE)
1091 fatal ("-fprofile-arcs does not support computed gotos");
1092 if (XEXP (tem, 1) != pc_rtx)
1093 label_ref = XEXP (tem, 1);
1094 else
1095 label_ref = XEXP (tem, 2);
1096 dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (label_ref, 0))];
1097 }
1098
1099 if (tablejump)
1100 {
1101 int diff_vec_p = GET_CODE (tablejump) == ADDR_DIFF_VEC;
1102 int len = XVECLEN (tablejump, diff_vec_p);
1103 int k;
1104
1105 for (k = 0; k < len; k++)
1106 {
1107 rtx tem = XEXP (XVECEXP (tablejump, diff_vec_p, k), 0);
1108 dest = label_to_bb[CODE_LABEL_NUMBER (tem)];
1109
1110 arcptr = (struct adj_list *) alloca (sizeof(struct adj_list));
1111 init_arc (arcptr, i, dest, insn);
1112
1113 num_arcs++;
1114 }
1115 }
1116 else
1117 {
1118 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
1119 init_arc (arcptr, i, dest, insn);
1120
1121 num_arcs++;
1122 }
1123
1124 /* Determine whether or not this jump will fall through.
1125 Unconditional jumps and returns are not always followed by
1126 barriers. */
1127 pattern = PATTERN (insn);
1128 if (GET_CODE (pattern) == PARALLEL
1129 || GET_CODE (pattern) == RETURN)
1130 fall_through = 0;
1131 else if (GET_CODE (pattern) == ADDR_VEC
1132 || GET_CODE (pattern) == ADDR_DIFF_VEC)
1133 /* These aren't actually jump insns, but they never fall
1134 through, so... */
1135 fall_through = 0;
1136 else
1137 {
1138 if (GET_CODE (pattern) != SET || SET_DEST (pattern) != pc_rtx)
1139 abort ();
1140 if (GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
1141 fall_through = 0;
1142 }
1143 }
1144
1145 if (code != NOTE)
1146 prev_code = code;
1147 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
1148 {
1149 /* Make a fake insn to tag our notes on. */
1150 bb_graph[i].first_insn = insn
1151 = emit_insn_after (gen_rtx_USE (VOIDmode, stack_pointer_rtx),
1152 insn);
1153 prev_code = CALL_INSN;
1154 }
1155 }
1156
1157 /* If the code at the end of the function would give a new block, then
1158 do the following. */
1159
1160 if (prev_code == JUMP_INSN || prev_code == CALL_INSN
1161 || prev_code == CODE_LABEL || prev_code == BARRIER)
1162 {
1163 if (fall_through)
1164 {
1165 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
1166 init_arc (arcptr, i, i + 1, 0);
1167 arcptr->fall_through = 1;
1168
1169 num_arcs++;
1170 }
1171
1172 /* This may not be a real insn, but that should not cause a problem. */
1173 bb_graph[i+1].first_insn = get_last_insn ();
1174 }
1175
1176 /* There is always a fake arc from the last block of the function
1177 to the function exit block. */
1178 arcptr = (struct adj_list *) alloca (sizeof (struct adj_list));
1179 init_arc (arcptr, num_blocks-2, num_blocks-1, 0);
1180 arcptr->fake = 1;
1181 num_arcs++;
1182 }
1183
1184 total_num_arcs += num_arcs;
1185 if (dump_file)
1186 fprintf (dump_file, "%d arcs\n", num_arcs);
1187
1188 /* Create spanning tree from basic block graph, mark each arc that is
1189 on the spanning tree. */
1190
1191 /* To reduce the instrumentation cost, make two passes over the tree.
1192 First, put as many must-split (crowded and fake) arcs on the tree as
1193 possible, then on the second pass fill in the rest of the tree.
1194 Note that the spanning tree is considered undirected, so that as many
1195 must-split arcs as possible can be put on it.
1196
1197 Fallthrough arcs which are crowded should not be chosen on the first
1198 pass, since they do not require creating a new basic block. These
1199 arcs will have fall_through set. */
1200
1201 find_spanning_tree (num_blocks);
1202
1203 /* Create a .bbg file from which gcov can reconstruct the basic block
1204 graph. First output the number of basic blocks, and then for every
1205 arc output the source and target basic block numbers.
1206 NOTE: The format of this file must be compatible with gcov. */
1207
1208 if (flag_test_coverage)
1209 {
1210 int flag_bits;
1211
1212 __write_long (num_blocks, bbg_file, 4);
1213 __write_long (num_arcs, bbg_file, 4);
1214
1215 for (i = 0; i < num_blocks; i++)
1216 {
1217 long count = 0;
1218 for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
1219 count++;
1220 __write_long (count, bbg_file, 4);
1221
1222 for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
1223 {
1224 flag_bits = 0;
1225 if (arcptr->on_tree)
1226 flag_bits |= 0x1;
1227 if (arcptr->fake)
1228 flag_bits |= 0x2;
1229 if (arcptr->fall_through)
1230 flag_bits |= 0x4;
1231
1232 __write_long (ARC_TARGET (arcptr), bbg_file, 4);
1233 __write_long (flag_bits, bbg_file, 4);
1234 }
1235 }
1236
1237 /* Emit a -1 to separate the list of all arcs from the list of
1238 loop back edges that follows. */
1239 __write_long (-1, bbg_file, 4);
1240 }
1241
1242 /* For each arc not on the spanning tree, add counting code as rtl. */
1243
1244 if (profile_arc_flag)
1245 {
1246 instrument_arcs (f, num_blocks, dump_file);
1247 allocate_reg_info (max_reg_num (), FALSE, FALSE);
1248 }
1249
1250 /* Execute the rest only if doing branch probabilities. */
1251 if (flag_branch_probabilities)
1252 compute_branch_probabilities (num_blocks, dump_file);
1253
1254 /* Clean up. */
1255 free (bb_graph);
1256 }
1257 \f
1258 /* Initialize a new arc.
1259 ARCPTR is the empty adj_list this function fills in.
1260 SOURCE is the block number of the source block.
1261 TARGET is the block number of the target block.
1262 INSN is the insn which transfers control from SOURCE to TARGET,
1263 or zero if the transfer is implicit. */
1264
1265 static void
1266 init_arc (arcptr, source, target, insn)
1267 struct adj_list *arcptr;
1268 int source, target;
1269 rtx insn;
1270 {
1271 ARC_TARGET (arcptr) = target;
1272 ARC_SOURCE (arcptr) = source;
1273
1274 ARC_COUNT (arcptr) = 0;
1275 arcptr->count_valid = 0;
1276 arcptr->on_tree = 0;
1277 arcptr->fake = 0;
1278 arcptr->fall_through = 0;
1279 arcptr->branch_insn = insn;
1280
1281 arcptr->succ_next = bb_graph[source].succ;
1282 bb_graph[source].succ = arcptr;
1283 bb_graph[source].succ_count++;
1284
1285 arcptr->pred_next = bb_graph[target].pred;
1286 bb_graph[target].pred = arcptr;
1287 bb_graph[target].pred_count++;
1288 }
1289
/* This function searches all of the arcs in the program flow graph, and puts
   as many bad arcs as possible onto the spanning tree.  Bad arcs include
   fake arcs (needed for setjmp(), longjmp(), exit()) which MUST be on the
   spanning tree as they can't be instrumented.  Also, arcs which must be
   split when instrumented should be part of the spanning tree if possible.
   NUM_BLOCKS is the total number of basic blocks, including the entry
   (block 0) and exit (block NUM_BLOCKS-1) pseudo-blocks.  */

static void
find_spanning_tree (num_blocks)
     int num_blocks;
{
  int i;
  struct adj_list *arcptr;
  struct bb_info *binfo = &bb_graph[0];

  /* Fake arcs must be part of the spanning tree, and are always safe to put
     on the spanning tree.  Fake arcs will either be a successor of node 0,
     a predecessor of the last node, or from the last node to node 0.  */

  /* First pass: fake arcs leaving the entry block.  */
  for (arcptr = bb_graph[0].succ; arcptr; arcptr = arcptr->succ_next)
    if (arcptr->fake)
      {
        /* Adding this arc should never cause a cycle.  This is a fatal
           error if it would.  */
        if (bb_graph[ARC_TARGET (arcptr)].on_tree && binfo->on_tree)
          abort();
        else
          {
            arcptr->on_tree = 1;
            bb_graph[ARC_TARGET (arcptr)].on_tree = 1;
            binfo->on_tree = 1;
          }
      }

  /* Second pass: fake arcs entering the exit block.  */
  binfo = &bb_graph[num_blocks-1];
  for (arcptr = binfo->pred; arcptr; arcptr = arcptr->pred_next)
    if (arcptr->fake)
      {
        /* Adding this arc should never cause a cycle.  This is a fatal
           error if it would.  */
        if (bb_graph[ARC_SOURCE (arcptr)].on_tree && binfo->on_tree)
          abort();
        else
          {
            arcptr->on_tree = 1;
            bb_graph[ARC_SOURCE (arcptr)].on_tree = 1;
            binfo->on_tree = 1;
          }
      }
  /* The only entrance to node zero is a fake arc.  */
  bb_graph[0].pred->on_tree = 1;

  /* Arcs which are crowded at both the source and target should be put on
     the spanning tree if possible, except for fall_through arcs which never
     require adding a new block even if crowded, and arcs with the same source
     and dest which must always be instrumented.  */
  for (i = 0; i < num_blocks; i++)
    {
      binfo = &bb_graph[i];

      /* NOTE(review): the second disjunct below tests that the target has
         *some* predecessor and that ARCPTR is last on the predecessor
         chain; it looks like it may have intended
         `bb_graph[ARC_TARGET (arcptr)].pred == arcptr` (sole predecessor)
         to mirror the sole-successor test above — confirm before changing.  */
      for (arcptr = binfo->succ; arcptr; arcptr = arcptr->succ_next)
        if (! ((binfo->succ == arcptr && arcptr->succ_next == 0)
               || (bb_graph[ARC_TARGET (arcptr)].pred
                   && arcptr->pred_next == 0))
            && ! arcptr->fall_through
            && ARC_TARGET (arcptr) != i)
          {
            /* This is a crowded arc at both source and target.  Try to put
               it on the spanning tree.  Can do this if either the source or
               target block is not yet on the tree.  */
            if (! bb_graph[ARC_TARGET (arcptr)].on_tree || ! binfo->on_tree)
              {
                arcptr->on_tree = 1;
                bb_graph[ARC_TARGET (arcptr)].on_tree = 1;
                binfo->on_tree = 1;
              }
          }
    }

  /* Clear all of the basic block on_tree bits, so that we can use them to
     create the spanning tree.  */
  for (i = 0; i < num_blocks; i++)
    bb_graph[i].on_tree = 0;

  /* Now fill in the spanning tree until every basic block is on it.
     Don't put the 0 to 1 fall through arc on the tree, since it is
     always cheap to instrument, so start filling the tree from node 1.  */

  for (i = 1; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree
          && ! bb_graph[ARC_TARGET (arcptr)].on_tree)
        {
          fill_spanning_tree (i);
          break;
        }
}
1386
1387 /* Add arcs reached from BLOCK to the spanning tree if they are needed and
1388 not already there. */
1389
1390 static void
1391 fill_spanning_tree (block)
1392 int block;
1393 {
1394 struct adj_list *arcptr;
1395
1396 expand_spanning_tree (block);
1397
1398 for (arcptr = bb_graph[block].succ; arcptr; arcptr = arcptr->succ_next)
1399 if (! arcptr->on_tree
1400 && ! bb_graph[ARC_TARGET (arcptr)].on_tree)
1401 {
1402 arcptr->on_tree = 1;
1403 fill_spanning_tree (ARC_TARGET (arcptr));
1404 }
1405 }
1406
1407 /* When first visit a block, must add all blocks that are already connected
1408 to this block via tree arcs to the spanning tree. */
1409
1410 static void
1411 expand_spanning_tree (block)
1412 int block;
1413 {
1414 struct adj_list *arcptr;
1415
1416 bb_graph[block].on_tree = 1;
1417
1418 for (arcptr = bb_graph[block].succ; arcptr; arcptr = arcptr->succ_next)
1419 if (arcptr->on_tree && ! bb_graph[ARC_TARGET (arcptr)].on_tree)
1420 expand_spanning_tree (ARC_TARGET (arcptr));
1421
1422 for (arcptr = bb_graph[block].pred;
1423 arcptr; arcptr = arcptr->pred_next)
1424 if (arcptr->on_tree && ! bb_graph[ARC_SOURCE (arcptr)].on_tree)
1425 expand_spanning_tree (ARC_SOURCE (arcptr));
1426 }
1427 \f
1428 /* Perform file-level initialization for branch-prob processing. */
1429
1430 void
1431 init_branch_prob (filename)
1432 const char *filename;
1433 {
1434 long len;
1435 int i;
1436
1437 if (flag_test_coverage)
1438 {
1439 /* Open an output file for the basic block/line number map. */
1440 int len = strlen (filename);
1441 char *data_file = (char *) alloca (len + 4);
1442 strcpy (data_file, filename);
1443 strip_off_ending (data_file, len);
1444 strcat (data_file, ".bb");
1445 if ((bb_file = fopen (data_file, "wb")) == 0)
1446 pfatal_with_name (data_file);
1447
1448 /* Open an output file for the program flow graph. */
1449 len = strlen (filename);
1450 bbg_file_name = (char *) alloca (len + 5);
1451 strcpy (bbg_file_name, filename);
1452 strip_off_ending (bbg_file_name, len);
1453 strcat (bbg_file_name, ".bbg");
1454 if ((bbg_file = fopen (bbg_file_name, "wb")) == 0)
1455 pfatal_with_name (bbg_file_name);
1456
1457 /* Initialize to zero, to ensure that the first file name will be
1458 written to the .bb file. */
1459 last_bb_file_name = 0;
1460 }
1461
1462 if (flag_branch_probabilities)
1463 {
1464 len = strlen (filename);
1465 da_file_name = (char *) alloca (len + 4);
1466 strcpy (da_file_name, filename);
1467 strip_off_ending (da_file_name, len);
1468 strcat (da_file_name, ".da");
1469 if ((da_file = fopen (da_file_name, "rb")) == 0)
1470 warning ("file %s not found, execution counts assumed to be zero.",
1471 da_file_name);
1472
1473 /* The first word in the .da file gives the number of instrumented arcs,
1474 which is not needed for our purposes. */
1475
1476 if (da_file)
1477 __read_long (&len, da_file, 8);
1478 }
1479
1480 if (profile_arc_flag)
1481 init_arc_profiler ();
1482
1483 total_num_blocks = 0;
1484 total_num_arcs = 0;
1485 total_num_arcs_instrumented = 0;
1486 total_num_blocks_created = 0;
1487 total_num_passes = 0;
1488 total_num_times_called = 0;
1489 total_num_branches = 0;
1490 total_num_never_executed = 0;
1491 for (i = 0; i < 20; i++)
1492 total_hist_br_prob[i] = 0;
1493 }
1494
1495 /* Performs file-level cleanup after branch-prob processing
1496 is completed. */
1497
1498 void
1499 end_branch_prob (dump_file)
1500 FILE *dump_file;
1501 {
1502 if (flag_test_coverage)
1503 {
1504 fclose (bb_file);
1505 fclose (bbg_file);
1506 }
1507
1508 if (flag_branch_probabilities)
1509 {
1510 if (da_file)
1511 {
1512 long temp;
1513 /* This seems slightly dangerous, as it presumes the EOF
1514 flag will not be set until an attempt is made to read
1515 past the end of the file. */
1516 if (feof (da_file))
1517 warning (".da file contents exhausted too early\n");
1518 /* Should be at end of file now. */
1519 if (__read_long (&temp, da_file, 8) == 0)
1520 warning (".da file contents not exhausted\n");
1521 fclose (da_file);
1522 }
1523 }
1524
1525 if (dump_file)
1526 {
1527 fprintf (dump_file, "\n");
1528 fprintf (dump_file, "Total number of blocks: %d\n", total_num_blocks);
1529 fprintf (dump_file, "Total number of arcs: %d\n", total_num_arcs);
1530 fprintf (dump_file, "Total number of instrumented arcs: %d\n",
1531 total_num_arcs_instrumented);
1532 fprintf (dump_file, "Total number of blocks created: %d\n",
1533 total_num_blocks_created);
1534 fprintf (dump_file, "Total number of graph solution passes: %d\n",
1535 total_num_passes);
1536 if (total_num_times_called != 0)
1537 fprintf (dump_file, "Average number of graph solution passes: %d\n",
1538 (total_num_passes + (total_num_times_called >> 1))
1539 / total_num_times_called);
1540 fprintf (dump_file, "Total number of branches: %d\n", total_num_branches);
1541 fprintf (dump_file, "Total number of branches never executed: %d\n",
1542 total_num_never_executed);
1543 if (total_num_branches)
1544 {
1545 int i;
1546
1547 for (i = 0; i < 10; i++)
1548 fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
1549 (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1550 / total_num_branches, 5*i, 5*i+5);
1551 }
1552 }
1553 }
1554 \f
1555 /* The label used by the arc profiling code. */
1556
1557 static rtx profiler_label;
1558
1559 /* Initialize the profiler_label. */
1560
1561 static void
1562 init_arc_profiler ()
1563 {
1564 /* Generate and save a copy of this so it can be shared. */
1565 char *name = ggc_alloc_string (NULL, 20);
1566 ASM_GENERATE_INTERNAL_LABEL (name, "LPBX", 2);
1567 profiler_label = gen_rtx_SYMBOL_REF (Pmode, name);
1568 ggc_add_rtx_root (&profiler_label, 1);
1569 }
1570
/* Output instructions as RTL to increment the arc execution count.
   ARCNO is the index of this arc's counter in the profiler table
   (counter ARCNO lives LONG_TYPE_SIZE/BITS_PER_UNIT * ARCNO bytes past
   PROFILER_LABEL); INSERT_AFTER is the insn after which the counting
   code is emitted.  */

static void
output_arc_profiler (arcno, insert_after)
     int arcno;
     rtx insert_after;
{
  /* Address of this arc's slot in the counter table; slot 0 is the
     table label itself.  */
  rtx profiler_target_addr
    = (arcno ? plus_constant (profiler_label,
			      LONG_TYPE_SIZE / BITS_PER_UNIT * arcno)
       : profiler_label);
  enum machine_mode mode = mode_for_size (LONG_TYPE_SIZE, MODE_INT, 0);
  rtx profiler_reg = gen_reg_rtx (mode);
  rtx address_reg = gen_reg_rtx (Pmode);
  rtx mem_ref, add_ref;
  rtx sequence;

  /* In this case, reload can use explicitly mentioned hard registers for
     reloads.  It is not safe to output profiling code between a call
     and the instruction that copies the result to a pseudo-reg.  This
     is because reload may allocate one of the profiling code pseudo-regs
     to the return value reg, thus clobbering the return value.  So we
     must check for calls here, and emit the profiling code after the
     instruction that uses the return value, if any.

     ??? The code here performs the same tests that reload does so hopefully
     all the bases are covered.  */

  if (SMALL_REGISTER_CLASSES
      && GET_CODE (insert_after) == CALL_INSN
      && (GET_CODE (PATTERN (insert_after)) == SET
	  || (GET_CODE (PATTERN (insert_after)) == PARALLEL
	      && GET_CODE (XVECEXP (PATTERN (insert_after), 0, 0)) == SET)))
    {
      rtx return_reg;
      rtx next_insert_after = next_nonnote_insn (insert_after);

      /* The first insn after the call may be a stack pop, skip it.  */
      if (next_insert_after
	  && GET_CODE (next_insert_after) == INSN
	  && GET_CODE (PATTERN (next_insert_after)) == SET
	  && SET_DEST (PATTERN (next_insert_after)) == stack_pointer_rtx)
	next_insert_after = next_nonnote_insn (next_insert_after);

      if (next_insert_after
	  && GET_CODE (next_insert_after) == INSN)
	{
	  /* Extract the destination of the call's value-copying SET,
	     whether the pattern is a bare SET or a PARALLEL whose first
	     element is the SET.  */
	  if (GET_CODE (PATTERN (insert_after)) == SET)
	    return_reg = SET_DEST (PATTERN (insert_after));
	  else
	    return_reg = SET_DEST (XVECEXP (PATTERN (insert_after), 0, 0));

	  /* Now, NEXT_INSERT_AFTER may be an instruction that uses the
	     return value.  However, it could also be something else,
	     like a CODE_LABEL, so check that the code is INSN.  */
	  if (next_insert_after != 0
	      && GET_RTX_CLASS (GET_CODE (next_insert_after)) == 'i'
	      && reg_referenced_p (return_reg, PATTERN (next_insert_after)))
	    insert_after = next_insert_after;
	}
    }

  /* Emit: load address, load counter, add 1, store counter back.  */
  start_sequence ();

  emit_move_insn (address_reg, profiler_target_addr);
  mem_ref = gen_rtx_MEM (mode, address_reg);
  emit_move_insn (profiler_reg, mem_ref);

  add_ref = gen_rtx_PLUS (mode, profiler_reg, GEN_INT (1));
  emit_move_insn (profiler_reg, add_ref);

  /* This is the same rtx as above, but it is not legal to share this rtx.  */
  mem_ref = gen_rtx_MEM (mode, address_reg);
  emit_move_insn (mem_ref, profiler_reg);

  sequence = gen_sequence ();
  end_sequence ();
  emit_insn_after (sequence, insert_after);
}
1650
/* Output code for a constructor that will invoke __bb_init_func, if
   this has not already been done.  The constructor is a synthesized
   function (named after the file function name plus "GCOV") that passes
   the address of this object file's profile block (the LPBX0 label) to
   the runtime.  Compiles the synthesized function immediately via
   rest_of_compilation.  */

void
output_func_start_profiler ()
{
  tree fnname, fndecl;
  char *name, *cfnname;
  rtx table_address;
  enum machine_mode mode = mode_for_size (LONG_TYPE_SIZE, MODE_INT, 0);
  int save_flag_inline_functions = flag_inline_functions;

  /* It's either already been output, or we don't need it because we're
     not doing profile-arcs.  */
  if (! need_func_profiler)
    return;

  need_func_profiler = 0;

  /* Synthesize a constructor function to invoke __bb_init_func with a
     pointer to this object file's profile block.  */

  /* Try and make a unique name given the "file function name".

     And no, I don't like this either.  */

  fnname = get_file_function_name ('I');
  cfnname = IDENTIFIER_POINTER (fnname);
  /* +5 covers the 4-character "GCOV" suffix plus the terminating NUL.  */
  name = xmalloc (strlen (cfnname) + 5);
  sprintf (name, "%sGCOV",cfnname);
  /* get_identifier copies the string, so the temporary can be freed.  */
  fnname = get_identifier (name);
  free (name);

  /* Build a public, defined, void(void) FUNCTION_DECL for the ctor.  */
  fndecl = build_decl (FUNCTION_DECL, fnname,
		       build_function_type (void_type_node, NULL_TREE));
  DECL_EXTERNAL (fndecl) = 0;
  TREE_PUBLIC (fndecl) = 1;
  DECL_ASSEMBLER_NAME (fndecl) = fnname;
  DECL_RESULT (fndecl) = build_decl (RESULT_DECL, NULL_TREE, void_type_node);

  /* Standard boilerplate to start compiling a new function body.  */
  fndecl = pushdecl (fndecl);
  rest_of_decl_compilation (fndecl, 0, 1, 0);
  announce_function (fndecl);
  current_function_decl = fndecl;
  DECL_INITIAL (fndecl) = error_mark_node;
  temporary_allocation ();
  make_function_rtl (fndecl);
  init_function_start (fndecl, input_filename, lineno);
  pushlevel (0);
  expand_function_start (fndecl, 0);

  /* Actually generate the code to call __bb_init_func.  The argument is
     the address of the LPBX0 label, i.e. this file's profile block.  */
  name = ggc_alloc_string (NULL, 20);
  ASM_GENERATE_INTERNAL_LABEL (name, "LPBX", 0);
  table_address = force_reg (Pmode, gen_rtx_SYMBOL_REF (Pmode, name));
  emit_library_call (gen_rtx_SYMBOL_REF
		     (Pmode, ggc_alloc_string ("__bb_init_func", 14)), 0,
		     mode, 1, table_address, Pmode);

  expand_function_end (input_filename, lineno, 0);
  poplevel (1, 0, 1);

  /* Since fndecl isn't in the list of globals, it would never be emitted
     when it's considered to be 'safe' for inlining, so turn off
     flag_inline_functions.  */
  flag_inline_functions = 0;

  rest_of_compilation (fndecl);

  /* Reset flag_inline_functions to its original value.  */
  flag_inline_functions = save_flag_inline_functions;

  if (! quiet_flag)
    fflush (asm_out_file);
  current_function_decl = NULL_TREE;

  /* Register the synthesized function in the ctor section so it runs
     before main.  */
  assemble_constructor (IDENTIFIER_POINTER (DECL_NAME (fndecl)));
}