Makefile.in (bb-reorder.o): Add dependency on $(FIBHEAP_H).
[gcc.git] / gcc / bb-reorder.c
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This (greedy) algorithm constructs traces in several rounds.
22 The construction starts from "seeds". The seed for the first round
23 is the entry point of function. When there are more than one seed
24 that one is selected first that has the lowest key in the heap
25 (see function bb_to_key). Then the algorithm repeatedly adds the most
26 probable successor to the end of a trace. Finally it connects the traces.
27
28 There are two parameters: Branch Threshold and Exec Threshold.
29 If the edge to a successor of the actual basic block is lower than
30 Branch Threshold or the frequency of the successor is lower than
31 Exec Threshold the successor will be the seed in one of the next rounds.
32 Each round has these parameters lower than the previous one.
33 The last round has to have these parameters set to zero
34 so that the remaining blocks are picked up.
35
36 The algorithm selects the most probable successor from all unvisited
37 successors and successors that have been added to this trace.
38 The other successors (that has not been "sent" to the next round) will be
39 other seeds for this round and the secondary traces will start in them.
40 If the successor has not been visited in this trace it is added to the trace
41 (however, there is some heuristic for simple branches).
42 If the successor has been visited in this trace the loop has been found.
43 If the loop has many iterations the loop is rotated so that the
44 source block of the most probable edge going out from the loop
45 is the last block of the trace.
46 If the loop has few iterations and there is no edge from the last block of
47 the loop going out from loop the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
49
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
56 The rest of traces are simply connected so there will be a jump to the
57 beginning of the rest of trace.
58
59
60 References:
61
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
65
66 */
67
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "basic-block.h"
74 #include "flags.h"
75 #include "output.h"
76 #include "cfglayout.h"
77 #include "fibheap.h"
78 #include "target.h"
79
80 /* The number of rounds. */
81 #define N_ROUNDS 4
82
83 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
84 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0};
85
86 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
87 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0};
88
89 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
90 block the edge destination is not duplicated while connecting traces. */
91 #define DUPLICATION_THRESHOLD 100
92
93 /* Length of unconditional jump instruction. */
94 static int uncond_jump_length;
95
96 /* Structure to hold needed information for each basic block. */
97 typedef struct bbro_basic_block_data_def
98 {
99 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
100 int start_of_trace;
101
102 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
103 int end_of_trace;
104
105 /* Which heap is BB in (if any)? */
106 fibheap_t heap;
107
108 /* Which heap node is BB in (if any)? */
109 fibnode_t node;
110 } bbro_basic_block_data;
111
112 /* The current size of the following dynamic array. */
113 static int array_size;
114
115 /* The array which holds needed information for basic blocks. */
116 static bbro_basic_block_data *bbd;
117
118 /* To avoid frequent reallocation the size of arrays is greater than needed,
119 the number of elements is (not less than) 1.25 * size_wanted. */
120 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
121
122 /* Free the memory and set the pointer to NULL. */
123 #define FREE(P) \
124 do { if (P) { free (P); P = 0; } else { abort (); } } while (0)
125
126 /* Structure for holding information about a trace. */
127 struct trace
128 {
129 /* First and last basic block of the trace. */
130 basic_block first, last;
131
132 /* The round of the STC creation which this trace was found in. */
133 int round;
134
135 /* The length (i.e. the number of basic blocks) of the trace. */
136 int length;
137 };
138
139 /* Maximum frequency and count of one of the entry blocks. */
140 int max_entry_frequency;
141 gcov_type max_entry_count;
142
143 /* Local function prototypes. */
144 static void find_traces PARAMS ((int *, struct trace *));
145 static basic_block rotate_loop PARAMS ((edge, struct trace *, int));
146 static void mark_bb_visited PARAMS ((basic_block, int));
147 static void find_traces_1_round PARAMS ((int, int, gcov_type,
148 struct trace *, int *, int,
149 fibheap_t *));
150 static basic_block copy_bb PARAMS ((basic_block, edge,
151 basic_block, int));
152 static fibheapkey_t bb_to_key PARAMS ((basic_block));
153 static bool better_edge_p PARAMS ((basic_block, edge, int, int,
154 int, int));
155 static void connect_traces PARAMS ((int, struct trace *));
156 static bool copy_bb_p PARAMS ((basic_block, int));
157 static int get_uncond_jump_length PARAMS ((void));
158 \f
159 /* Find the traces for Software Trace Cache. Chain each trace through
160 RBI()->next. Store the number of traces to N_TRACES and description of
161 traces to TRACES. */
162
163 static void
164 find_traces (n_traces, traces)
165 int *n_traces;
166 struct trace *traces;
167 {
168 int i;
169 edge e;
170 fibheap_t heap;
171
172 /* Insert entry points of function into heap. */
173 heap = fibheap_new ();
174 max_entry_frequency = 0;
175 max_entry_count = 0;
176 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
177 {
178 bbd[e->dest->index].heap = heap;
179 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
180 e->dest);
181 if (e->dest->frequency > max_entry_frequency)
182 max_entry_frequency = e->dest->frequency;
183 if (e->dest->count > max_entry_count)
184 max_entry_count = e->dest->count;
185 }
186
187 /* Find the traces. */
188 for (i = 0; i < N_ROUNDS; i++)
189 {
190 gcov_type count_threshold;
191
192 if (rtl_dump_file)
193 fprintf (rtl_dump_file, "STC - round %d\n", i + 1);
194
195 if (max_entry_count < INT_MAX / 1000)
196 count_threshold = max_entry_count * exec_threshold[i] / 1000;
197 else
198 count_threshold = max_entry_count / 1000 * exec_threshold[i];
199
200 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
201 max_entry_frequency * exec_threshold[i] / 1000,
202 count_threshold, traces, n_traces, i, &heap);
203 }
204 fibheap_delete (heap);
205
206 if (rtl_dump_file)
207 {
208 for (i = 0; i < *n_traces; i++)
209 {
210 basic_block bb;
211 fprintf (rtl_dump_file, "Trace %d (round %d): ", i + 1,
212 traces[i].round + 1);
213 for (bb = traces[i].first; bb != traces[i].last; bb = RBI (bb)->next)
214 fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency);
215 fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency);
216 }
217 fflush (rtl_dump_file);
218 }
219 }
220
221 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
222 (with sequential number TRACE_N). */
223
224 static basic_block
225 rotate_loop (back_edge, trace, trace_n)
226 edge back_edge;
227 struct trace *trace;
228 int trace_n;
229 {
230 basic_block bb;
231
232 /* Information about the best end (end after rotation) of the loop. */
233 basic_block best_bb = NULL;
234 edge best_edge = NULL;
235 int best_freq = -1;
236 gcov_type best_count = -1;
237 /* The best edge is preferred when its destination is not visited yet
238 or is a start block of some trace. */
239 bool is_preferred = false;
240
241 /* Find the most frequent edge that goes out from current trace. */
242 bb = back_edge->dest;
243 do
244 {
245 edge e;
246 for (e = bb->succ; e; e = e->succ_next)
247 if (e->dest != EXIT_BLOCK_PTR
248 && RBI (e->dest)->visited != trace_n
249 && (e->flags & EDGE_CAN_FALLTHRU)
250 && !(e->flags & EDGE_COMPLEX))
251 {
252 if (is_preferred)
253 {
254 /* The best edge is preferred. */
255 if (!RBI (e->dest)->visited
256 || bbd[e->dest->index].start_of_trace >= 0)
257 {
258 /* The current edge E is also preferred. */
259 int freq = EDGE_FREQUENCY (e);
260 if (freq > best_freq || e->count > best_count)
261 {
262 best_freq = freq;
263 best_count = e->count;
264 best_edge = e;
265 best_bb = bb;
266 }
267 }
268 }
269 else
270 {
271 if (!RBI (e->dest)->visited
272 || bbd[e->dest->index].start_of_trace >= 0)
273 {
274 /* The current edge E is preferred. */
275 is_preferred = true;
276 best_freq = EDGE_FREQUENCY (e);
277 best_count = e->count;
278 best_edge = e;
279 best_bb = bb;
280 }
281 else
282 {
283 int freq = EDGE_FREQUENCY (e);
284 if (!best_edge || freq > best_freq || e->count > best_count)
285 {
286 best_freq = freq;
287 best_count = e->count;
288 best_edge = e;
289 best_bb = bb;
290 }
291 }
292 }
293 }
294 bb = RBI (bb)->next;
295 }
296 while (bb != back_edge->dest);
297
298 if (best_bb)
299 {
300 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
301 the trace. */
302 if (back_edge->dest == trace->first)
303 {
304 trace->first = RBI (best_bb)->next;
305 }
306 else
307 {
308 basic_block prev_bb;
309
310 for (prev_bb = trace->first;
311 RBI (prev_bb)->next != back_edge->dest;
312 prev_bb = RBI (prev_bb)->next)
313 ;
314 RBI (prev_bb)->next = RBI (best_bb)->next;
315
316 /* Try to get rid of uncond jump to cond jump. */
317 if (prev_bb->succ && !prev_bb->succ->succ_next)
318 {
319 basic_block header = prev_bb->succ->dest;
320
321 /* Duplicate HEADER if it is a small block containing cond jump
322 in the end. */
323 if (any_condjump_p (header->end) && copy_bb_p (header, 0))
324 {
325 copy_bb (header, prev_bb->succ, prev_bb, trace_n);
326 }
327 }
328 }
329 }
330 else
331 {
332 /* We have not found suitable loop tail so do no rotation. */
333 best_bb = back_edge->src;
334 }
335 RBI (best_bb)->next = NULL;
336 return best_bb;
337 }
338
339 /* This function marks BB that it was visited in trace number TRACE. */
340
341 static void
342 mark_bb_visited (bb, trace)
343 basic_block bb;
344 int trace;
345 {
346 RBI (bb)->visited = trace;
347 if (bbd[bb->index].heap)
348 {
349 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
350 bbd[bb->index].heap = NULL;
351 bbd[bb->index].node = NULL;
352 }
353 }
354
355 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
356 not include basic blocks their probability is lower than BRANCH_TH or their
357 frequency is lower than EXEC_TH into traces (or count is lower than
358 COUNT_TH). It stores the new traces into TRACES and modifies the number of
359 traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
360 expects that starting basic blocks are in *HEAP and at the end it deletes
361 *HEAP and stores starting points for the next round into new *HEAP. */
362
363 static void
364 find_traces_1_round (branch_th, exec_th, count_th, traces, n_traces, round,
365 heap)
366 int branch_th;
367 int exec_th;
368 gcov_type count_th;
369 struct trace *traces;
370 int *n_traces;
371 int round;
372 fibheap_t *heap;
373 {
374 /* Heap for discarded basic blocks which are possible starting points for
375 the next round. */
376 fibheap_t new_heap = fibheap_new ();
377
378 while (!fibheap_empty (*heap))
379 {
380 basic_block bb;
381 struct trace *trace;
382 edge best_edge, e;
383 fibheapkey_t key;
384
385 bb = fibheap_extract_min (*heap);
386 bbd[bb->index].heap = NULL;
387 bbd[bb->index].node = NULL;
388
389 if (rtl_dump_file)
390 fprintf (rtl_dump_file, "Getting bb %d\n", bb->index);
391
392 /* If the BB's frequency is too low send BB to the next round. */
393 if (bb->frequency < exec_th || bb->count < count_th
394 || ((round < N_ROUNDS - 1) && probably_never_executed_bb_p (bb)))
395 {
396 int key = bb_to_key (bb);
397 bbd[bb->index].heap = new_heap;
398 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
399
400 if (rtl_dump_file)
401 fprintf (rtl_dump_file,
402 " Possible start point of next round: %d (key: %d)\n",
403 bb->index, key);
404 continue;
405 }
406
407 trace = traces + *n_traces;
408 trace->first = bb;
409 trace->round = round;
410 trace->length = 0;
411 (*n_traces)++;
412
413 do
414 {
415 int prob, freq;
416
417 /* The probability and frequency of the best edge. */
418 int best_prob = INT_MIN / 2;
419 int best_freq = INT_MIN / 2;
420
421 best_edge = NULL;
422 mark_bb_visited (bb, *n_traces);
423 trace->length++;
424
425 if (rtl_dump_file)
426 fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n",
427 bb->index, *n_traces - 1);
428
429 /* Select the successor that will be placed after BB. */
430 for (e = bb->succ; e; e = e->succ_next)
431 {
432 if (e->flags & EDGE_FAKE)
433 abort ();
434
435 if (e->dest == EXIT_BLOCK_PTR)
436 continue;
437
438 if (RBI (e->dest)->visited
439 && RBI (e->dest)->visited != *n_traces)
440 continue;
441
442 prob = e->probability;
443 freq = EDGE_FREQUENCY (e);
444
445 /* Edge that cannot be fallthru or improbable or infrequent
446 successor (ie. it is unsuitable successor). */
447 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
448 || prob < branch_th || freq < exec_th || e->count < count_th)
449 continue;
450
451 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq))
452 {
453 best_edge = e;
454 best_prob = prob;
455 best_freq = freq;
456 }
457 }
458
459 /* Add all non-selected successors to the heaps. */
460 for (e = bb->succ; e; e = e->succ_next)
461 {
462 if (e == best_edge
463 || e->dest == EXIT_BLOCK_PTR
464 || RBI (e->dest)->visited)
465 continue;
466
467 key = bb_to_key (e->dest);
468
469 if (bbd[e->dest->index].heap)
470 {
471 /* E->DEST is already in some heap. */
472 if (key != bbd[e->dest->index].node->key)
473 {
474 if (rtl_dump_file)
475 {
476 fprintf (rtl_dump_file,
477 "Changing key for bb %d from %ld to %ld.\n",
478 e->dest->index,
479 (long) bbd[e->dest->index].node->key,
480 key);
481 }
482 fibheap_replace_key (bbd[e->dest->index].heap,
483 bbd[e->dest->index].node, key);
484 }
485 }
486 else
487 {
488 fibheap_t which_heap = *heap;
489
490 prob = e->probability;
491 freq = EDGE_FREQUENCY (e);
492
493 if (!(e->flags & EDGE_CAN_FALLTHRU)
494 || (e->flags & EDGE_COMPLEX)
495 || prob < branch_th || freq < exec_th
496 || e->count < count_th)
497 {
498 if (round < N_ROUNDS - 1)
499 which_heap = new_heap;
500 }
501
502 bbd[e->dest->index].heap = which_heap;
503 bbd[e->dest->index].node = fibheap_insert (which_heap,
504 key, e->dest);
505
506 if (rtl_dump_file)
507 {
508 fprintf (rtl_dump_file,
509 " Possible start of %s round: %d (key: %ld)\n",
510 (which_heap == new_heap) ? "next" : "this",
511 e->dest->index, (long) key);
512 }
513
514 }
515 }
516
517 if (best_edge) /* Suitable successor was found. */
518 {
519 if (RBI (best_edge->dest)->visited == *n_traces)
520 {
521 /* We do nothing with one basic block loops. */
522 if (best_edge->dest != bb)
523 {
524 if (EDGE_FREQUENCY (best_edge)
525 > 4 * best_edge->dest->frequency / 5)
526 {
527 /* The loop has at least 4 iterations. If the loop
528 header is not the first block of the function
529 we can rotate the loop. */
530
531 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
532 {
533 if (rtl_dump_file)
534 {
535 fprintf (rtl_dump_file,
536 "Rotating loop %d - %d\n",
537 best_edge->dest->index, bb->index);
538 }
539 RBI (bb)->next = best_edge->dest;
540 bb = rotate_loop (best_edge, trace, *n_traces);
541 }
542 }
543 else
544 {
545 /* The loop has less than 4 iterations. */
546
547 /* Check whether there is another edge from BB. */
548 edge another_edge;
549 for (another_edge = bb->succ;
550 another_edge;
551 another_edge = another_edge->succ_next)
552 if (another_edge != best_edge)
553 break;
554
555 if (!another_edge && copy_bb_p (best_edge->dest,
556 !optimize_size))
557 {
558 bb = copy_bb (best_edge->dest, best_edge, bb,
559 *n_traces);
560 }
561 }
562 }
563
564 /* Terminate the trace. */
565 break;
566 }
567 else
568 {
569 /* Check for a situation
570
571 A
572 /|
573 B |
574 \|
575 C
576
577 where
578 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
579 >= EDGE_FREQUENCY (AC).
580 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
581 Best ordering is then A B C.
582
583 This situation is created for example by:
584
585 if (A) B;
586 C;
587
588 */
589
590 for (e = bb->succ; e; e = e->succ_next)
591 if (e != best_edge
592 && (e->flags & EDGE_CAN_FALLTHRU)
593 && !(e->flags & EDGE_COMPLEX)
594 && !RBI (e->dest)->visited
595 && !e->dest->pred->pred_next
596 && e->dest->succ
597 && (e->dest->succ->flags & EDGE_CAN_FALLTHRU)
598 && !(e->dest->succ->flags & EDGE_COMPLEX)
599 && !e->dest->succ->succ_next
600 && e->dest->succ->dest == best_edge->dest
601 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
602 {
603 best_edge = e;
604 if (rtl_dump_file)
605 fprintf (rtl_dump_file, "Selecting BB %d\n",
606 best_edge->dest->index);
607 break;
608 }
609
610 RBI (bb)->next = best_edge->dest;
611 bb = best_edge->dest;
612 }
613 }
614 }
615 while (best_edge);
616 trace->last = bb;
617 bbd[trace->first->index].start_of_trace = *n_traces - 1;
618 bbd[trace->last->index].end_of_trace = *n_traces - 1;
619
620 /* The trace is terminated so we have to recount the keys in heap
621 (some block can have a lower key because now one of its predecessors
622 is an end of the trace). */
623 for (e = bb->succ; e; e = e->succ_next)
624 {
625 if (e->dest == EXIT_BLOCK_PTR
626 || RBI (e->dest)->visited)
627 continue;
628
629 if (bbd[e->dest->index].heap)
630 {
631 key = bb_to_key (e->dest);
632 if (key != bbd[e->dest->index].node->key)
633 {
634 if (rtl_dump_file)
635 {
636 fprintf (rtl_dump_file,
637 "Changing key for bb %d from %ld to %ld.\n",
638 e->dest->index,
639 (long) bbd[e->dest->index].node->key, key);
640 }
641 fibheap_replace_key (bbd[e->dest->index].heap,
642 bbd[e->dest->index].node,
643 key);
644 }
645 }
646 }
647 }
648
649 fibheap_delete (*heap);
650
651 /* "Return" the new heap. */
652 *heap = new_heap;
653 }
654
655 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
656 it to trace after BB, mark OLD_BB visited and update pass' data structures
657 (TRACE is a number of trace which OLD_BB is duplicated to). */
658
659 static basic_block
660 copy_bb (old_bb, e, bb, trace)
661 basic_block old_bb;
662 edge e;
663 basic_block bb;
664 int trace;
665 {
666 basic_block new_bb;
667
668 new_bb = cfg_layout_duplicate_bb (old_bb, e);
669 if (e->dest != new_bb)
670 abort ();
671 if (RBI (e->dest)->visited)
672 abort ();
673 if (rtl_dump_file)
674 fprintf (rtl_dump_file,
675 "Duplicated bb %d (created bb %d)\n",
676 old_bb->index, new_bb->index);
677 RBI (new_bb)->visited = trace;
678 RBI (new_bb)->next = RBI (bb)->next;
679 RBI (bb)->next = new_bb;
680
681 if (new_bb->index >= array_size || last_basic_block > array_size)
682 {
683 int i;
684 int new_size;
685
686 new_size = MAX (last_basic_block, new_bb->index + 1);
687 new_size = GET_ARRAY_SIZE (new_size);
688 bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
689 for (i = array_size; i < new_size; i++)
690 {
691 bbd[i].start_of_trace = -1;
692 bbd[i].end_of_trace = -1;
693 bbd[i].heap = NULL;
694 bbd[i].node = NULL;
695 }
696 array_size = new_size;
697
698 if (rtl_dump_file)
699 {
700 fprintf (rtl_dump_file,
701 "Growing the dynamic array to %d elements.\n",
702 array_size);
703 }
704 }
705
706 return new_bb;
707 }
708
709 /* Compute and return the key (for the heap) of the basic block BB. */
710
711 static fibheapkey_t
712 bb_to_key (bb)
713 basic_block bb;
714 {
715 edge e;
716
717 int priority = 0;
718
719 /* Do not start in probably never executed blocks. */
720 if (probably_never_executed_bb_p (bb))
721 return BB_FREQ_MAX;
722
723 /* Prefer blocks whose predecessor is an end of some trace
724 or whose predecessor edge is EDGE_DFS_BACK. */
725 for (e = bb->pred; e; e = e->pred_next)
726 {
727 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
728 || (e->flags & EDGE_DFS_BACK))
729 {
730 int edge_freq = EDGE_FREQUENCY (e);
731
732 if (edge_freq > priority)
733 priority = edge_freq;
734 }
735 }
736
737 if (priority)
738 /* The block with priority should have significantly lower key. */
739 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
740 return -bb->frequency;
741 }
742
743 /* Return true when the edge E from basic block BB is better than the temporary
744 best edge (details are in function). The probability of edge E is PROB. The
745 frequency of the successor is FREQ. The current best probability is
746 BEST_PROB, the best frequency is BEST_FREQ.
747 The edge is considered to be equivalent when PROB does not differ much from
748 BEST_PROB; similarly for frequency. */
749
750 static bool
751 better_edge_p (bb, e, prob, freq, best_prob, best_freq)
752 basic_block bb;
753 edge e;
754 int prob;
755 int freq;
756 int best_prob;
757 int best_freq;
758 {
759 bool is_better_edge;
760
761 /* The BEST_* values do not have to be best, but can be a bit smaller than
762 maximum values. */
763 int diff_prob = best_prob / 10;
764 int diff_freq = best_freq / 10;
765
766 if (prob > best_prob + diff_prob)
767 /* The edge has higher probability than the temporary best edge. */
768 is_better_edge = true;
769 else if (prob < best_prob - diff_prob)
770 /* The edge has lower probability than the temporary best edge. */
771 is_better_edge = false;
772 else if (freq < best_freq - diff_freq)
773 /* The edge and the temporary best edge have almost equivalent
774 probabilities. The higher frequency of a successor now means
775 that there is another edge going into that successor.
776 This successor has lower frequency so it is better. */
777 is_better_edge = true;
778 else if (freq > best_freq + diff_freq)
779 /* This successor has higher frequency so it is worse. */
780 is_better_edge = false;
781 else if (e->dest->prev_bb == bb)
782 /* The edges have equivalent probabilities and the successors
783 have equivalent frequencies. Select the previous successor. */
784 is_better_edge = true;
785 else
786 is_better_edge = false;
787
788 return is_better_edge;
789 }
790
791 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
792
793 static void
794 connect_traces (n_traces, traces)
795 int n_traces;
796 struct trace *traces;
797 {
798 int i;
799 bool *connected;
800 int last_trace;
801 int freq_threshold;
802 gcov_type count_threshold;
803
804 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
805 if (max_entry_count < INT_MAX / 1000)
806 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
807 else
808 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
809
810 connected = xcalloc (n_traces, sizeof (bool));
811 last_trace = -1;
812 for (i = 0; i < n_traces; i++)
813 {
814 int t = i;
815 int t2;
816 edge e, best;
817 int best_len;
818
819 if (connected[t])
820 continue;
821
822 connected[t] = true;
823
824 /* Find the predecessor traces. */
825 for (t2 = t; t2 > 0;)
826 {
827 best = NULL;
828 best_len = 0;
829 for (e = traces[t2].first->pred; e; e = e->pred_next)
830 {
831 int si = e->src->index;
832
833 if (e->src != ENTRY_BLOCK_PTR
834 && (e->flags & EDGE_CAN_FALLTHRU)
835 && !(e->flags & EDGE_COMPLEX)
836 && bbd[si].end_of_trace >= 0
837 && !connected[bbd[si].end_of_trace]
838 && (!best
839 || e->probability > best->probability
840 || (e->probability == best->probability
841 && traces[bbd[si].end_of_trace].length > best_len)))
842 {
843 best = e;
844 best_len = traces[bbd[si].end_of_trace].length;
845 }
846 }
847 if (best)
848 {
849 RBI (best->src)->next = best->dest;
850 t2 = bbd[best->src->index].end_of_trace;
851 connected[t2] = true;
852 if (rtl_dump_file)
853 {
854 fprintf (rtl_dump_file, "Connection: %d %d\n",
855 best->src->index, best->dest->index);
856 }
857 }
858 else
859 break;
860 }
861
862 if (last_trace >= 0)
863 RBI (traces[last_trace].last)->next = traces[t2].first;
864 last_trace = t;
865
866 /* Find the successor traces. */
867 while (1)
868 {
869 /* Find the continuation of the chain. */
870 best = NULL;
871 best_len = 0;
872 for (e = traces[t].last->succ; e; e = e->succ_next)
873 {
874 int di = e->dest->index;
875
876 if (e->dest != EXIT_BLOCK_PTR
877 && (e->flags & EDGE_CAN_FALLTHRU)
878 && !(e->flags & EDGE_COMPLEX)
879 && bbd[di].start_of_trace >= 0
880 && !connected[bbd[di].start_of_trace]
881 && (!best
882 || e->probability > best->probability
883 || (e->probability == best->probability
884 && traces[bbd[di].start_of_trace].length > best_len)))
885 {
886 best = e;
887 best_len = traces[bbd[di].start_of_trace].length;
888 }
889 }
890
891 if (best)
892 {
893 if (rtl_dump_file)
894 {
895 fprintf (rtl_dump_file, "Connection: %d %d\n",
896 best->src->index, best->dest->index);
897 }
898 t = bbd[best->dest->index].start_of_trace;
899 RBI (traces[last_trace].last)->next = traces[t].first;
900 connected[t] = true;
901 last_trace = t;
902 }
903 else
904 {
905 /* Try to connect the traces by duplication of 1 block. */
906 edge e2;
907 basic_block next_bb = NULL;
908
909 for (e = traces[t].last->succ; e; e = e->succ_next)
910 if (e->dest != EXIT_BLOCK_PTR
911 && (e->flags & EDGE_CAN_FALLTHRU)
912 && !(e->flags & EDGE_COMPLEX)
913 && (EDGE_FREQUENCY (e) >= freq_threshold)
914 && (e->count >= count_threshold)
915 && (!best
916 || e->probability > best->probability))
917 {
918 edge best2 = NULL;
919 int best2_len = 0;
920
921 for (e2 = e->dest->succ; e2; e2 = e2->succ_next)
922 {
923 int di = e2->dest->index;
924
925 if (e2->dest == EXIT_BLOCK_PTR
926 || ((e2->flags & EDGE_CAN_FALLTHRU)
927 && !(e2->flags & EDGE_COMPLEX)
928 && bbd[di].start_of_trace >= 0
929 && !connected[bbd[di].start_of_trace]
930 && (EDGE_FREQUENCY (e2) >= freq_threshold)
931 && (e2->count >= count_threshold)
932 && (!best2
933 || e2->probability > best2->probability
934 || (e2->probability == best2->probability
935 && traces[bbd[di].start_of_trace].length
936 > best2_len))))
937 {
938 best = e;
939 best2 = e2;
940 if (e2->dest != EXIT_BLOCK_PTR)
941 best2_len = traces[bbd[di].start_of_trace].length;
942 else
943 best2_len = INT_MAX;
944 next_bb = e2->dest;
945 }
946 }
947 }
948 if (best && next_bb && copy_bb_p (best->dest, !optimize_size))
949 {
950 basic_block new_bb;
951
952 if (rtl_dump_file)
953 {
954 fprintf (rtl_dump_file, "Connection: %d %d ",
955 traces[t].last->index, best->dest->index);
956 if (next_bb == EXIT_BLOCK_PTR)
957 fprintf (rtl_dump_file, "exit\n");
958 else
959 fprintf (rtl_dump_file, "%d\n", next_bb->index);
960 }
961
962 new_bb = copy_bb (best->dest, best, traces[t].last, t);
963 traces[t].last = new_bb;
964 if (next_bb != EXIT_BLOCK_PTR)
965 {
966 t = bbd[next_bb->index].start_of_trace;
967 RBI (traces[last_trace].last)->next = traces[t].first;
968 connected[t] = true;
969 last_trace = t;
970 }
971 else
972 break; /* Stop finding the successor traces. */
973 }
974 else
975 break; /* Stop finding the successor traces. */
976 }
977 }
978 }
979
980 if (rtl_dump_file)
981 {
982 basic_block bb;
983
984 fprintf (rtl_dump_file, "Final order:\n");
985 for (bb = traces[0].first; bb; bb = RBI (bb)->next)
986 fprintf (rtl_dump_file, "%d ", bb->index);
987 fprintf (rtl_dump_file, "\n");
988 fflush (rtl_dump_file);
989 }
990
991 FREE (connected);
992 }
993
994 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
995 when code size is allowed to grow by duplication. */
996
997 static bool
998 copy_bb_p (bb, code_may_grow)
999 basic_block bb;
1000 int code_may_grow;
1001 {
1002 int size = 0;
1003 int max_size = uncond_jump_length;
1004 rtx insn;
1005
1006 if (!bb->frequency)
1007 return false;
1008 if (!bb->pred || !bb->pred->pred_next)
1009 return false;
1010 if (!cfg_layout_can_duplicate_bb_p (bb))
1011 return false;
1012
1013 if (code_may_grow && maybe_hot_bb_p (bb))
1014 max_size *= 8;
1015
1016 for (insn = bb->head; insn != NEXT_INSN (bb->end);
1017 insn = NEXT_INSN (insn))
1018 {
1019 if (INSN_P (insn))
1020 size += get_attr_length (insn);
1021 }
1022
1023 if (size <= max_size)
1024 return true;
1025
1026 if (rtl_dump_file)
1027 {
1028 fprintf (rtl_dump_file,
1029 "Block %d can't be copied because its size = %d.\n",
1030 bb->index, size);
1031 }
1032
1033 return false;
1034 }
1035
1036 /* Return the length of unconditional jump instruction. */
1037
1038 static int
1039 get_uncond_jump_length ()
1040 {
1041 rtx label, jump;
1042 int length;
1043
1044 label = emit_label_before (gen_label_rtx (), get_insns ());
1045 jump = emit_jump_insn (gen_jump (label));
1046
1047 length = get_attr_length (jump);
1048
1049 delete_insn (jump);
1050 delete_insn (label);
1051 return length;
1052 }
1053
1054 /* Reorder basic blocks. The main entry point to this file. */
1055
1056 void
1057 reorder_basic_blocks ()
1058 {
1059 int n_traces;
1060 int i;
1061 struct trace *traces;
1062
1063 if (n_basic_blocks <= 1)
1064 return;
1065
1066 if ((* targetm.cannot_modify_jumps_p) ())
1067 return;
1068
1069 cfg_layout_initialize (NULL);
1070
1071 set_edge_can_fallthru_flag ();
1072 mark_dfs_back_edges ();
1073
1074 /* We are estimating the lenght of uncond jump insn only once since the code
1075 for getting the insn lenght always returns the minimal length now. */
1076 if (uncond_jump_length == 0)
1077 uncond_jump_length = get_uncond_jump_length ();
1078
1079 /* We need to know some information for each basic block. */
1080 array_size = GET_ARRAY_SIZE (last_basic_block);
1081 bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
1082 for (i = 0; i < array_size; i++)
1083 {
1084 bbd[i].start_of_trace = -1;
1085 bbd[i].end_of_trace = -1;
1086 bbd[i].heap = NULL;
1087 bbd[i].node = NULL;
1088 }
1089
1090 traces = xmalloc (n_basic_blocks * sizeof (struct trace));
1091 n_traces = 0;
1092 find_traces (&n_traces, traces);
1093 connect_traces (n_traces, traces);
1094 FREE (traces);
1095 FREE (bbd);
1096
1097 if (rtl_dump_file)
1098 dump_flow_info (rtl_dump_file);
1099
1100 cfg_layout_finalize ();
1101 }