gcc/bb-reorder.c
1 /* Basic block reordering routines for the GNU compiler.
2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011,
3 2012 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
 21 /* This (greedy) algorithm constructs traces in several rounds.
 22    The construction starts from "seeds".  The seed for the first round
 23    is the entry point of the function.  When there is more than one seed,
 24    the one with the lowest key in the heap is selected first (see the
 25    function bb_to_key).  Then the algorithm repeatedly adds the most
 26    probable successor to the end of a trace.  Finally it connects the traces.
27
 28    There are two parameters: Branch Threshold and Exec Threshold.
 29    If the probability of an edge to a successor of the current basic block is
 30    lower than Branch Threshold, or the frequency of the successor is lower
 31    than Exec Threshold, the successor will be a seed in one of the next rounds.
 32    Each round has these parameters lower than the previous one.
 33    The last round has to have these parameters set to zero
 34    so that the remaining blocks are picked up.
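
   For example, with the threshold tables used below, the first round does not
   extend a trace across an edge whose probability is below 40% (400 per mille
   of REG_BR_PROB_BASE) or into a block whose frequency is below half of the
   hottest entry block's frequency (500 per mille); such successors become
   seeds for this or one of the later rounds instead.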
35
 36    The algorithm selects the most probable successor from all unvisited
 37    successors and successors that have been added to this trace.
 38    The other successors (that have not been "sent" to the next round) will be
 39    other seeds for this round and the secondary traces will start in them.
 40    If the successor has not been visited in this trace it is added to the trace
 41    (however, there is some heuristic for simple branches).
 42    If the successor has been visited in this trace a loop has been found.
 43    If the loop has many iterations the loop is rotated so that the
 44    source block of the most probable edge going out of the loop
 45    is the last block of the trace.
 46    If the loop has few iterations and there is no edge from the last block of
 47    the loop going out of the loop, the loop header is duplicated.
48 Finally, the construction of the trace is terminated.
49
50 When connecting traces it first checks whether there is an edge from the
51 last block of one trace to the first block of another trace.
52 When there are still some unconnected traces it checks whether there exists
53 a basic block BB such that BB is a successor of the last bb of one trace
54 and BB is a predecessor of the first block of another trace. In this case,
55 BB is duplicated and the traces are connected through this duplicate.
 56    The remaining traces are simply connected, so there will be a jump to the
 57    beginning of each remaining trace.
58
59
60 References:
61
62 "Software Trace Cache"
63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64 http://citeseer.nj.nec.com/15361.html
65
66 */
67
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "output.h"
76 #include "fibheap.h"
77 #include "target.h"
78 #include "function.h"
79 #include "tm_p.h"
80 #include "obstack.h"
81 #include "expr.h"
82 #include "params.h"
83 #include "diagnostic-core.h"
84 #include "toplev.h" /* user_defined_section_attribute */
85 #include "tree-pass.h"
86 #include "df.h"
87 #include "bb-reorder.h"
88 #include "except.h"
89
90 /* The number of rounds. In most cases there will only be 4 rounds, but
91 when partitioning hot and cold basic blocks into separate sections of
 92    the .o file there will be an extra round.  */
93 #define N_ROUNDS 5
94
95 /* Stubs in case we don't have a return insn.
 96    We have to check at runtime too, not only at compile time.  */
97
98 #ifndef HAVE_return
99 #define HAVE_return 0
100 #define gen_return() NULL_RTX
101 #endif
102
103
104 struct target_bb_reorder default_target_bb_reorder;
105 #if SWITCHABLE_TARGET
106 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
107 #endif
108
109 #define uncond_jump_length \
110 (this_target_bb_reorder->x_uncond_jump_length)
111
112 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */
113 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
114
115 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */
116 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
117
118 /* If an edge's frequency is lower than DUPLICATION_THRESHOLD per mille of the
119    entry block's frequency, its destination is not duplicated while connecting traces.  */
120 #define DUPLICATION_THRESHOLD 100
121
122 /* Structure to hold needed information for each basic block. */
123 typedef struct bbro_basic_block_data_def
124 {
125 /* Which trace is the bb start of (-1 means it is not a start of a trace). */
126 int start_of_trace;
127
128 /* Which trace is the bb end of (-1 means it is not an end of a trace). */
129 int end_of_trace;
130
131 /* Which trace is the bb in? */
132 int in_trace;
133
134 /* Which trace was this bb visited in? */
135 int visited;
136
137 /* Which heap is BB in (if any)? */
138 fibheap_t heap;
139
140 /* Which heap node is BB in (if any)? */
141 fibnode_t node;
142 } bbro_basic_block_data;
143
144 /* The current size of the following dynamic array. */
145 static int array_size;
146
147 /* The array which holds needed information for basic blocks. */
148 static bbro_basic_block_data *bbd;
149
150 /* To avoid frequent reallocation the size of the array is greater than needed;
151    the number of elements is (not less than) 1.25 * size_wanted.  */
152 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
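/* For example, GET_ARRAY_SIZE (100) == ((100 / 4) + 1) * 5 == 130, i.e. about
   1.25 times the wanted size, plus a little slack for very small sizes.  */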
153
154 /* Free the memory and set the pointer to NULL. */
155 #define FREE(P) (gcc_assert (P), free (P), P = 0)
156
157 /* Structure for holding information about a trace. */
158 struct trace
159 {
160 /* First and last basic block of the trace. */
161 basic_block first, last;
162
163 /* The round of the STC creation which this trace was found in. */
164 int round;
165
166 /* The length (i.e. the number of basic blocks) of the trace. */
167 int length;
168 };
169
170 /* Maximum frequency and count of one of the entry blocks. */
171 static int max_entry_frequency;
172 static gcov_type max_entry_count;
173
174 /* Local function prototypes. */
175 static void find_traces (int *, struct trace *);
176 static basic_block rotate_loop (edge, struct trace *, int);
177 static void mark_bb_visited (basic_block, int);
178 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
179 int, fibheap_t *, int);
180 static basic_block copy_bb (basic_block, edge, basic_block, int);
181 static fibheapkey_t bb_to_key (basic_block);
182 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge);
183 static void connect_traces (int, struct trace *);
184 static bool copy_bb_p (const_basic_block, int);
185 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
186 \f
187 /* Return the trace number in which BB was visited. */
188
189 static int
190 bb_visited_trace (const_basic_block bb)
191 {
192 gcc_assert (bb->index < array_size);
193 return bbd[bb->index].visited;
194 }
195
196 /* Mark BB as visited in trace number TRACE.  */
197
198 static void
199 mark_bb_visited (basic_block bb, int trace)
200 {
201 bbd[bb->index].visited = trace;
202 if (bbd[bb->index].heap)
203 {
204 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
205 bbd[bb->index].heap = NULL;
206 bbd[bb->index].node = NULL;
207 }
208 }
209
210 /* Check to see if bb should be pushed into the next round of trace
211    collections or not.  Reasons for pushing the block forward are:
212    1) the block is cold, we are doing partitioning, and there will be
213    another round (cold partition blocks are not supposed to be
214    collected into traces until the very last round); or 2) there will
215    be another round, and the basic block is not "hot enough" for the
216    current round of trace collection.  */
217
218 static bool
219 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
220 int exec_th, gcov_type count_th)
221 {
222 bool there_exists_another_round;
223 bool block_not_hot_enough;
224
225 there_exists_another_round = round < number_of_rounds - 1;
226
227 block_not_hot_enough = (bb->frequency < exec_th
228 || bb->count < count_th
229 || probably_never_executed_bb_p (bb));
230
231 if (there_exists_another_round
232 && block_not_hot_enough)
233 return true;
234 else
235 return false;
236 }
237
238 /* Find the traces for Software Trace Cache. Chain each trace through
239    RBI()->next.  Store the number of traces in N_TRACES and descriptions of
240    the traces in TRACES.  */
241
242 static void
243 find_traces (int *n_traces, struct trace *traces)
244 {
245 int i;
246 int number_of_rounds;
247 edge e;
248 edge_iterator ei;
249 fibheap_t heap;
250
251 /* Add one extra round of trace collection when partitioning hot/cold
252 basic blocks into separate sections. The last round is for all the
253 cold blocks (and ONLY the cold blocks). */
254
255 number_of_rounds = N_ROUNDS - 1;
256
257 /* Insert entry points of function into heap. */
258 heap = fibheap_new ();
259 max_entry_frequency = 0;
260 max_entry_count = 0;
261 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
262 {
263 bbd[e->dest->index].heap = heap;
264 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
265 e->dest);
266 if (e->dest->frequency > max_entry_frequency)
267 max_entry_frequency = e->dest->frequency;
268 if (e->dest->count > max_entry_count)
269 max_entry_count = e->dest->count;
270 }
271
272 /* Find the traces. */
273 for (i = 0; i < number_of_rounds; i++)
274 {
275 gcov_type count_threshold;
276
277 if (dump_file)
278 fprintf (dump_file, "STC - round %d\n", i + 1);
279
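      /* Scale the maximal entry count by exec_threshold[i] per mille.  When
	 the count is large, divide before multiplying so that the
	 multiplication cannot overflow, at a small cost in precision.  */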
280 if (max_entry_count < INT_MAX / 1000)
281 count_threshold = max_entry_count * exec_threshold[i] / 1000;
282 else
283 count_threshold = max_entry_count / 1000 * exec_threshold[i];
284
285 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
286 max_entry_frequency * exec_threshold[i] / 1000,
287 count_threshold, traces, n_traces, i, &heap,
288 number_of_rounds);
289 }
290 fibheap_delete (heap);
291
292 if (dump_file)
293 {
294 for (i = 0; i < *n_traces; i++)
295 {
296 basic_block bb;
297 fprintf (dump_file, "Trace %d (round %d): ", i + 1,
298 traces[i].round + 1);
299 for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
300 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
301 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
302 }
303 fflush (dump_file);
304 }
305 }
306
307 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
308 (with sequential number TRACE_N). */
309
310 static basic_block
311 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
312 {
313 basic_block bb;
314
315 /* Information about the best end (end after rotation) of the loop. */
316 basic_block best_bb = NULL;
317 edge best_edge = NULL;
318 int best_freq = -1;
319 gcov_type best_count = -1;
320 /* The best edge is preferred when its destination is not visited yet
321 or is a start block of some trace. */
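  /* Once such a preferred candidate has been found, edges whose destination
     was already visited in this trace and is not a trace start are no longer
     considered; among the remaining candidates the one with a higher
     frequency or a higher count wins.  */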
322 bool is_preferred = false;
323
324   /* Find the most frequent edge that goes out from the current trace.  */
325 bb = back_edge->dest;
326 do
327 {
328 edge e;
329 edge_iterator ei;
330
331 FOR_EACH_EDGE (e, ei, bb->succs)
332 if (e->dest != EXIT_BLOCK_PTR
333 && bb_visited_trace (e->dest) != trace_n
334 && (e->flags & EDGE_CAN_FALLTHRU)
335 && !(e->flags & EDGE_COMPLEX))
336 {
337 if (is_preferred)
338 {
339 /* The best edge is preferred. */
340 if (!bb_visited_trace (e->dest)
341 || bbd[e->dest->index].start_of_trace >= 0)
342 {
343 /* The current edge E is also preferred. */
344 int freq = EDGE_FREQUENCY (e);
345 if (freq > best_freq || e->count > best_count)
346 {
347 best_freq = freq;
348 best_count = e->count;
349 best_edge = e;
350 best_bb = bb;
351 }
352 }
353 }
354 else
355 {
356 if (!bb_visited_trace (e->dest)
357 || bbd[e->dest->index].start_of_trace >= 0)
358 {
359 /* The current edge E is preferred. */
360 is_preferred = true;
361 best_freq = EDGE_FREQUENCY (e);
362 best_count = e->count;
363 best_edge = e;
364 best_bb = bb;
365 }
366 else
367 {
368 int freq = EDGE_FREQUENCY (e);
369 if (!best_edge || freq > best_freq || e->count > best_count)
370 {
371 best_freq = freq;
372 best_count = e->count;
373 best_edge = e;
374 best_bb = bb;
375 }
376 }
377 }
378 }
379 bb = (basic_block) bb->aux;
380 }
381 while (bb != back_edge->dest);
382
383 if (best_bb)
384 {
385 /* Rotate the loop so that the BEST_EDGE goes out from the last block of
386 the trace. */
387 if (back_edge->dest == trace->first)
388 {
389 trace->first = (basic_block) best_bb->aux;
390 }
391 else
392 {
393 basic_block prev_bb;
394
395 for (prev_bb = trace->first;
396 prev_bb->aux != back_edge->dest;
397 prev_bb = (basic_block) prev_bb->aux)
398 ;
399 prev_bb->aux = best_bb->aux;
400
401 /* Try to get rid of uncond jump to cond jump. */
402 if (single_succ_p (prev_bb))
403 {
404 basic_block header = single_succ (prev_bb);
405
406 	      /* Duplicate HEADER if it is a small block containing a cond jump
407 		 at the end.  */
408 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
409 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
410 NULL_RTX))
411 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
412 }
413 }
414 }
415 else
416 {
417       /* We have not found a suitable loop tail, so do no rotation.  */
418 best_bb = back_edge->src;
419 }
420 best_bb->aux = NULL;
421 return best_bb;
422 }
423
424 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e. do
425    not include basic blocks whose probability is lower than BRANCH_TH or whose
426    frequency is lower than EXEC_TH (or whose count is lower than COUNT_TH) in
427    traces.  Store the new traces in TRACES and modify the number of traces
428    *N_TRACES.  Set the round (which the trace belongs to) to ROUND.  It
429 expects that starting basic blocks are in *HEAP and at the end it deletes
430 *HEAP and stores starting points for the next round into new *HEAP. */
431
432 static void
433 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
434 struct trace *traces, int *n_traces, int round,
435 fibheap_t *heap, int number_of_rounds)
436 {
437 /* Heap for discarded basic blocks which are possible starting points for
438 the next round. */
439 fibheap_t new_heap = fibheap_new ();
440
441 while (!fibheap_empty (*heap))
442 {
443 basic_block bb;
444 struct trace *trace;
445 edge best_edge, e;
446 fibheapkey_t key;
447 edge_iterator ei;
448
449 bb = (basic_block) fibheap_extract_min (*heap);
450 bbd[bb->index].heap = NULL;
451 bbd[bb->index].node = NULL;
452
453 if (dump_file)
454 fprintf (dump_file, "Getting bb %d\n", bb->index);
455
456 /* If the BB's frequency is too low send BB to the next round. When
457 partitioning hot/cold blocks into separate sections, make sure all
458 the cold blocks (and ONLY the cold blocks) go into the (extra) final
459 round. */
460
461 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
462 count_th))
463 {
464 int key = bb_to_key (bb);
465 bbd[bb->index].heap = new_heap;
466 bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
467
468 if (dump_file)
469 fprintf (dump_file,
470 " Possible start point of next round: %d (key: %d)\n",
471 bb->index, key);
472 continue;
473 }
474
475 trace = traces + *n_traces;
476 trace->first = bb;
477 trace->round = round;
478 trace->length = 0;
479 bbd[bb->index].in_trace = *n_traces;
480 (*n_traces)++;
481
482 do
483 {
484 int prob, freq;
485 bool ends_in_call;
486
487 /* The probability and frequency of the best edge. */
488 int best_prob = INT_MIN / 2;
489 int best_freq = INT_MIN / 2;
490
491 best_edge = NULL;
492 mark_bb_visited (bb, *n_traces);
493 trace->length++;
494
495 if (dump_file)
496 fprintf (dump_file, "Basic block %d was visited in trace %d\n",
497 bb->index, *n_traces - 1);
498
499 ends_in_call = block_ends_with_call_p (bb);
500
501 /* Select the successor that will be placed after BB. */
502 FOR_EACH_EDGE (e, ei, bb->succs)
503 {
504 gcc_assert (!(e->flags & EDGE_FAKE));
505
506 if (e->dest == EXIT_BLOCK_PTR)
507 continue;
508
509 if (bb_visited_trace (e->dest)
510 && bb_visited_trace (e->dest) != *n_traces)
511 continue;
512
513 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
514 continue;
515
516 prob = e->probability;
517 freq = e->dest->frequency;
518
519 /* The only sensible preference for a call instruction is the
520 fallthru edge. Don't bother selecting anything else. */
521 if (ends_in_call)
522 {
523 if (e->flags & EDGE_CAN_FALLTHRU)
524 {
525 best_edge = e;
526 best_prob = prob;
527 best_freq = freq;
528 }
529 continue;
530 }
531
532 	      /* An edge that cannot be a fallthru edge, or an improbable or
533 		 infrequent successor (i.e. an unsuitable successor).  */
534 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
535 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
536 || e->count < count_th)
537 continue;
538
539 /* If partitioning hot/cold basic blocks, don't consider edges
540 that cross section boundaries. */
541
542 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
543 best_edge))
544 {
545 best_edge = e;
546 best_prob = prob;
547 best_freq = freq;
548 }
549 }
550
551 /* If the best destination has multiple predecessors, and can be
552 duplicated cheaper than a jump, don't allow it to be added
553 to a trace. We'll duplicate it when connecting traces. */
554 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
555 && copy_bb_p (best_edge->dest, 0))
556 best_edge = NULL;
557
558 /* Add all non-selected successors to the heaps. */
559 FOR_EACH_EDGE (e, ei, bb->succs)
560 {
561 if (e == best_edge
562 || e->dest == EXIT_BLOCK_PTR
563 || bb_visited_trace (e->dest))
564 continue;
565
566 key = bb_to_key (e->dest);
567
568 if (bbd[e->dest->index].heap)
569 {
570 /* E->DEST is already in some heap. */
571 if (key != bbd[e->dest->index].node->key)
572 {
573 if (dump_file)
574 {
575 fprintf (dump_file,
576 "Changing key for bb %d from %ld to %ld.\n",
577 e->dest->index,
578 (long) bbd[e->dest->index].node->key,
579 key);
580 }
581 fibheap_replace_key (bbd[e->dest->index].heap,
582 bbd[e->dest->index].node, key);
583 }
584 }
585 else
586 {
587 fibheap_t which_heap = *heap;
588
589 prob = e->probability;
590 freq = EDGE_FREQUENCY (e);
591
592 if (!(e->flags & EDGE_CAN_FALLTHRU)
593 || (e->flags & EDGE_COMPLEX)
594 || prob < branch_th || freq < exec_th
595 || e->count < count_th)
596 {
597 /* When partitioning hot/cold basic blocks, make sure
598 the cold blocks (and only the cold blocks) all get
599 pushed to the last round of trace collection. */
600
601 if (push_to_next_round_p (e->dest, round,
602 number_of_rounds,
603 exec_th, count_th))
604 which_heap = new_heap;
605 }
606
607 bbd[e->dest->index].heap = which_heap;
608 bbd[e->dest->index].node = fibheap_insert (which_heap,
609 key, e->dest);
610
611 if (dump_file)
612 {
613 fprintf (dump_file,
614 " Possible start of %s round: %d (key: %ld)\n",
615 (which_heap == new_heap) ? "next" : "this",
616 e->dest->index, (long) key);
617 }
618
619 }
620 }
621
622 if (best_edge) /* Suitable successor was found. */
623 {
624 if (bb_visited_trace (best_edge->dest) == *n_traces)
625 {
626 	      /* We do nothing with one-basic-block loops.  */
627 if (best_edge->dest != bb)
628 {
629 if (EDGE_FREQUENCY (best_edge)
630 > 4 * best_edge->dest->frequency / 5)
631 {
632 /* The loop has at least 4 iterations. If the loop
633 header is not the first block of the function
634 we can rotate the loop. */
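			    /* (If the back edge carries more than 4/5 of the
			       header's frequency, the edges entering the loop
			       from outside carry less than 1/5 of it, so the
			       back edge is taken more than four times per
			       entry into the loop.)  */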
635
636 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
637 {
638 if (dump_file)
639 {
640 fprintf (dump_file,
641 "Rotating loop %d - %d\n",
642 best_edge->dest->index, bb->index);
643 }
644 bb->aux = best_edge->dest;
645 bbd[best_edge->dest->index].in_trace =
646 (*n_traces) - 1;
647 bb = rotate_loop (best_edge, trace, *n_traces);
648 }
649 }
650 else
651 {
652 /* The loop has less than 4 iterations. */
653
654 if (single_succ_p (bb)
655 && copy_bb_p (best_edge->dest,
656 optimize_edge_for_speed_p (best_edge)))
657 {
658 bb = copy_bb (best_edge->dest, best_edge, bb,
659 *n_traces);
660 trace->length++;
661 }
662 }
663 }
664
665 /* Terminate the trace. */
666 break;
667 }
668 else
669 {
670 /* Check for a situation
671
672 A
673 /|
674 B |
675 \|
676 C
677
678 where
679 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
680 >= EDGE_FREQUENCY (AC).
681 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
682 Best ordering is then A B C.
683
684 This situation is created for example by:
685
686 if (A) B;
687 C;
688
689 */
690
691 FOR_EACH_EDGE (e, ei, bb->succs)
692 if (e != best_edge
693 && (e->flags & EDGE_CAN_FALLTHRU)
694 && !(e->flags & EDGE_COMPLEX)
695 && !bb_visited_trace (e->dest)
696 && single_pred_p (e->dest)
697 && !(e->flags & EDGE_CROSSING)
698 && single_succ_p (e->dest)
699 && (single_succ_edge (e->dest)->flags
700 & EDGE_CAN_FALLTHRU)
701 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
702 && single_succ (e->dest) == best_edge->dest
703 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
704 {
705 best_edge = e;
706 if (dump_file)
707 fprintf (dump_file, "Selecting BB %d\n",
708 best_edge->dest->index);
709 break;
710 }
711
712 bb->aux = best_edge->dest;
713 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
714 bb = best_edge->dest;
715 }
716 }
717 }
718 while (best_edge);
719 trace->last = bb;
720 bbd[trace->first->index].start_of_trace = *n_traces - 1;
721 bbd[trace->last->index].end_of_trace = *n_traces - 1;
722
723       /* The trace is terminated so we have to recount the keys in the heap
724 	 (some blocks can have lower keys because now one of their predecessors
725 	 is an end of the trace).  */
726 FOR_EACH_EDGE (e, ei, bb->succs)
727 {
728 if (e->dest == EXIT_BLOCK_PTR
729 || bb_visited_trace (e->dest))
730 continue;
731
732 if (bbd[e->dest->index].heap)
733 {
734 key = bb_to_key (e->dest);
735 if (key != bbd[e->dest->index].node->key)
736 {
737 if (dump_file)
738 {
739 fprintf (dump_file,
740 "Changing key for bb %d from %ld to %ld.\n",
741 e->dest->index,
742 (long) bbd[e->dest->index].node->key, key);
743 }
744 fibheap_replace_key (bbd[e->dest->index].heap,
745 bbd[e->dest->index].node,
746 key);
747 }
748 }
749 }
750 }
751
752 fibheap_delete (*heap);
753
754 /* "Return" the new heap. */
755 *heap = new_heap;
756 }
757
758 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
759    it to the trace after BB, mark the duplicate visited and update the pass's
760    data structures (TRACE is the number of the trace OLD_BB is duplicated to).  */
761
762 static basic_block
763 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
764 {
765 basic_block new_bb;
766
767 new_bb = duplicate_block (old_bb, e, bb);
768 BB_COPY_PARTITION (new_bb, old_bb);
769
770 gcc_assert (e->dest == new_bb);
771
772 if (dump_file)
773 fprintf (dump_file,
774 "Duplicated bb %d (created bb %d)\n",
775 old_bb->index, new_bb->index);
776
777 if (new_bb->index >= array_size || last_basic_block > array_size)
778 {
779 int i;
780 int new_size;
781
782 new_size = MAX (last_basic_block, new_bb->index + 1);
783 new_size = GET_ARRAY_SIZE (new_size);
784 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
785 for (i = array_size; i < new_size; i++)
786 {
787 bbd[i].start_of_trace = -1;
788 bbd[i].end_of_trace = -1;
789 bbd[i].in_trace = -1;
790 bbd[i].visited = 0;
791 bbd[i].heap = NULL;
792 bbd[i].node = NULL;
793 }
794 array_size = new_size;
795
796 if (dump_file)
797 {
798 fprintf (dump_file,
799 "Growing the dynamic array to %d elements.\n",
800 array_size);
801 }
802 }
803
804 gcc_assert (!bb_visited_trace (e->dest));
805 mark_bb_visited (new_bb, trace);
806 new_bb->aux = bb->aux;
807 bb->aux = new_bb;
808
809 bbd[new_bb->index].in_trace = trace;
810
811 return new_bb;
812 }
813
814 /* Compute and return the key (for the heap) of the basic block BB. */
815
816 static fibheapkey_t
817 bb_to_key (basic_block bb)
818 {
819 edge e;
820 edge_iterator ei;
821 int priority = 0;
822
823 /* Do not start in probably never executed blocks. */
824
825 if (BB_PARTITION (bb) == BB_COLD_PARTITION
826 || probably_never_executed_bb_p (bb))
827 return BB_FREQ_MAX;
828
829 /* Prefer blocks whose predecessor is an end of some trace
830 or whose predecessor edge is EDGE_DFS_BACK. */
831 FOR_EACH_EDGE (e, ei, bb->preds)
832 {
833 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
834 || (e->flags & EDGE_DFS_BACK))
835 {
836 int edge_freq = EDGE_FREQUENCY (e);
837
838 if (edge_freq > priority)
839 priority = edge_freq;
840 }
841 }
842
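  /* Keys are extracted from a min-heap (fibheap_extract_min), so the most
     attractive starting blocks must get the most negative keys: frequencies
     are negated, and blocks with a priority are pushed well below any plain
     -frequency value.  */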
843 if (priority)
844 /* The block with priority should have significantly lower key. */
845 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
846 return -bb->frequency;
847 }
848
849 /* Return true when the edge E from basic block BB is better than the temporary
850    best edge (details are in the function).  The probability of edge E is PROB.
851    The frequency of the successor is FREQ.  The current best probability is
852    BEST_PROB, the best frequency is BEST_FREQ.
853    Two edges are considered equivalent when PROB does not differ much from
854    BEST_PROB; similarly for frequency.  */
855
856 static bool
857 better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob,
858 int best_freq, const_edge cur_best_edge)
859 {
860 bool is_better_edge;
861
862 /* The BEST_* values do not have to be best, but can be a bit smaller than
863 maximum values. */
864 int diff_prob = best_prob / 10;
865 int diff_freq = best_freq / 10;
866
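  /* For example, with best_prob == 6000 the band is +-600: an edge with
     probability above 6600 wins outright, one below 5400 loses outright, and
     anything in between is treated as equivalent, so that the successor
     frequencies (with the analogous +-diff_freq band) decide.  */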
867 if (prob > best_prob + diff_prob)
868 /* The edge has higher probability than the temporary best edge. */
869 is_better_edge = true;
870 else if (prob < best_prob - diff_prob)
871 /* The edge has lower probability than the temporary best edge. */
872 is_better_edge = false;
873 else if (freq < best_freq - diff_freq)
874 /* The edge and the temporary best edge have almost equivalent
875 probabilities. The higher frequency of a successor now means
876 that there is another edge going into that successor.
877 This successor has lower frequency so it is better. */
878 is_better_edge = true;
879 else if (freq > best_freq + diff_freq)
880 /* This successor has higher frequency so it is worse. */
881 is_better_edge = false;
882 else if (e->dest->prev_bb == bb)
883 /* The edges have equivalent probabilities and the successors
884 have equivalent frequencies. Select the previous successor. */
885 is_better_edge = true;
886 else
887 is_better_edge = false;
888
889 /* If we are doing hot/cold partitioning, make sure that we always favor
890 non-crossing edges over crossing edges. */
891
892 if (!is_better_edge
893 && flag_reorder_blocks_and_partition
894 && cur_best_edge
895 && (cur_best_edge->flags & EDGE_CROSSING)
896 && !(e->flags & EDGE_CROSSING))
897 is_better_edge = true;
898
899 return is_better_edge;
900 }
901
902 /* Connect traces in array TRACES, N_TRACES is the count of traces. */
903
904 static void
905 connect_traces (int n_traces, struct trace *traces)
906 {
907 int i;
908 bool *connected;
909 bool two_passes;
910 int last_trace;
911 int current_pass;
912 int current_partition;
913 int freq_threshold;
914 gcov_type count_threshold;
915
916 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
917 if (max_entry_count < INT_MAX / 1000)
918 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
919 else
920 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
921
922 connected = XCNEWVEC (bool, n_traces);
923 last_trace = -1;
924 current_pass = 1;
925 current_partition = BB_PARTITION (traces[0].first);
926 two_passes = false;
927
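  /* Determine whether the function has traces in both partitions; if so, the
     connecting below is done in two passes, first for the traces in the
     partition of the first trace and then for the traces in the other
     partition.  */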
928 if (flag_reorder_blocks_and_partition)
929 for (i = 0; i < n_traces && !two_passes; i++)
930 if (BB_PARTITION (traces[0].first)
931 != BB_PARTITION (traces[i].first))
932 two_passes = true;
933
934 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
935 {
936 int t = i;
937 int t2;
938 edge e, best;
939 int best_len;
940
941 if (i >= n_traces)
942 {
943 gcc_assert (two_passes && current_pass == 1);
944 i = 0;
945 t = i;
946 current_pass = 2;
947 if (current_partition == BB_HOT_PARTITION)
948 current_partition = BB_COLD_PARTITION;
949 else
950 current_partition = BB_HOT_PARTITION;
951 }
952
953 if (connected[t])
954 continue;
955
956 if (two_passes
957 && BB_PARTITION (traces[t].first) != current_partition)
958 continue;
959
960 connected[t] = true;
961
962 /* Find the predecessor traces. */
963 for (t2 = t; t2 > 0;)
964 {
965 edge_iterator ei;
966 best = NULL;
967 best_len = 0;
968 FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
969 {
970 int si = e->src->index;
971
972 if (e->src != ENTRY_BLOCK_PTR
973 && (e->flags & EDGE_CAN_FALLTHRU)
974 && !(e->flags & EDGE_COMPLEX)
975 && bbd[si].end_of_trace >= 0
976 && !connected[bbd[si].end_of_trace]
977 && (BB_PARTITION (e->src) == current_partition)
978 && (!best
979 || e->probability > best->probability
980 || (e->probability == best->probability
981 && traces[bbd[si].end_of_trace].length > best_len)))
982 {
983 best = e;
984 best_len = traces[bbd[si].end_of_trace].length;
985 }
986 }
987 if (best)
988 {
989 best->src->aux = best->dest;
990 t2 = bbd[best->src->index].end_of_trace;
991 connected[t2] = true;
992
993 if (dump_file)
994 {
995 fprintf (dump_file, "Connection: %d %d\n",
996 best->src->index, best->dest->index);
997 }
998 }
999 else
1000 break;
1001 }
1002
1003 if (last_trace >= 0)
1004 traces[last_trace].last->aux = traces[t2].first;
1005 last_trace = t;
1006
1007 /* Find the successor traces. */
1008 while (1)
1009 {
1010 /* Find the continuation of the chain. */
1011 edge_iterator ei;
1012 best = NULL;
1013 best_len = 0;
1014 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1015 {
1016 int di = e->dest->index;
1017
1018 if (e->dest != EXIT_BLOCK_PTR
1019 && (e->flags & EDGE_CAN_FALLTHRU)
1020 && !(e->flags & EDGE_COMPLEX)
1021 && bbd[di].start_of_trace >= 0
1022 && !connected[bbd[di].start_of_trace]
1023 && (BB_PARTITION (e->dest) == current_partition)
1024 && (!best
1025 || e->probability > best->probability
1026 || (e->probability == best->probability
1027 && traces[bbd[di].start_of_trace].length > best_len)))
1028 {
1029 best = e;
1030 best_len = traces[bbd[di].start_of_trace].length;
1031 }
1032 }
1033
1034 if (best)
1035 {
1036 if (dump_file)
1037 {
1038 fprintf (dump_file, "Connection: %d %d\n",
1039 best->src->index, best->dest->index);
1040 }
1041 t = bbd[best->dest->index].start_of_trace;
1042 traces[last_trace].last->aux = traces[t].first;
1043 connected[t] = true;
1044 last_trace = t;
1045 }
1046 else
1047 {
1048 /* Try to connect the traces by duplication of 1 block. */
1049 edge e2;
1050 basic_block next_bb = NULL;
1051 bool try_copy = false;
1052
1053 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1054 if (e->dest != EXIT_BLOCK_PTR
1055 && (e->flags & EDGE_CAN_FALLTHRU)
1056 && !(e->flags & EDGE_COMPLEX)
1057 && (!best || e->probability > best->probability))
1058 {
1059 edge_iterator ei;
1060 edge best2 = NULL;
1061 int best2_len = 0;
1062
1063 /* If the destination is a start of a trace which is only
1064 one block long, then no need to search the successor
1065 blocks of the trace. Accept it. */
1066 if (bbd[e->dest->index].start_of_trace >= 0
1067 && traces[bbd[e->dest->index].start_of_trace].length
1068 == 1)
1069 {
1070 best = e;
1071 try_copy = true;
1072 continue;
1073 }
1074
1075 FOR_EACH_EDGE (e2, ei, e->dest->succs)
1076 {
1077 int di = e2->dest->index;
1078
1079 if (e2->dest == EXIT_BLOCK_PTR
1080 || ((e2->flags & EDGE_CAN_FALLTHRU)
1081 && !(e2->flags & EDGE_COMPLEX)
1082 && bbd[di].start_of_trace >= 0
1083 && !connected[bbd[di].start_of_trace]
1084 && (BB_PARTITION (e2->dest) == current_partition)
1085 && (EDGE_FREQUENCY (e2) >= freq_threshold)
1086 && (e2->count >= count_threshold)
1087 && (!best2
1088 || e2->probability > best2->probability
1089 || (e2->probability == best2->probability
1090 && traces[bbd[di].start_of_trace].length
1091 > best2_len))))
1092 {
1093 best = e;
1094 best2 = e2;
1095 if (e2->dest != EXIT_BLOCK_PTR)
1096 best2_len = traces[bbd[di].start_of_trace].length;
1097 else
1098 best2_len = INT_MAX;
1099 next_bb = e2->dest;
1100 try_copy = true;
1101 }
1102 }
1103 }
1104
1105 if (flag_reorder_blocks_and_partition)
1106 try_copy = false;
1107
1108 /* Copy tiny blocks always; copy larger blocks only when the
1109 edge is traversed frequently enough. */
1110 if (try_copy
1111 && copy_bb_p (best->dest,
1112 optimize_edge_for_speed_p (best)
1113 && EDGE_FREQUENCY (best) >= freq_threshold
1114 && best->count >= count_threshold))
1115 {
1116 basic_block new_bb;
1117
1118 if (dump_file)
1119 {
1120 fprintf (dump_file, "Connection: %d %d ",
1121 traces[t].last->index, best->dest->index);
1122 if (!next_bb)
1123 fputc ('\n', dump_file);
1124 else if (next_bb == EXIT_BLOCK_PTR)
1125 fprintf (dump_file, "exit\n");
1126 else
1127 fprintf (dump_file, "%d\n", next_bb->index);
1128 }
1129
1130 new_bb = copy_bb (best->dest, best, traces[t].last, t);
1131 traces[t].last = new_bb;
1132 if (next_bb && next_bb != EXIT_BLOCK_PTR)
1133 {
1134 t = bbd[next_bb->index].start_of_trace;
1135 traces[last_trace].last->aux = traces[t].first;
1136 connected[t] = true;
1137 last_trace = t;
1138 }
1139 else
1140 break; /* Stop finding the successor traces. */
1141 }
1142 else
1143 break; /* Stop finding the successor traces. */
1144 }
1145 }
1146 }
1147
1148 if (dump_file)
1149 {
1150 basic_block bb;
1151
1152 fprintf (dump_file, "Final order:\n");
1153 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1154 fprintf (dump_file, "%d ", bb->index);
1155 fprintf (dump_file, "\n");
1156 fflush (dump_file);
1157 }
1158
1159 FREE (connected);
1160 }
1161
1162 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1163 when code size is allowed to grow by duplication. */
1164
1165 static bool
1166 copy_bb_p (const_basic_block bb, int code_may_grow)
1167 {
1168 int size = 0;
1169 int max_size = uncond_jump_length;
1170 rtx insn;
1171
1172 if (!bb->frequency)
1173 return false;
1174 if (EDGE_COUNT (bb->preds) < 2)
1175 return false;
1176 if (!can_duplicate_block_p (bb))
1177 return false;
1178
1179 /* Avoid duplicating blocks which have many successors (PR/13430). */
1180 if (EDGE_COUNT (bb->succs) > 8)
1181 return false;
1182
1183 if (code_may_grow && optimize_bb_for_speed_p (bb))
1184 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
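  /* I.e. a block is normally copied only if it is no larger than one
     unconditional jump, roughly the jump that copying it saves; when code may
     grow and the block is optimized for speed, the limit is scaled up by
     PARAM_MAX_GROW_COPY_BB_INSNS.  */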
1185
1186 FOR_BB_INSNS (bb, insn)
1187 {
1188 if (INSN_P (insn))
1189 size += get_attr_min_length (insn);
1190 }
1191
1192 if (size <= max_size)
1193 return true;
1194
1195 if (dump_file)
1196 {
1197 fprintf (dump_file,
1198 "Block %d can't be copied because its size = %d.\n",
1199 bb->index, size);
1200 }
1201
1202 return false;
1203 }
1204
1205 /* Return the length of an unconditional jump instruction.  */
1206
1207 int
1208 get_uncond_jump_length (void)
1209 {
1210 rtx label, jump;
1211 int length;
1212
1213 label = emit_label_before (gen_label_rtx (), get_insns ());
1214 jump = emit_jump_insn (gen_jump (label));
1215
1216 length = get_attr_min_length (jump);
1217
1218 delete_insn (jump);
1219 delete_insn (label);
1220 return length;
1221 }
1222
1223 /* Emit a barrier into the footer of BB. */
1224
1225 static void
1226 emit_barrier_after_bb (basic_block bb)
1227 {
1228 rtx barrier = emit_barrier_after (BB_END (bb));
1229 BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier);
1230 }
1231
1232 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1233 Duplicate the landing pad and split the edges so that no EH edge
1234 crosses partitions. */
1235
1236 static void
1237 fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1238 {
1239 eh_landing_pad new_lp;
1240 basic_block new_bb, last_bb, post_bb;
1241 rtx new_label, jump, post_label;
1242 unsigned new_partition;
1243 edge_iterator ei;
1244 edge e;
1245
1246 /* Generate the new landing-pad structure. */
1247 new_lp = gen_eh_landing_pad (old_lp->region);
1248 new_lp->post_landing_pad = old_lp->post_landing_pad;
1249 new_lp->landing_pad = gen_label_rtx ();
1250 LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1251
1252 /* Put appropriate instructions in new bb. */
1253 new_label = emit_label (new_lp->landing_pad);
1254
1255 expand_dw2_landing_pad_for_region (old_lp->region);
1256
1257 post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1258 post_bb = single_succ (post_bb);
1259 post_label = block_label (post_bb);
1260 jump = emit_jump_insn (gen_jump (post_label));
1261 JUMP_LABEL (jump) = post_label;
1262
1263 /* Create new basic block to be dest for lp. */
1264 last_bb = EXIT_BLOCK_PTR->prev_bb;
1265 new_bb = create_basic_block (new_label, jump, last_bb);
1266 new_bb->aux = last_bb->aux;
1267 last_bb->aux = new_bb;
1268
1269 emit_barrier_after_bb (new_bb);
1270
1271 make_edge (new_bb, post_bb, 0);
1272
1273 /* Make sure new bb is in the other partition. */
1274 new_partition = BB_PARTITION (old_bb);
1275 new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1276 BB_SET_PARTITION (new_bb, new_partition);
1277
1278 /* Fix up the edges. */
1279 for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1280 if (BB_PARTITION (e->src) == new_partition)
1281 {
1282 rtx insn = BB_END (e->src);
1283 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1284
1285 gcc_assert (note != NULL);
1286 gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1287 XEXP (note, 0) = GEN_INT (new_lp->index);
1288
1289 /* Adjust the edge to the new destination. */
1290 redirect_edge_succ (e, new_bb);
1291 }
1292 else
1293 ei_next (&ei);
1294 }
1295
1296 /* Find the basic blocks that are rarely executed and need to be moved to
1297 a separate section of the .o file (to cut down on paging and improve
1298    cache locality).  Return a vector of all edges that cross between the sections.  */
1299
1300 static VEC(edge, heap) *
1301 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1302 {
1303 VEC(edge, heap) *crossing_edges = NULL;
1304 basic_block bb;
1305 edge e;
1306 edge_iterator ei;
1307
1308 /* Mark which partition (hot/cold) each basic block belongs in. */
1309 FOR_EACH_BB (bb)
1310 {
1311 if (probably_never_executed_bb_p (bb))
1312 BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1313 else
1314 BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1315 }
1316
1317 /* The format of .gcc_except_table does not allow landing pads to
1318      be in a different partition from the throw.  Fix this by either
1319 moving or duplicating the landing pads. */
1320 if (cfun->eh->lp_array)
1321 {
1322 unsigned i;
1323 eh_landing_pad lp;
1324
1325 FOR_EACH_VEC_ELT (eh_landing_pad, cfun->eh->lp_array, i, lp)
1326 {
1327 bool all_same, all_diff;
1328
1329 if (lp == NULL
1330 || lp->landing_pad == NULL_RTX
1331 || !LABEL_P (lp->landing_pad))
1332 continue;
1333
1334 all_same = all_diff = true;
1335 bb = BLOCK_FOR_INSN (lp->landing_pad);
1336 FOR_EACH_EDGE (e, ei, bb->preds)
1337 {
1338 gcc_assert (e->flags & EDGE_EH);
1339 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1340 all_diff = false;
1341 else
1342 all_same = false;
1343 }
1344
1345 if (all_same)
1346 ;
1347 else if (all_diff)
1348 {
1349 int which = BB_PARTITION (bb);
1350 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1351 BB_SET_PARTITION (bb, which);
1352 }
1353 else
1354 fix_up_crossing_landing_pad (lp, bb);
1355 }
1356 }
1357
1358 /* Mark every edge that crosses between sections. */
1359
1360 FOR_EACH_BB (bb)
1361 FOR_EACH_EDGE (e, ei, bb->succs)
1362 {
1363 unsigned int flags = e->flags;
1364
1365 /* We should never have EDGE_CROSSING set yet. */
1366 gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1367
1368 if (e->src != ENTRY_BLOCK_PTR
1369 && e->dest != EXIT_BLOCK_PTR
1370 && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1371 {
1372 VEC_safe_push (edge, heap, crossing_edges, e);
1373 flags |= EDGE_CROSSING;
1374 }
1375
1376 /* Now that we've split eh edges as appropriate, allow landing pads
1377 to be merged with the post-landing pads. */
1378 flags &= ~EDGE_PRESERVE;
1379
1380 e->flags = flags;
1381 }
1382
1383 return crossing_edges;
1384 }
1385
1386 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru. */
1387
1388 static void
1389 set_edge_can_fallthru_flag (void)
1390 {
1391 basic_block bb;
1392
1393 FOR_EACH_BB (bb)
1394 {
1395 edge e;
1396 edge_iterator ei;
1397
1398 FOR_EACH_EDGE (e, ei, bb->succs)
1399 {
1400 e->flags &= ~EDGE_CAN_FALLTHRU;
1401
1402 /* The FALLTHRU edge is also CAN_FALLTHRU edge. */
1403 if (e->flags & EDGE_FALLTHRU)
1404 e->flags |= EDGE_CAN_FALLTHRU;
1405 }
1406
1407 /* If the BB ends with an invertible condjump all (2) edges are
1408 CAN_FALLTHRU edges. */
1409 if (EDGE_COUNT (bb->succs) != 2)
1410 continue;
1411 if (!any_condjump_p (BB_END (bb)))
1412 continue;
1413 if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1414 continue;
1415 invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1416 EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1417 EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1418 }
1419 }
1420
1421 /* If any destination of a crossing edge does not have a label, add a label;
1422    convert any easy fall-through crossing edges to unconditional jumps.  */
1423
1424 static void
1425 add_labels_and_missing_jumps (VEC(edge, heap) *crossing_edges)
1426 {
1427 size_t i;
1428 edge e;
1429
1430 FOR_EACH_VEC_ELT (edge, crossing_edges, i, e)
1431 {
1432 basic_block src = e->src;
1433 basic_block dest = e->dest;
1434 rtx label, new_jump;
1435
1436 if (dest == EXIT_BLOCK_PTR)
1437 continue;
1438
1439 /* Make sure dest has a label. */
1440 label = block_label (dest);
1441
1442 /* Nothing to do for non-fallthru edges. */
1443 if (src == ENTRY_BLOCK_PTR)
1444 continue;
1445 if ((e->flags & EDGE_FALLTHRU) == 0)
1446 continue;
1447
1448 /* If the block does not end with a control flow insn, then we
1449 can trivially add a jump to the end to fixup the crossing.
1450 Otherwise the jump will have to go in a new bb, which will
1451 be handled by fix_up_fall_thru_edges function. */
1452 if (control_flow_insn_p (BB_END (src)))
1453 continue;
1454
1455 /* Make sure there's only one successor. */
1456 gcc_assert (single_succ_p (src));
1457
1458 new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1459 BB_END (src) = new_jump;
1460 JUMP_LABEL (new_jump) = label;
1461 LABEL_NUSES (label) += 1;
1462
1463 emit_barrier_after_bb (src);
1464
1465 /* Mark edge as non-fallthru. */
1466 e->flags &= ~EDGE_FALLTHRU;
1467 }
1468 }
1469
1470 /* Find any bb's where the fall-through edge is a crossing edge (note that
1471 these bb's must also contain a conditional jump or end with a call
1472 instruction; we've already dealt with fall-through edges for blocks
1473 that didn't have a conditional jump or didn't end with call instruction
1474 in the call to add_labels_and_missing_jumps). Convert the fall-through
1475 edge to non-crossing edge by inserting a new bb to fall-through into.
1476 The new bb will contain an unconditional jump (crossing edge) to the
1477 original fall through destination. */
1478
1479 static void
1480 fix_up_fall_thru_edges (void)
1481 {
1482 basic_block cur_bb;
1483 basic_block new_bb;
1484 edge succ1;
1485 edge succ2;
1486 edge fall_thru;
1487 edge cond_jump = NULL;
1488 edge e;
1489 bool cond_jump_crosses;
1490 int invert_worked;
1491 rtx old_jump;
1492 rtx fall_thru_label;
1493
1494 FOR_EACH_BB (cur_bb)
1495 {
1496 fall_thru = NULL;
1497 if (EDGE_COUNT (cur_bb->succs) > 0)
1498 succ1 = EDGE_SUCC (cur_bb, 0);
1499 else
1500 succ1 = NULL;
1501
1502 if (EDGE_COUNT (cur_bb->succs) > 1)
1503 succ2 = EDGE_SUCC (cur_bb, 1);
1504 else
1505 succ2 = NULL;
1506
1507 /* Find the fall-through edge. */
1508
1509 if (succ1
1510 && (succ1->flags & EDGE_FALLTHRU))
1511 {
1512 fall_thru = succ1;
1513 cond_jump = succ2;
1514 }
1515 else if (succ2
1516 && (succ2->flags & EDGE_FALLTHRU))
1517 {
1518 fall_thru = succ2;
1519 cond_jump = succ1;
1520 }
1521 else if (succ1
1522 && (block_ends_with_call_p (cur_bb)
1523 || can_throw_internal (BB_END (cur_bb))))
1524 {
1525 edge e;
1526 edge_iterator ei;
1527
1528 /* Find EDGE_CAN_FALLTHRU edge. */
1529 FOR_EACH_EDGE (e, ei, cur_bb->succs)
1530 if (e->flags & EDGE_CAN_FALLTHRU)
1531 {
1532 fall_thru = e;
1533 break;
1534 }
1535 }
1536
1537 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1538 {
1539 /* Check to see if the fall-thru edge is a crossing edge. */
1540
1541 if (fall_thru->flags & EDGE_CROSSING)
1542 {
1543 /* The fall_thru edge crosses; now check the cond jump edge, if
1544 it exists. */
1545
1546 cond_jump_crosses = true;
1547 invert_worked = 0;
1548 old_jump = BB_END (cur_bb);
1549
1550 /* Find the jump instruction, if there is one. */
1551
1552 if (cond_jump)
1553 {
1554 if (!(cond_jump->flags & EDGE_CROSSING))
1555 cond_jump_crosses = false;
1556
1557 /* We know the fall-thru edge crosses; if the cond
1558 jump edge does NOT cross, and its destination is the
1559 next block in the bb order, invert the jump
1560 (i.e. fix it so the fall through does not cross and
1561 the cond jump does). */
1562
1563 if (!cond_jump_crosses
1564 && cur_bb->aux == cond_jump->dest)
1565 {
1566 /* Find label in fall_thru block. We've already added
1567 any missing labels, so there must be one. */
1568
1569 fall_thru_label = block_label (fall_thru->dest);
1570
1571 if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1572 invert_worked = invert_jump (old_jump,
1573 fall_thru_label,0);
1574 if (invert_worked)
1575 {
1576 fall_thru->flags &= ~EDGE_FALLTHRU;
1577 cond_jump->flags |= EDGE_FALLTHRU;
1578 update_br_prob_note (cur_bb);
1579 e = fall_thru;
1580 fall_thru = cond_jump;
1581 cond_jump = e;
1582 cond_jump->flags |= EDGE_CROSSING;
1583 fall_thru->flags &= ~EDGE_CROSSING;
1584 }
1585 }
1586 }
1587
1588 if (cond_jump_crosses || !invert_worked)
1589 {
1590 /* This is the case where both edges out of the basic
1591 block are crossing edges. Here we will fix up the
1592 fall through edge. The jump edge will be taken care
1593 of later. The EDGE_CROSSING flag of fall_thru edge
1594 is unset before the call to force_nonfallthru
1595 function because if a new basic-block is created
1596 this edge remains in the current section boundary
1597 while the edge between new_bb and the fall_thru->dest
1598 becomes EDGE_CROSSING. */
1599
1600 fall_thru->flags &= ~EDGE_CROSSING;
1601 new_bb = force_nonfallthru (fall_thru);
1602
1603 if (new_bb)
1604 {
1605 new_bb->aux = cur_bb->aux;
1606 cur_bb->aux = new_bb;
1607
1608 /* Make sure new fall-through bb is in same
1609 partition as bb it's falling through from. */
1610
1611 BB_COPY_PARTITION (new_bb, cur_bb);
1612 single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1613 }
1614 else
1615 {
1616 /* If a new basic-block was not created; restore
1617 the EDGE_CROSSING flag. */
1618 fall_thru->flags |= EDGE_CROSSING;
1619 }
1620
1621 /* Add barrier after new jump */
1622 emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1623 }
1624 }
1625 }
1626 }
1627 }
1628
1629 /* This function checks the destination block of a "crossing jump" to
1630 see if it has any crossing predecessors that begin with a code label
1631 and end with an unconditional jump. If so, it returns that predecessor
1632 block. (This is to avoid creating lots of new basic blocks that all
1633 contain unconditional jumps to the same destination). */
1634
1635 static basic_block
1636 find_jump_block (basic_block jump_dest)
1637 {
1638 basic_block source_bb = NULL;
1639 edge e;
1640 rtx insn;
1641 edge_iterator ei;
1642
1643 FOR_EACH_EDGE (e, ei, jump_dest->preds)
1644 if (e->flags & EDGE_CROSSING)
1645 {
1646 basic_block src = e->src;
1647
1648 /* Check each predecessor to see if it has a label, and contains
1649 only one executable instruction, which is an unconditional jump.
1650 If so, we can use it. */
1651
1652 if (LABEL_P (BB_HEAD (src)))
1653 for (insn = BB_HEAD (src);
1654 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1655 insn = NEXT_INSN (insn))
1656 {
1657 if (INSN_P (insn)
1658 && insn == BB_END (src)
1659 && JUMP_P (insn)
1660 && !any_condjump_p (insn))
1661 {
1662 source_bb = src;
1663 break;
1664 }
1665 }
1666
1667 if (source_bb)
1668 break;
1669 }
1670
1671 return source_bb;
1672 }
1673
1674 /* Find all BB's with conditional jumps that are crossing edges;
1675 insert a new bb and make the conditional jump branch to the new
1676    bb instead (make the new bb the same color so the conditional branch won't
1677 be a 'crossing' edge). Insert an unconditional jump from the
1678 new bb to the original destination of the conditional jump. */
1679
1680 static void
1681 fix_crossing_conditional_branches (void)
1682 {
1683 basic_block cur_bb;
1684 basic_block new_bb;
1685 basic_block dest;
1686 edge succ1;
1687 edge succ2;
1688 edge crossing_edge;
1689 edge new_edge;
1690 rtx old_jump;
1691 rtx set_src;
1692 rtx old_label = NULL_RTX;
1693 rtx new_label;
1694
1695 FOR_EACH_BB (cur_bb)
1696 {
1697 crossing_edge = NULL;
1698 if (EDGE_COUNT (cur_bb->succs) > 0)
1699 succ1 = EDGE_SUCC (cur_bb, 0);
1700 else
1701 succ1 = NULL;
1702
1703 if (EDGE_COUNT (cur_bb->succs) > 1)
1704 succ2 = EDGE_SUCC (cur_bb, 1);
1705 else
1706 succ2 = NULL;
1707
1708 /* We already took care of fall-through edges, so only one successor
1709 can be a crossing edge. */
1710
1711 if (succ1 && (succ1->flags & EDGE_CROSSING))
1712 crossing_edge = succ1;
1713 else if (succ2 && (succ2->flags & EDGE_CROSSING))
1714 crossing_edge = succ2;
1715
1716 if (crossing_edge)
1717 {
1718 old_jump = BB_END (cur_bb);
1719
1720 /* Check to make sure the jump instruction is a
1721 conditional jump. */
1722
1723 set_src = NULL_RTX;
1724
1725 if (any_condjump_p (old_jump))
1726 {
1727 if (GET_CODE (PATTERN (old_jump)) == SET)
1728 set_src = SET_SRC (PATTERN (old_jump));
1729 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1730 {
1731 set_src = XVECEXP (PATTERN (old_jump), 0,0);
1732 if (GET_CODE (set_src) == SET)
1733 set_src = SET_SRC (set_src);
1734 else
1735 set_src = NULL_RTX;
1736 }
1737 }
1738
1739 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1740 {
1741 if (GET_CODE (XEXP (set_src, 1)) == PC)
1742 old_label = XEXP (set_src, 2);
1743 else if (GET_CODE (XEXP (set_src, 2)) == PC)
1744 old_label = XEXP (set_src, 1);
1745
1746 /* Check to see if new bb for jumping to that dest has
1747 already been created; if so, use it; if not, create
1748 a new one. */
1749
1750 new_bb = find_jump_block (crossing_edge->dest);
1751
1752 if (new_bb)
1753 new_label = block_label (new_bb);
1754 else
1755 {
1756 basic_block last_bb;
1757 rtx new_jump;
1758
1759 /* Create new basic block to be dest for
1760 conditional jump. */
1761
1762 /* Put appropriate instructions in new bb. */
1763
1764 new_label = gen_label_rtx ();
1765 emit_label (new_label);
1766
1767 gcc_assert (GET_CODE (old_label) == LABEL_REF);
1768 old_label = JUMP_LABEL (old_jump);
1769 new_jump = emit_jump_insn (gen_jump (old_label));
1770 JUMP_LABEL (new_jump) = old_label;
1771
1772 last_bb = EXIT_BLOCK_PTR->prev_bb;
1773 new_bb = create_basic_block (new_label, new_jump, last_bb);
1774 new_bb->aux = last_bb->aux;
1775 last_bb->aux = new_bb;
1776
1777 emit_barrier_after_bb (new_bb);
1778
1779 /* Make sure new bb is in same partition as source
1780 of conditional branch. */
1781 BB_COPY_PARTITION (new_bb, cur_bb);
1782 }
1783
1784 /* Make old jump branch to new bb. */
1785
1786 redirect_jump (old_jump, new_label, 0);
1787
1788 /* Remove crossing_edge as predecessor of 'dest'. */
1789
1790 dest = crossing_edge->dest;
1791
1792 redirect_edge_succ (crossing_edge, new_bb);
1793
1794 /* Make a new edge from new_bb to old dest; new edge
1795 will be a successor for new_bb and a predecessor
1796 for 'dest'. */
1797
1798 if (EDGE_COUNT (new_bb->succs) == 0)
1799 new_edge = make_edge (new_bb, dest, 0);
1800 else
1801 new_edge = EDGE_SUCC (new_bb, 0);
1802
1803 crossing_edge->flags &= ~EDGE_CROSSING;
1804 new_edge->flags |= EDGE_CROSSING;
1805 }
1806 }
1807 }
1808 }
1809
1810 /* Find any unconditional branches that cross between hot and cold
1811 sections. Convert them into indirect jumps instead. */
1812
1813 static void
1814 fix_crossing_unconditional_branches (void)
1815 {
1816 basic_block cur_bb;
1817 rtx last_insn;
1818 rtx label;
1819 rtx label_addr;
1820 rtx indirect_jump_sequence;
1821 rtx jump_insn = NULL_RTX;
1822 rtx new_reg;
1823 rtx cur_insn;
1824 edge succ;
1825
1826 FOR_EACH_BB (cur_bb)
1827 {
1828 last_insn = BB_END (cur_bb);
1829
1830 if (EDGE_COUNT (cur_bb->succs) < 1)
1831 continue;
1832
1833 succ = EDGE_SUCC (cur_bb, 0);
1834
1835 /* Check to see if bb ends in a crossing (unconditional) jump. At
1836 this point, no crossing jumps should be conditional. */
1837
1838 if (JUMP_P (last_insn)
1839 && (succ->flags & EDGE_CROSSING))
1840 {
1841 rtx label2, table;
1842
1843 gcc_assert (!any_condjump_p (last_insn));
1844
1845 /* Make sure the jump is not already an indirect or table jump. */
1846
1847 if (!computed_jump_p (last_insn)
1848 && !tablejump_p (last_insn, &label2, &table))
1849 {
1850 /* We have found a "crossing" unconditional branch. Now
1851 we must convert it to an indirect jump. First create
1852 reference of label, as target for jump. */
1853
1854 label = JUMP_LABEL (last_insn);
1855 label_addr = gen_rtx_LABEL_REF (Pmode, label);
1856 LABEL_NUSES (label) += 1;
1857
1858 /* Get a register to use for the indirect jump. */
1859
1860 new_reg = gen_reg_rtx (Pmode);
1861
1862 	      /* Generate the indirect jump sequence.  */
1863
1864 start_sequence ();
1865 emit_move_insn (new_reg, label_addr);
1866 emit_indirect_jump (new_reg);
1867 indirect_jump_sequence = get_insns ();
1868 end_sequence ();
1869
1870 /* Make sure every instruction in the new jump sequence has
1871 its basic block set to be cur_bb. */
1872
1873 for (cur_insn = indirect_jump_sequence; cur_insn;
1874 cur_insn = NEXT_INSN (cur_insn))
1875 {
1876 if (!BARRIER_P (cur_insn))
1877 BLOCK_FOR_INSN (cur_insn) = cur_bb;
1878 if (JUMP_P (cur_insn))
1879 jump_insn = cur_insn;
1880 }
1881
1882 /* Insert the new (indirect) jump sequence immediately before
1883 the unconditional jump, then delete the unconditional jump. */
1884
1885 emit_insn_before (indirect_jump_sequence, last_insn);
1886 delete_insn (last_insn);
1887
1888 /* Make BB_END for cur_bb be the jump instruction (NOT the
1889 barrier instruction at the end of the sequence...). */
1890
1891 BB_END (cur_bb) = jump_insn;
1892 }
1893 }
1894 }
1895 }
1896
1897 /* Add REG_CROSSING_JUMP note to all crossing jump insns. */
1898
1899 static void
1900 add_reg_crossing_jump_notes (void)
1901 {
1902 basic_block bb;
1903 edge e;
1904 edge_iterator ei;
1905
1906 FOR_EACH_BB (bb)
1907 FOR_EACH_EDGE (e, ei, bb->succs)
1908 if ((e->flags & EDGE_CROSSING)
1909 && JUMP_P (BB_END (e->src)))
1910 add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
1911 }
1912
1913 /* Verify, in the basic block chain, that there is at most one switch
1914 between hot/cold partitions. This is modelled on
1915 rtl_verify_flow_info_1, but it cannot go inside that function
1916 because this condition will not be true until after
1917 reorder_basic_blocks is called. */
1918
1919 static void
1920 verify_hot_cold_block_grouping (void)
1921 {
1922 basic_block bb;
1923 int err = 0;
1924 bool switched_sections = false;
1925 int current_partition = 0;
1926
1927 FOR_EACH_BB (bb)
1928 {
1929 if (!current_partition)
1930 current_partition = BB_PARTITION (bb);
1931 if (BB_PARTITION (bb) != current_partition)
1932 {
1933 if (switched_sections)
1934 {
1935 error ("multiple hot/cold transitions found (bb %i)",
1936 bb->index);
1937 err = 1;
1938 }
1939 else
1940 {
1941 switched_sections = true;
1942 current_partition = BB_PARTITION (bb);
1943 }
1944 }
1945 }
1946
1947 gcc_assert(!err);
1948 }
1949
1950 /* Reorder basic blocks.  The main entry point to this file.  */
1952
1953 static void
1954 reorder_basic_blocks (void)
1955 {
1956 int n_traces;
1957 int i;
1958 struct trace *traces;
1959
1960 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
1961
1962 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1963 return;
1964
1965 set_edge_can_fallthru_flag ();
1966 mark_dfs_back_edges ();
1967
1968 /* We are estimating the length of uncond jump insn only once since the code
1969 for getting the insn length always returns the minimal length now. */
1970 if (uncond_jump_length == 0)
1971 uncond_jump_length = get_uncond_jump_length ();
1972
1973 /* We need to know some information for each basic block. */
1974 array_size = GET_ARRAY_SIZE (last_basic_block);
1975 bbd = XNEWVEC (bbro_basic_block_data, array_size);
1976 for (i = 0; i < array_size; i++)
1977 {
1978 bbd[i].start_of_trace = -1;
1979 bbd[i].end_of_trace = -1;
1980 bbd[i].in_trace = -1;
1981 bbd[i].visited = 0;
1982 bbd[i].heap = NULL;
1983 bbd[i].node = NULL;
1984 }
1985
1986 traces = XNEWVEC (struct trace, n_basic_blocks);
1987 n_traces = 0;
1988 find_traces (&n_traces, traces);
1989 connect_traces (n_traces, traces);
1990 FREE (traces);
1991 FREE (bbd);
1992
1993 relink_block_chain (/*stay_in_cfglayout_mode=*/true);
1994
1995 if (dump_file)
1996 {
1997 if (dump_flags & TDF_DETAILS)
1998 dump_reg_info (dump_file);
1999 dump_flow_info (dump_file, dump_flags);
2000 }
2001
2002 if (flag_reorder_blocks_and_partition)
2003 verify_hot_cold_block_grouping ();
2004 }
2005
2006 /* Determine which partition the first basic block in the function
2007 belongs to, then find the first basic block in the current function
2008 that belongs to a different section, and insert a
2009 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2010 instruction stream. When writing out the assembly code,
2011 encountering this note will make the compiler switch between the
2012 hot and cold text sections. */
2013
2014 static void
2015 insert_section_boundary_note (void)
2016 {
2017 basic_block bb;
2018 rtx new_note;
2019 int first_partition = 0;
2020
2021 if (!flag_reorder_blocks_and_partition)
2022 return;
2023
2024 FOR_EACH_BB (bb)
2025 {
2026 if (!first_partition)
2027 first_partition = BB_PARTITION (bb);
2028 if (BB_PARTITION (bb) != first_partition)
2029 {
2030 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
2031 BB_HEAD (bb));
2032 /* ??? This kind of note always lives between basic blocks,
2033 but add_insn_before will set BLOCK_FOR_INSN anyway. */
2034 BLOCK_FOR_INSN (new_note) = NULL;
2035 break;
2036 }
2037 }
2038 }
2039
2040 /* Duplicate the blocks containing computed gotos. This basically unfactors
2041 computed gotos that were factored early on in the compilation process to
2042 speed up edge-based data flow. We used not to unfactor them again,
2043 which can seriously pessimize code with many computed jumps in the source
2044 code, such as interpreters. See e.g. PR15242. */
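
/* For illustration only: a hypothetical direct-threaded interpreter of
   the kind this pass helps (the names below are made up, not taken from
   any real code). In the source, every opcode body ends in its own
   computed goto, but earlier passes factor them all into one shared
   dispatch block; duplicating that block back into each predecessor is
   what "unfactoring" means here. A rough sketch, using the GNU C
   labels-as-values extension:

	void
	interp (const unsigned char *pc)
	{
	  static void *dispatch[] = { &&do_inc, &&do_halt };

	  goto *dispatch[*pc];
	do_inc:
	  pc++;
	  goto *dispatch[*pc];
	do_halt:
	  return;
	}

   After factoring, the two computed gotos above share a single block and
   every opcode body merely jumps to it; this pass copies that block back
   so each opcode body again ends in its own indirect jump. */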
2045
2046 static bool
2047 gate_duplicate_computed_gotos (void)
2048 {
2049 if (targetm.cannot_modify_jumps_p ())
2050 return false;
2051 return (optimize > 0
2052 && flag_expensive_optimizations
2053 && ! optimize_function_for_size_p (cfun));
2054 }
2055
2056
2057 static unsigned int
2058 duplicate_computed_gotos (void)
2059 {
2060 basic_block bb, new_bb;
2061 bitmap candidates;
2062 int max_size;
2063
2064 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2065 return 0;
2066
2067 clear_bb_flags ();
2068 cfg_layout_initialize (0);
2069
2070 /* We estimate the length of an uncond jump insn only once,
2071 since the code for getting the insn length always returns
2072 the minimal length now. */
2073 if (uncond_jump_length == 0)
2074 uncond_jump_length = get_uncond_jump_length ();
2075
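  /* Never duplicate a block whose estimated length exceeds the
     equivalent of PARAM_MAX_GOTO_DUPLICATION_INSNS unconditional
     jumps.  */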
2076 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2077 candidates = BITMAP_ALLOC (NULL);
2078
2079 /* Look for blocks that end in a computed jump, and see if such blocks
2080 are suitable for unfactoring. If a block is a candidate for unfactoring,
2081 mark it in the candidates. */
2082 FOR_EACH_BB (bb)
2083 {
2084 rtx insn;
2085 edge e;
2086 edge_iterator ei;
2087 int size, all_flags;
2088
2089 /* Build the reorder chain for the original order of blocks. */
2090 if (bb->next_bb != EXIT_BLOCK_PTR)
2091 bb->aux = bb->next_bb;
2092
2093 /* Obviously the block has to end in a computed jump. */
2094 if (!computed_jump_p (BB_END (bb)))
2095 continue;
2096
2097 /* Only consider blocks that can be duplicated. */
2098 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2099 || !can_duplicate_block_p (bb))
2100 continue;
2101
2102 /* Make sure that the block is small enough. */
2103 size = 0;
2104 FOR_BB_INSNS (bb, insn)
2105 if (INSN_P (insn))
2106 {
2107 size += get_attr_min_length (insn);
2108 if (size > max_size)
2109 break;
2110 }
2111 if (size > max_size)
2112 continue;
2113
2114 /* Final check: there must not be any incoming abnormal edges. */
2115 all_flags = 0;
2116 FOR_EACH_EDGE (e, ei, bb->preds)
2117 all_flags |= e->flags;
2118 if (all_flags & EDGE_COMPLEX)
2119 continue;
2120
2121 bitmap_set_bit (candidates, bb->index);
2122 }
2123
2124 /* Nothing to do if there is no computed jump here. */
2125 if (bitmap_empty_p (candidates))
2126 goto done;
2127
2128 /* Duplicate computed gotos. */
2129 FOR_EACH_BB (bb)
2130 {
2131 if (bb->flags & BB_VISITED)
2132 continue;
2133
2134 bb->flags |= BB_VISITED;
2135
2136 /* BB must have one outgoing edge. That edge must not lead to
2137 the exit block or the next block.
2138 The destination must have more than one predecessor. */
2139 if (!single_succ_p (bb)
2140 || single_succ (bb) == EXIT_BLOCK_PTR
2141 || single_succ (bb) == bb->next_bb
2142 || single_pred_p (single_succ (bb)))
2143 continue;
2144
2145 /* The successor block has to be a duplication candidate. */
2146 if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2147 continue;
2148
2149 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2150 new_bb->aux = bb->aux;
2151 bb->aux = new_bb;
2152 new_bb->flags |= BB_VISITED;
2153 }
2154
2155 done:
2156 cfg_layout_finalize ();
2157
2158 BITMAP_FREE (candidates);
2159 return 0;
2160 }
2161
2162 struct rtl_opt_pass pass_duplicate_computed_gotos =
2163 {
2164 {
2165 RTL_PASS,
2166 "compgotos", /* name */
2167 gate_duplicate_computed_gotos, /* gate */
2168 duplicate_computed_gotos, /* execute */
2169 NULL, /* sub */
2170 NULL, /* next */
2171 0, /* static_pass_number */
2172 TV_REORDER_BLOCKS, /* tv_id */
2173 0, /* properties_required */
2174 0, /* properties_provided */
2175 0, /* properties_destroyed */
2176 0, /* todo_flags_start */
2177 TODO_verify_rtl_sharing,/* todo_flags_finish */
2178 }
2179 };
2180
2181
2182 /* This function is the main entry point for the optimization that
2183 partitions hot and cold basic blocks into separate sections of the
2184 .o file (to improve performance and cache locality). Ideally it
2185 would be called after all optimizations that rearrange the CFG have
2186 been run. However, part of this optimization may introduce new
2187 register usage, so it must be called before register allocation has
2188 occurred. This means that this optimization is actually called
2189 well before the optimization that reorders basic blocks (see
2190 function above).
2191
2192 This optimization checks the feedback information to determine
2193 which basic blocks are hot/cold, and updates flags on the basic
2194 blocks to indicate which section they belong in. This information is
2195 later used for writing out sections in the .o file. Because hot
2196 and cold sections can be arbitrarily large (within the bounds of
2197 memory), far beyond the size of a single function, it is necessary
2198 to fix up all edges that cross section boundaries, to make sure the
2199 instructions used can actually span the required distance. The
2200 fixes are described below.
2201
2202 Fall-through edges must be changed into jumps; it is not safe or
2203 legal to fall through across a section boundary. Whenever a
2204 fall-through edge crossing a section boundary is encountered, a new
2205 basic block is inserted (in the same section as the fall-through
2206 source), and the fall through edge is redirected to the new basic
2207 block. The new basic block contains an unconditional jump to the
2208 original fall-through target. (If the unconditional jump is
2209 insufficient to cross section boundaries, that is dealt with a
2210 little later, see below).
2211
2212 In order to deal with architectures that have short conditional
2213 branches (which cannot span all of memory) we take any conditional
2214 jump that attempts to cross a section boundary and add a level of
2215 indirection: it becomes a conditional jump to a new basic block, in
2216 the same section. The new basic block contains an unconditional
2217 jump to the original target, in the other section.
2218
2219 For those architectures whose unconditional branch is also
2220 incapable of reaching all of memory, those unconditional jumps are
2221 converted into indirect jumps, through a register.
2222
2223 IMPORTANT NOTE: This optimization causes some messy interactions
2224 with the cfg cleanup optimizations; those optimizations want to
2225 merge blocks wherever possible, and to collapse indirect jump
2226 sequences (change "A jumps to B jumps to C" directly into "A jumps
2227 to C"). Those optimizations can undo the jump fixes that
2228 partitioning is required to make (see above), in order to ensure
2229 that jumps attempting to cross section boundaries are really able
2230 to cover whatever distance the jump requires (on many architectures
2231 conditional or unconditional jumps are not able to reach all of
2232 memory). Therefore tests have to be inserted into each such
2233 optimization to make sure that it does not undo the fixes needed to
2234 cross partition boundaries. This would be much less of a problem
2235 if we could perform this optimization later in the compilation, but
2236 unfortunately the fact that we may need to create indirect jumps
2237 (through registers) requires that this optimization be performed
2238 before register allocation.
2239
2240 Hot and cold basic blocks are partitioned and put in separate
2241 sections of the .o file, to reduce paging and improve cache
2242 performance (hopefully). This can result in bits of code from the
2243 same function being widely separated in the .o file. However, this
2244 is not reflected in the current bb structure. Therefore we must take
2245 care to ensure that: 1). There are no fall_thru edges that cross
2246 between sections; 2). For those architectures which have "short"
2247 conditional branches, all conditional branches that attempt to
2248 cross between sections are converted to unconditional branches;
2249 and, 3). For those architectures which have "short" unconditional
2250 branches, all unconditional branches that attempt to cross between
2251 sections are converted to indirect jumps.
2252
2253 The code for fixing up fall_thru edges that cross between hot and
2254 cold basic blocks does so by creating new basic blocks containing
2255 unconditional branches to the appropriate label in the "other"
2256 section. The new basic block is then put in the same (hot or cold)
2257 section as the original conditional branch, and the fall_thru edge
2258 is modified to fall into the new basic block instead. By adding
2259 this level of indirection we end up with only unconditional branches
2260 crossing between hot and cold sections.
2261
2262 Conditional branches are dealt with by adding a level of indirection.
2263 A new basic block is added in the same (hot/cold) section as the
2264 conditional branch, and the conditional branch is retargeted to the
2265 new basic block. The new basic block contains an unconditional branch
2266 to the original target of the conditional branch (in the other section).
2267
2268 Unconditional branches are dealt with by converting them into
2269 indirect jumps. */
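
/* As an illustration of the fixes described above (the labels below are
   hypothetical, not something this file emits literally): a conditional
   branch from the hot section to a label L_cold in the cold section,

	if (x)
	  goto L_cold;

   is retargeted at a new block in the same (hot) section that does
   nothing but jump to the old target,

	if (x)
	  goto L_hot_stub;
	...
      L_hot_stub:
	goto L_cold;

   and, on targets whose unconditional branches also cannot reach all of
   memory, the remaining crossing "goto L_cold" is converted into an
   indirect jump through a register loaded with the address of L_cold. */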
2270
2271 static unsigned
2272 partition_hot_cold_basic_blocks (void)
2273 {
2274 VEC(edge, heap) *crossing_edges;
2275
2276 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2277 return 0;
2278
2279 df_set_flags (DF_DEFER_INSN_RESCAN);
2280
2281 crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2282 if (crossing_edges == NULL)
2283 return 0;
2284
2285 /* Make sure the source of any crossing edge ends in a jump and the
2286 destination of any crossing edge has a label. */
2287 add_labels_and_missing_jumps (crossing_edges);
2288
2289 /* Convert all crossing fall_thru edges into non-crossing fall-thrus
2290 to new basic blocks containing unconditional jumps to the original
2291 fall-through destinations. */
2292 fix_up_fall_thru_edges ();
2293
2294 /* If the architecture does not have conditional branches that can
2295 span all of memory, convert crossing conditional branches into
2296 crossing unconditional branches. */
2297 if (!HAS_LONG_COND_BRANCH)
2298 fix_crossing_conditional_branches ();
2299
2300 /* If the architecture does not have unconditional branches that
2301 can span all of memory, convert crossing unconditional branches
2302 into indirect jumps. Since adding an indirect jump also adds
2303 a new register usage, update the register usage information as
2304 well. */
2305 if (!HAS_LONG_UNCOND_BRANCH)
2306 fix_crossing_unconditional_branches ();
2307
2308 add_reg_crossing_jump_notes ();
2309
2310 /* Clear bb->aux fields that the above routines were using. */
2311 clear_aux_for_blocks ();
2312
2313 VEC_free (edge, heap, crossing_edges);
2314
2315 /* ??? FIXME: DF generates the bb info for a block immediately.
2316 And by immediately, I mean *during* creation of the block.
2317
2318 #0 df_bb_refs_collect
2319 #1 in df_bb_refs_record
2320 #2 in create_basic_block_structure
2321
2322 Which means that the bb_has_eh_pred test in df_bb_refs_collect
2323 will *always* fail, because no edges can have been added to the
2324 block yet. Which of course means we don't add the right
2325 artificial refs, which means we fail df_verify (much) later.
2326
2327 The cleanest solution would seem to be making DF_DEFER_INSN_RESCAN
2328 imply that we also shouldn't grab data from the new blocks that the
2329 new insns are in. In this way one can create the block, link
2330 it up properly, and have everything Just Work later, when deferred
2331 insns are processed.
2332
2333 In the meantime, we have no other option but to throw away all
2334 of the DF data and recompute it all. */
2335 if (cfun->eh->lp_array)
2336 {
2337 df_finish_pass (true);
2338 df_scan_alloc (NULL);
2339 df_scan_blocks ();
2340 /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2341 data. We blindly generated all of them when creating the new
2342 landing pad. Delete those assignments we don't use. */
2343 df_set_flags (DF_LR_RUN_DCE);
2344 df_analyze ();
2345 }
2346
2347 return TODO_verify_flow | TODO_verify_rtl_sharing;
2348 }
2349 \f
2350 static bool
2351 gate_handle_reorder_blocks (void)
2352 {
2353 if (targetm.cannot_modify_jumps_p ())
2354 return false;
2355 /* Don't reorder blocks when optimizing for size because extra jump insns may
2356 be created; also barriers may create extra padding.
2357
2358 More correctly we should have a block reordering mode that tried to
2359 minimize the combined size of all the jumps. This would more or less
2360 automatically remove extra jumps, but would also try to use more short
2361 jumps instead of long jumps. */
2362 if (!optimize_function_for_speed_p (cfun))
2363 return false;
2364 return (optimize > 0
2365 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2366 }
2367
2368
2369 /* Reorder basic blocks. */
2370 static unsigned int
2371 rest_of_handle_reorder_blocks (void)
2372 {
2373 basic_block bb;
2374
2375 /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2376 splitting possibly introduced more crossjumping opportunities. */
2377 cfg_layout_initialize (CLEANUP_EXPENSIVE);
2378
2379 reorder_basic_blocks ();
2380 cleanup_cfg (CLEANUP_EXPENSIVE);
2381
2382 FOR_EACH_BB (bb)
2383 if (bb->next_bb != EXIT_BLOCK_PTR)
2384 bb->aux = bb->next_bb;
2385 cfg_layout_finalize ();
2386
2387 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */
2388 insert_section_boundary_note ();
2389 return 0;
2390 }
2391
2392 struct rtl_opt_pass pass_reorder_blocks =
2393 {
2394 {
2395 RTL_PASS,
2396 "bbro", /* name */
2397 gate_handle_reorder_blocks, /* gate */
2398 rest_of_handle_reorder_blocks, /* execute */
2399 NULL, /* sub */
2400 NULL, /* next */
2401 0, /* static_pass_number */
2402 TV_REORDER_BLOCKS, /* tv_id */
2403 0, /* properties_required */
2404 0, /* properties_provided */
2405 0, /* properties_destroyed */
2406 0, /* todo_flags_start */
2407 TODO_verify_rtl_sharing, /* todo_flags_finish */
2408 }
2409 };
2410
2411 static bool
2412 gate_handle_partition_blocks (void)
2413 {
2414 /* The optimization to partition hot/cold basic blocks into separate
2415 sections of the .o file does not work well with linkonce or with
2416 user defined section attributes. Don't call it if either case
2417 arises. */
2418 return (flag_reorder_blocks_and_partition
2419 && optimize
2420 /* See gate_handle_reorder_blocks. We should not partition if
2421 we are going to omit the reordering. */
2422 && optimize_function_for_speed_p (cfun)
2423 && !DECL_ONE_ONLY (current_function_decl)
2424 && !user_defined_section_attribute);
2425 }
2426
2427 struct rtl_opt_pass pass_partition_blocks =
2428 {
2429 {
2430 RTL_PASS,
2431 "bbpart", /* name */
2432 gate_handle_partition_blocks, /* gate */
2433 partition_hot_cold_basic_blocks, /* execute */
2434 NULL, /* sub */
2435 NULL, /* next */
2436 0, /* static_pass_number */
2437 TV_REORDER_BLOCKS, /* tv_id */
2438 PROP_cfglayout, /* properties_required */
2439 0, /* properties_provided */
2440 0, /* properties_destroyed */
2441 0, /* todo_flags_start */
2442 0 /* todo_flags_finish */
2443 }
2444 };