BASE-VER: Set to 4.8.0.
[gcc.git] / gcc / cfgloopmanip.c
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "obstack.h"
28 #include "basic-block.h"
29 #include "cfgloop.h"
30 #include "cfglayout.h"
31 #include "cfghooks.h"
32 #include "output.h"
33 #include "tree-flow.h"
34
35 static void copy_loops_to (struct loop **, int,
36 struct loop *);
37 static void loop_redirect_edge (edge, basic_block);
38 static void remove_bbs (basic_block *, int);
39 static bool rpe_enum_p (const_basic_block, const void *);
40 static int find_path (edge, basic_block **);
41 static void fix_loop_placements (struct loop *, bool *);
42 static bool fix_bb_placement (basic_block);
43 static void fix_bb_placements (basic_block, bool *);
44 static void unloop (struct loop *, bool *);
45
46 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
47
48 /* Checks whether basic block BB is dominated by DATA. */
49 static bool
50 rpe_enum_p (const_basic_block bb, const void *data)
51 {
52 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
53 }
54
55 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
56
57 static void
58 remove_bbs (basic_block *bbs, int nbbs)
59 {
60 int i;
61
62 for (i = 0; i < nbbs; i++)
63 delete_basic_block (bbs[i]);
64 }
65
66 /* Find path -- i.e. the basic blocks dominated by edge E and put them
67 into array BBS, that will be allocated large enough to contain them.
68 E->dest must have exactly one predecessor for this to work (it is
69 easy to achieve and we do not put it here because we do not want to
70 alter anything by this function). The number of basic blocks in the
71 path is returned. */
72 static int
73 find_path (edge e, basic_block **bbs)
74 {
75 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
76
77 /* Find bbs in the path. */
78 *bbs = XCNEWVEC (basic_block, n_basic_blocks);
79 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
80 n_basic_blocks, e->dest);
81 }
82
83 /* Fix placement of basic block BB inside loop hierarchy --
84 Let L be a loop to that BB belongs. Then every successor of BB must either
85 1) belong to some superloop of loop L, or
86 2) be a header of loop K such that K->outer is superloop of L
87 Returns true if we had to move BB into other loop to enforce this condition,
88 false if the placement of BB was already correct (provided that placements
89 of its successors are correct). */
90 static bool
91 fix_bb_placement (basic_block bb)
92 {
93 edge e;
94 edge_iterator ei;
95 struct loop *loop = current_loops->tree_root, *act;
96
97 FOR_EACH_EDGE (e, ei, bb->succs)
98 {
99 if (e->dest == EXIT_BLOCK_PTR)
100 continue;
101
102 act = e->dest->loop_father;
103 if (act->header == e->dest)
104 act = loop_outer (act);
105
106 if (flow_loop_nested_p (loop, act))
107 loop = act;
108 }
109
110 if (loop == bb->loop_father)
111 return false;
112
113 remove_bb_from_loops (bb);
114 add_bb_to_loop (bb, loop);
115
116 return true;
117 }
118
119 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
120 of LOOP to that leads at least one exit edge of LOOP, and set it
121 as the immediate superloop of LOOP. Return true if the immediate superloop
122 of LOOP changed. */
123
124 static bool
125 fix_loop_placement (struct loop *loop)
126 {
127 unsigned i;
128 edge e;
129 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
130 struct loop *father = current_loops->tree_root, *act;
131 bool ret = false;
132
133 FOR_EACH_VEC_ELT (edge, exits, i, e)
134 {
135 act = find_common_loop (loop, e->dest->loop_father);
136 if (flow_loop_nested_p (father, act))
137 father = act;
138 }
139
140 if (father != loop_outer (loop))
141 {
142 for (act = loop_outer (loop); act != father; act = loop_outer (act))
143 act->num_nodes -= loop->num_nodes;
144 flow_loop_tree_node_remove (loop);
145 flow_loop_tree_node_add (father, loop);
146
147 /* The exit edges of LOOP no longer exits its original immediate
148 superloops; remove them from the appropriate exit lists. */
149 FOR_EACH_VEC_ELT (edge, exits, i, e)
150 rescan_loop_exit (e, false, false);
151
152 ret = true;
153 }
154
155 VEC_free (edge, heap, exits);
156 return ret;
157 }
158
159 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
160 enforce condition condition stated in description of fix_bb_placement. We
161 start from basic block FROM that had some of its successors removed, so that
162 his placement no longer has to be correct, and iteratively fix placement of
163 its predecessors that may change if placement of FROM changed. Also fix
164 placement of subloops of FROM->loop_father, that might also be altered due
165 to this change; the condition for them is similar, except that instead of
166 successors we consider edges coming out of the loops.
167
168 If the changes may invalidate the information about irreducible regions,
169 IRRED_INVALIDATED is set to true. */
170
171 static void
172 fix_bb_placements (basic_block from,
173 bool *irred_invalidated)
174 {
175 sbitmap in_queue;
176 basic_block *queue, *qtop, *qbeg, *qend;
177 struct loop *base_loop, *target_loop;
178 edge e;
179
180 /* We pass through blocks back-reachable from FROM, testing whether some
181 of their successors moved to outer loop. It may be necessary to
182 iterate several times, but it is finite, as we stop unless we move
183 the basic block up the loop structure. The whole story is a bit
184 more complicated due to presence of subloops, those are moved using
185 fix_loop_placement. */
186
187 base_loop = from->loop_father;
188 /* If we are already in the outermost loop, the basic blocks cannot be moved
189 outside of it. If FROM is the header of the base loop, it cannot be moved
190 outside of it, either. In both cases, we can end now. */
191 if (base_loop == current_loops->tree_root
192 || from == base_loop->header)
193 return;
194
195 in_queue = sbitmap_alloc (last_basic_block);
196 sbitmap_zero (in_queue);
197 SET_BIT (in_queue, from->index);
198 /* Prevent us from going out of the base_loop. */
199 SET_BIT (in_queue, base_loop->header->index);
200
201 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
202 qtop = queue + base_loop->num_nodes + 1;
203 qbeg = queue;
204 qend = queue + 1;
205 *qbeg = from;
206
207 while (qbeg != qend)
208 {
209 edge_iterator ei;
210 from = *qbeg;
211 qbeg++;
212 if (qbeg == qtop)
213 qbeg = queue;
214 RESET_BIT (in_queue, from->index);
215
216 if (from->loop_father->header == from)
217 {
218 /* Subloop header, maybe move the loop upward. */
219 if (!fix_loop_placement (from->loop_father))
220 continue;
221 target_loop = loop_outer (from->loop_father);
222 }
223 else
224 {
225 /* Ordinary basic block. */
226 if (!fix_bb_placement (from))
227 continue;
228 target_loop = from->loop_father;
229 }
230
231 FOR_EACH_EDGE (e, ei, from->succs)
232 {
233 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
234 *irred_invalidated = true;
235 }
236
237 /* Something has changed, insert predecessors into queue. */
238 FOR_EACH_EDGE (e, ei, from->preds)
239 {
240 basic_block pred = e->src;
241 struct loop *nca;
242
243 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
244 *irred_invalidated = true;
245
246 if (TEST_BIT (in_queue, pred->index))
247 continue;
248
249 /* If it is subloop, then it either was not moved, or
250 the path up the loop tree from base_loop do not contain
251 it. */
252 nca = find_common_loop (pred->loop_father, base_loop);
253 if (pred->loop_father != base_loop
254 && (nca == base_loop
255 || nca != pred->loop_father))
256 pred = pred->loop_father->header;
257 else if (!flow_loop_nested_p (target_loop, pred->loop_father))
258 {
259 /* If PRED is already higher in the loop hierarchy than the
260 TARGET_LOOP to that we moved FROM, the change of the position
261 of FROM does not affect the position of PRED, so there is no
262 point in processing it. */
263 continue;
264 }
265
266 if (TEST_BIT (in_queue, pred->index))
267 continue;
268
269 /* Schedule the basic block. */
270 *qend = pred;
271 qend++;
272 if (qend == qtop)
273 qend = queue;
274 SET_BIT (in_queue, pred->index);
275 }
276 }
277 free (in_queue);
278 free (queue);
279 }
280
281 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
282 and update loop structures and dominators. Return true if we were able
283 to remove the path, false otherwise (and nothing is affected then). */
284 bool
285 remove_path (edge e)
286 {
287 edge ae;
288 basic_block *rem_bbs, *bord_bbs, from, bb;
289 VEC (basic_block, heap) *dom_bbs;
290 int i, nrem, n_bord_bbs;
291 sbitmap seen;
292 bool irred_invalidated = false;
293 edge_iterator ei;
294 struct loop *l, *f;
295
296 if (!can_remove_branch_p (e))
297 return false;
298
299 /* Keep track of whether we need to update information about irreducible
300 regions. This is the case if the removed area is a part of the
301 irreducible region, or if the set of basic blocks that belong to a loop
302 that is inside an irreducible region is changed, or if such a loop is
303 removed. */
304 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
305 irred_invalidated = true;
306
307 /* We need to check whether basic blocks are dominated by the edge
308 e, but we only have basic block dominators. This is easy to
309 fix -- when e->dest has exactly one predecessor, this corresponds
310 to blocks dominated by e->dest, if not, split the edge. */
311 if (!single_pred_p (e->dest))
312 e = single_pred_edge (split_edge (e));
313
314 /* It may happen that by removing path we remove one or more loops
315 we belong to. In this case first unloop the loops, then proceed
316 normally. We may assume that e->dest is not a header of any loop,
317 as it now has exactly one predecessor. */
318 for (l = e->src->loop_father; loop_outer (l); l = f)
319 {
320 f = loop_outer (l);
321 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
322 unloop (l, &irred_invalidated);
323 }
324
325 /* Identify the path. */
326 nrem = find_path (e, &rem_bbs);
327
328 n_bord_bbs = 0;
329 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks);
330 seen = sbitmap_alloc (last_basic_block);
331 sbitmap_zero (seen);
332
333 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
334 for (i = 0; i < nrem; i++)
335 SET_BIT (seen, rem_bbs[i]->index);
336 if (!irred_invalidated)
337 FOR_EACH_EDGE (ae, ei, e->src->succs)
338 if (ae != e && ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index)
339 && ae->flags & EDGE_IRREDUCIBLE_LOOP)
340 irred_invalidated = true;
341 for (i = 0; i < nrem; i++)
342 {
343 bb = rem_bbs[i];
344 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
345 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
346 {
347 SET_BIT (seen, ae->dest->index);
348 bord_bbs[n_bord_bbs++] = ae->dest;
349
350 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
351 irred_invalidated = true;
352 }
353 }
354
355 /* Remove the path. */
356 from = e->src;
357 remove_branch (e);
358 dom_bbs = NULL;
359
360 /* Cancel loops contained in the path. */
361 for (i = 0; i < nrem; i++)
362 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
363 cancel_loop_tree (rem_bbs[i]->loop_father);
364
365 remove_bbs (rem_bbs, nrem);
366 free (rem_bbs);
367
368 /* Find blocks whose dominators may be affected. */
369 sbitmap_zero (seen);
370 for (i = 0; i < n_bord_bbs; i++)
371 {
372 basic_block ldom;
373
374 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
375 if (TEST_BIT (seen, bb->index))
376 continue;
377 SET_BIT (seen, bb->index);
378
379 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
380 ldom;
381 ldom = next_dom_son (CDI_DOMINATORS, ldom))
382 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
383 VEC_safe_push (basic_block, heap, dom_bbs, ldom);
384 }
385
386 free (seen);
387
388 /* Recount dominators. */
389 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
390 VEC_free (basic_block, heap, dom_bbs);
391 free (bord_bbs);
392
393 /* Fix placements of basic blocks inside loops and the placement of
394 loops in the loop tree. */
395 fix_bb_placements (from, &irred_invalidated);
396 fix_loop_placements (from->loop_father, &irred_invalidated);
397
398 if (irred_invalidated
399 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
400 mark_irreducible_loops ();
401
402 return true;
403 }
404
405 /* Creates place for a new LOOP in loops structure. */
406
407 static void
408 place_new_loop (struct loop *loop)
409 {
410 loop->num = number_of_loops ();
411 VEC_safe_push (loop_p, gc, current_loops->larray, loop);
412 }
413
414 /* Given LOOP structure with filled header and latch, find the body of the
415 corresponding loop and add it to loops tree. Insert the LOOP as a son of
416 outer. */
417
418 void
419 add_loop (struct loop *loop, struct loop *outer)
420 {
421 basic_block *bbs;
422 int i, n;
423 struct loop *subloop;
424 edge e;
425 edge_iterator ei;
426
427 /* Add it to loop structure. */
428 place_new_loop (loop);
429 flow_loop_tree_node_add (outer, loop);
430
431 /* Find its nodes. */
432 bbs = XNEWVEC (basic_block, n_basic_blocks);
433 n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
434
435 for (i = 0; i < n; i++)
436 {
437 if (bbs[i]->loop_father == outer)
438 {
439 remove_bb_from_loops (bbs[i]);
440 add_bb_to_loop (bbs[i], loop);
441 continue;
442 }
443
444 loop->num_nodes++;
445
446 /* If we find a direct subloop of OUTER, move it to LOOP. */
447 subloop = bbs[i]->loop_father;
448 if (loop_outer (subloop) == outer
449 && subloop->header == bbs[i])
450 {
451 flow_loop_tree_node_remove (subloop);
452 flow_loop_tree_node_add (loop, subloop);
453 }
454 }
455
456 /* Update the information about loop exit edges. */
457 for (i = 0; i < n; i++)
458 {
459 FOR_EACH_EDGE (e, ei, bbs[i]->succs)
460 {
461 rescan_loop_exit (e, false, false);
462 }
463 }
464
465 free (bbs);
466 }
467
468 /* Multiply all frequencies in LOOP by NUM/DEN. */
469 void
470 scale_loop_frequencies (struct loop *loop, int num, int den)
471 {
472 basic_block *bbs;
473
474 bbs = get_loop_body (loop);
475 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
476 free (bbs);
477 }
478
479 /* Recompute dominance information for basic blocks outside LOOP. */
480
481 static void
482 update_dominators_in_loop (struct loop *loop)
483 {
484 VEC (basic_block, heap) *dom_bbs = NULL;
485 sbitmap seen;
486 basic_block *body;
487 unsigned i;
488
489 seen = sbitmap_alloc (last_basic_block);
490 sbitmap_zero (seen);
491 body = get_loop_body (loop);
492
493 for (i = 0; i < loop->num_nodes; i++)
494 SET_BIT (seen, body[i]->index);
495
496 for (i = 0; i < loop->num_nodes; i++)
497 {
498 basic_block ldom;
499
500 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
501 ldom;
502 ldom = next_dom_son (CDI_DOMINATORS, ldom))
503 if (!TEST_BIT (seen, ldom->index))
504 {
505 SET_BIT (seen, ldom->index);
506 VEC_safe_push (basic_block, heap, dom_bbs, ldom);
507 }
508 }
509
510 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
511 free (body);
512 free (seen);
513 VEC_free (basic_block, heap, dom_bbs);
514 }
515
516 /* Creates an if region as shown above. CONDITION is used to create
517 the test for the if.
518
519 |
520 | ------------- -------------
521 | | pred_bb | | pred_bb |
522 | ------------- -------------
523 | | |
524 | | | ENTRY_EDGE
525 | | ENTRY_EDGE V
526 | | ====> -------------
527 | | | cond_bb |
528 | | | CONDITION |
529 | | -------------
530 | V / \
531 | ------------- e_false / \ e_true
532 | | succ_bb | V V
533 | ------------- ----------- -----------
534 | | false_bb | | true_bb |
535 | ----------- -----------
536 | \ /
537 | \ /
538 | V V
539 | -------------
540 | | join_bb |
541 | -------------
542 | | exit_edge (result)
543 | V
544 | -----------
545 | | succ_bb |
546 | -----------
547 |
548 */
549
550 edge
551 create_empty_if_region_on_edge (edge entry_edge, tree condition)
552 {
553
554 basic_block cond_bb, true_bb, false_bb, join_bb;
555 edge e_true, e_false, exit_edge;
556 gimple cond_stmt;
557 tree simple_cond;
558 gimple_stmt_iterator gsi;
559
560 cond_bb = split_edge (entry_edge);
561
562 /* Insert condition in cond_bb. */
563 gsi = gsi_last_bb (cond_bb);
564 simple_cond =
565 force_gimple_operand_gsi (&gsi, condition, true, NULL,
566 false, GSI_NEW_STMT);
567 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
568 gsi = gsi_last_bb (cond_bb);
569 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
570
571 join_bb = split_edge (single_succ_edge (cond_bb));
572
573 e_true = single_succ_edge (cond_bb);
574 true_bb = split_edge (e_true);
575
576 e_false = make_edge (cond_bb, join_bb, 0);
577 false_bb = split_edge (e_false);
578
579 e_true->flags &= ~EDGE_FALLTHRU;
580 e_true->flags |= EDGE_TRUE_VALUE;
581 e_false->flags &= ~EDGE_FALLTHRU;
582 e_false->flags |= EDGE_FALSE_VALUE;
583
584 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
585 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
586 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
587 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
588
589 exit_edge = single_succ_edge (join_bb);
590
591 if (single_pred_p (exit_edge->dest))
592 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
593
594 return exit_edge;
595 }
596
597 /* create_empty_loop_on_edge
598 |
599 | - pred_bb - ------ pred_bb ------
600 | | | | iv0 = initial_value |
601 | -----|----- ---------|-----------
602 | | ______ | entry_edge
603 | | entry_edge / | |
604 | | ====> | -V---V- loop_header -------------
605 | V | | iv_before = phi (iv0, iv_after) |
606 | - succ_bb - | ---|-----------------------------
607 | | | | |
608 | ----------- | ---V--- loop_body ---------------
609 | | | iv_after = iv_before + stride |
610 | | | if (iv_before < upper_bound) |
611 | | ---|--------------\--------------
612 | | | \ exit_e
613 | | V \
614 | | - loop_latch - V- succ_bb -
615 | | | | | |
616 | | /------------- -----------
617 | \ ___ /
618
619 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
620 that is used before the increment of IV. IV_BEFORE should be used for
621 adding code to the body that uses the IV. OUTER is the outer loop in
622 which the new loop should be inserted.
623
624 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
625 inserted on the loop entry edge. This implies that this function
626 should be used only when the UPPER_BOUND expression is a loop
627 invariant. */
628
629 struct loop *
630 create_empty_loop_on_edge (edge entry_edge,
631 tree initial_value,
632 tree stride, tree upper_bound,
633 tree iv,
634 tree *iv_before,
635 tree *iv_after,
636 struct loop *outer)
637 {
638 basic_block loop_header, loop_latch, succ_bb, pred_bb;
639 struct loop *loop;
640 gimple_stmt_iterator gsi;
641 gimple_seq stmts;
642 gimple cond_expr;
643 tree exit_test;
644 edge exit_e;
645 int prob;
646
647 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
648
649 /* Create header, latch and wire up the loop. */
650 pred_bb = entry_edge->src;
651 loop_header = split_edge (entry_edge);
652 loop_latch = split_edge (single_succ_edge (loop_header));
653 succ_bb = single_succ (loop_latch);
654 make_edge (loop_header, succ_bb, 0);
655 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
656
657 /* Set immediate dominator information. */
658 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
659 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
660 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
661
662 /* Initialize a loop structure and put it in a loop hierarchy. */
663 loop = alloc_loop ();
664 loop->header = loop_header;
665 loop->latch = loop_latch;
666 add_loop (loop, outer);
667
668 /* TODO: Fix frequencies and counts. */
669 prob = REG_BR_PROB_BASE / 2;
670
671 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
672
673 /* Update dominators. */
674 update_dominators_in_loop (loop);
675
676 /* Modify edge flags. */
677 exit_e = single_exit (loop);
678 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
679 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
680
681 /* Construct IV code in loop. */
682 initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
683 if (stmts)
684 {
685 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
686 gsi_commit_edge_inserts ();
687 }
688
689 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
690 if (stmts)
691 {
692 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
693 gsi_commit_edge_inserts ();
694 }
695
696 gsi = gsi_last_bb (loop_header);
697 create_iv (initial_value, stride, iv, loop, &gsi, false,
698 iv_before, iv_after);
699
700 /* Insert loop exit condition. */
701 cond_expr = gimple_build_cond
702 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
703
704 exit_test = gimple_cond_lhs (cond_expr);
705 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
706 false, GSI_NEW_STMT);
707 gimple_cond_set_lhs (cond_expr, exit_test);
708 gsi = gsi_last_bb (exit_e->src);
709 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
710
711 split_block_after_labels (loop_header);
712
713 return loop;
714 }
715
716 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
717 latch to header and update loop tree and dominators
718 accordingly. Everything between them plus LATCH_EDGE destination must
719 be dominated by HEADER_EDGE destination, and back-reachable from
720 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
721 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
722 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
723 Returns the newly created loop. Frequencies and counts in the new loop
724 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
725
726 struct loop *
727 loopify (edge latch_edge, edge header_edge,
728 basic_block switch_bb, edge true_edge, edge false_edge,
729 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
730 {
731 basic_block succ_bb = latch_edge->dest;
732 basic_block pred_bb = header_edge->src;
733 struct loop *loop = alloc_loop ();
734 struct loop *outer = loop_outer (succ_bb->loop_father);
735 int freq;
736 gcov_type cnt;
737 edge e;
738 edge_iterator ei;
739
740 loop->header = header_edge->dest;
741 loop->latch = latch_edge->src;
742
743 freq = EDGE_FREQUENCY (header_edge);
744 cnt = header_edge->count;
745
746 /* Redirect edges. */
747 loop_redirect_edge (latch_edge, loop->header);
748 loop_redirect_edge (true_edge, succ_bb);
749
750 /* During loop versioning, one of the switch_bb edge is already properly
751 set. Do not redirect it again unless redirect_all_edges is true. */
752 if (redirect_all_edges)
753 {
754 loop_redirect_edge (header_edge, switch_bb);
755 loop_redirect_edge (false_edge, loop->header);
756
757 /* Update dominators. */
758 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
759 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
760 }
761
762 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
763
764 /* Compute new loop. */
765 add_loop (loop, outer);
766
767 /* Add switch_bb to appropriate loop. */
768 if (switch_bb->loop_father)
769 remove_bb_from_loops (switch_bb);
770 add_bb_to_loop (switch_bb, outer);
771
772 /* Fix frequencies. */
773 if (redirect_all_edges)
774 {
775 switch_bb->frequency = freq;
776 switch_bb->count = cnt;
777 FOR_EACH_EDGE (e, ei, switch_bb->succs)
778 {
779 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
780 }
781 }
782 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
783 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
784 update_dominators_in_loop (loop);
785
786 return loop;
787 }
788
789 /* Remove the latch edge of a LOOP and update loops to indicate that
790 the LOOP was removed. After this function, original loop latch will
791 have no successor, which caller is expected to fix somehow.
792
793 If this may cause the information about irreducible regions to become
794 invalid, IRRED_INVALIDATED is set to true. */
795
796 static void
797 unloop (struct loop *loop, bool *irred_invalidated)
798 {
799 basic_block *body;
800 struct loop *ploop;
801 unsigned i, n;
802 basic_block latch = loop->latch;
803 bool dummy = false;
804
805 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
806 *irred_invalidated = true;
807
808 /* This is relatively straightforward. The dominators are unchanged, as
809 loop header dominates loop latch, so the only thing we have to care of
810 is the placement of loops and basic blocks inside the loop tree. We
811 move them all to the loop->outer, and then let fix_bb_placements do
812 its work. */
813
814 body = get_loop_body (loop);
815 n = loop->num_nodes;
816 for (i = 0; i < n; i++)
817 if (body[i]->loop_father == loop)
818 {
819 remove_bb_from_loops (body[i]);
820 add_bb_to_loop (body[i], loop_outer (loop));
821 }
822 free(body);
823
824 while (loop->inner)
825 {
826 ploop = loop->inner;
827 flow_loop_tree_node_remove (ploop);
828 flow_loop_tree_node_add (loop_outer (loop), ploop);
829 }
830
831 /* Remove the loop and free its data. */
832 delete_loop (loop);
833
834 remove_edge (single_succ_edge (latch));
835
836 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
837 there is an irreducible region inside the cancelled loop, the flags will
838 be still correct. */
839 fix_bb_placements (latch, &dummy);
840 }
841
842 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
843 condition stated in description of fix_loop_placement holds for them.
844 It is used in case when we removed some edges coming out of LOOP, which
845 may cause the right placement of LOOP inside loop tree to change.
846
847 IRRED_INVALIDATED is set to true if a change in the loop structures might
848 invalidate the information about irreducible regions. */
849
850 static void
851 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
852 {
853 struct loop *outer;
854
855 while (loop_outer (loop))
856 {
857 outer = loop_outer (loop);
858 if (!fix_loop_placement (loop))
859 break;
860
861 /* Changing the placement of a loop in the loop tree may alter the
862 validity of condition 2) of the description of fix_bb_placement
863 for its preheader, because the successor is the header and belongs
864 to the loop. So call fix_bb_placements to fix up the placement
865 of the preheader and (possibly) of its predecessors. */
866 fix_bb_placements (loop_preheader_edge (loop)->src,
867 irred_invalidated);
868 loop = outer;
869 }
870 }
871
872 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
873 created loop into loops structure. */
874 struct loop *
875 duplicate_loop (struct loop *loop, struct loop *target)
876 {
877 struct loop *cloop;
878 cloop = alloc_loop ();
879 place_new_loop (cloop);
880
881 /* Mark the new loop as copy of LOOP. */
882 set_loop_copy (loop, cloop);
883
884 /* Add it to target. */
885 flow_loop_tree_node_add (target, cloop);
886
887 return cloop;
888 }
889
890 /* Copies structure of subloops of LOOP into TARGET loop, placing
891 newly created loops into loop tree. */
892 void
893 duplicate_subloops (struct loop *loop, struct loop *target)
894 {
895 struct loop *aloop, *cloop;
896
897 for (aloop = loop->inner; aloop; aloop = aloop->next)
898 {
899 cloop = duplicate_loop (aloop, target);
900 duplicate_subloops (aloop, cloop);
901 }
902 }
903
904 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
905 into TARGET loop, placing newly created loops into loop tree. */
906 static void
907 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
908 {
909 struct loop *aloop;
910 int i;
911
912 for (i = 0; i < n; i++)
913 {
914 aloop = duplicate_loop (copied_loops[i], target);
915 duplicate_subloops (copied_loops[i], aloop);
916 }
917 }
918
919 /* Redirects edge E to basic block DEST. */
920 static void
921 loop_redirect_edge (edge e, basic_block dest)
922 {
923 if (e->dest == dest)
924 return;
925
926 redirect_edge_and_branch_force (e, dest);
927 }
928
929 /* Check whether LOOP's body can be duplicated. */
930 bool
931 can_duplicate_loop_p (const struct loop *loop)
932 {
933 int ret;
934 basic_block *bbs = get_loop_body (loop);
935
936 ret = can_copy_bbs_p (bbs, loop->num_nodes);
937 free (bbs);
938
939 return ret;
940 }
941
942 /* Sets probability and count of edge E to zero. The probability and count
943 is redistributed evenly to the remaining edges coming from E->src. */
944
945 static void
946 set_zero_probability (edge e)
947 {
948 basic_block bb = e->src;
949 edge_iterator ei;
950 edge ae, last = NULL;
951 unsigned n = EDGE_COUNT (bb->succs);
952 gcov_type cnt = e->count, cnt1;
953 unsigned prob = e->probability, prob1;
954
955 gcc_assert (n > 1);
956 cnt1 = cnt / (n - 1);
957 prob1 = prob / (n - 1);
958
959 FOR_EACH_EDGE (ae, ei, bb->succs)
960 {
961 if (ae == e)
962 continue;
963
964 ae->probability += prob1;
965 ae->count += cnt1;
966 last = ae;
967 }
968
969 /* Move the rest to one of the edges. */
970 last->probability += prob % (n - 1);
971 last->count += cnt % (n - 1);
972
973 e->probability = 0;
974 e->count = 0;
975 }
976
977 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
978 loop structure and dominators. E's destination must be LOOP header for
979 this to work, i.e. it must be entry or latch edge of this loop; these are
980 unique, as the loops must have preheaders for this function to work
981 correctly (in case E is latch, the function unrolls the loop, if E is entry
982 edge, it peels the loop). Store edges created by copying ORIG edge from
983 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
984 original LOOP body, the other copies are numbered in order given by control
985 flow through them) into TO_REMOVE array. Returns false if duplication is
986 impossible. */
987
988 bool
989 duplicate_loop_to_header_edge (struct loop *loop, edge e,
990 unsigned int ndupl, sbitmap wont_exit,
991 edge orig, VEC (edge, heap) **to_remove,
992 int flags)
993 {
994 struct loop *target, *aloop;
995 struct loop **orig_loops;
996 unsigned n_orig_loops;
997 basic_block header = loop->header, latch = loop->latch;
998 basic_block *new_bbs, *bbs, *first_active;
999 basic_block new_bb, bb, first_active_latch = NULL;
1000 edge ae, latch_edge;
1001 edge spec_edges[2], new_spec_edges[2];
1002 #define SE_LATCH 0
1003 #define SE_ORIG 1
1004 unsigned i, j, n;
1005 int is_latch = (latch == e->src);
1006 int scale_act = 0, *scale_step = NULL, scale_main = 0;
1007 int scale_after_exit = 0;
1008 int p, freq_in, freq_le, freq_out_orig;
1009 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1010 int add_irreducible_flag;
1011 basic_block place_after;
1012 bitmap bbs_to_scale = NULL;
1013 bitmap_iterator bi;
1014
1015 gcc_assert (e->dest == loop->header);
1016 gcc_assert (ndupl > 0);
1017
1018 if (orig)
1019 {
1020 /* Orig must be edge out of the loop. */
1021 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1022 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1023 }
1024
1025 n = loop->num_nodes;
1026 bbs = get_loop_body_in_dom_order (loop);
1027 gcc_assert (bbs[0] == loop->header);
1028 gcc_assert (bbs[n - 1] == loop->latch);
1029
1030 /* Check whether duplication is possible. */
1031 if (!can_copy_bbs_p (bbs, loop->num_nodes))
1032 {
1033 free (bbs);
1034 return false;
1035 }
1036 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1037
1038 /* In case we are doing loop peeling and the loop is in the middle of
1039 irreducible region, the peeled copies will be inside it too. */
1040 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1041 gcc_assert (!is_latch || !add_irreducible_flag);
1042
1043 /* Find edge from latch. */
1044 latch_edge = loop_latch_edge (loop);
1045
1046 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1047 {
1048 /* Calculate coefficients by that we have to scale frequencies
1049 of duplicated loop bodies. */
1050 freq_in = header->frequency;
1051 freq_le = EDGE_FREQUENCY (latch_edge);
1052 if (freq_in == 0)
1053 freq_in = 1;
1054 if (freq_in < freq_le)
1055 freq_in = freq_le;
1056 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1057 if (freq_out_orig > freq_in - freq_le)
1058 freq_out_orig = freq_in - freq_le;
1059 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1060 prob_pass_wont_exit =
1061 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1062
1063 if (orig
1064 && REG_BR_PROB_BASE - orig->probability != 0)
1065 {
1066 /* The blocks that are dominated by a removed exit edge ORIG have
1067 frequencies scaled by this. */
1068 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
1069 REG_BR_PROB_BASE - orig->probability);
1070 bbs_to_scale = BITMAP_ALLOC (NULL);
1071 for (i = 0; i < n; i++)
1072 {
1073 if (bbs[i] != orig->src
1074 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1075 bitmap_set_bit (bbs_to_scale, i);
1076 }
1077 }
1078
1079 scale_step = XNEWVEC (int, ndupl);
1080
1081 for (i = 1; i <= ndupl; i++)
1082 scale_step[i - 1] = TEST_BIT (wont_exit, i)
1083 ? prob_pass_wont_exit
1084 : prob_pass_thru;
1085
1086 /* Complete peeling is special as the probability of exit in last
1087 copy becomes 1. */
1088 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1089 {
1090 int wanted_freq = EDGE_FREQUENCY (e);
1091
1092 if (wanted_freq > freq_in)
1093 wanted_freq = freq_in;
1094
1095 gcc_assert (!is_latch);
1096 /* First copy has frequency of incoming edge. Each subsequent
1097 frequency should be reduced by prob_pass_wont_exit. Caller
1098 should've managed the flags so all except for original loop
1099 has won't exist set. */
1100 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1101 /* Now simulate the duplication adjustments and compute header
1102 frequency of the last copy. */
1103 for (i = 0; i < ndupl; i++)
1104 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
1105 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1106 }
1107 else if (is_latch)
1108 {
1109 prob_pass_main = TEST_BIT (wont_exit, 0)
1110 ? prob_pass_wont_exit
1111 : prob_pass_thru;
1112 p = prob_pass_main;
1113 scale_main = REG_BR_PROB_BASE;
1114 for (i = 0; i < ndupl; i++)
1115 {
1116 scale_main += p;
1117 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
1118 }
1119 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
1120 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
1121 }
1122 else
1123 {
1124 scale_main = REG_BR_PROB_BASE;
1125 for (i = 0; i < ndupl; i++)
1126 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
1127 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1128 }
1129 for (i = 0; i < ndupl; i++)
1130 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1131 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1132 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1133 }
1134
1135 /* Loop the new bbs will belong to. */
1136 target = e->src->loop_father;
1137
1138 /* Original loops. */
1139 n_orig_loops = 0;
1140 for (aloop = loop->inner; aloop; aloop = aloop->next)
1141 n_orig_loops++;
1142 orig_loops = XCNEWVEC (struct loop *, n_orig_loops);
1143 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1144 orig_loops[i] = aloop;
1145
1146 set_loop_copy (loop, target);
1147
1148 first_active = XNEWVEC (basic_block, n);
1149 if (is_latch)
1150 {
1151 memcpy (first_active, bbs, n * sizeof (basic_block));
1152 first_active_latch = latch;
1153 }
1154
1155 spec_edges[SE_ORIG] = orig;
1156 spec_edges[SE_LATCH] = latch_edge;
1157
1158 place_after = e->src;
1159 for (j = 0; j < ndupl; j++)
1160 {
1161 /* Copy loops. */
1162 copy_loops_to (orig_loops, n_orig_loops, target);
1163
1164 /* Copy bbs. */
1165 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1166 place_after);
1167 place_after = new_spec_edges[SE_LATCH]->src;
1168
1169 if (flags & DLTHE_RECORD_COPY_NUMBER)
1170 for (i = 0; i < n; i++)
1171 {
1172 gcc_assert (!new_bbs[i]->aux);
1173 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1174 }
1175
1176 /* Note whether the blocks and edges belong to an irreducible loop. */
1177 if (add_irreducible_flag)
1178 {
1179 for (i = 0; i < n; i++)
1180 new_bbs[i]->flags |= BB_DUPLICATED;
1181 for (i = 0; i < n; i++)
1182 {
1183 edge_iterator ei;
1184 new_bb = new_bbs[i];
1185 if (new_bb->loop_father == target)
1186 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1187
1188 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1189 if ((ae->dest->flags & BB_DUPLICATED)
1190 && (ae->src->loop_father == target
1191 || ae->dest->loop_father == target))
1192 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1193 }
1194 for (i = 0; i < n; i++)
1195 new_bbs[i]->flags &= ~BB_DUPLICATED;
1196 }
1197
1198 /* Redirect the special edges. */
1199 if (is_latch)
1200 {
1201 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1202 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1203 loop->header);
1204 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1205 latch = loop->latch = new_bbs[n - 1];
1206 e = latch_edge = new_spec_edges[SE_LATCH];
1207 }
1208 else
1209 {
1210 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1211 loop->header);
1212 redirect_edge_and_branch_force (e, new_bbs[0]);
1213 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1214 e = new_spec_edges[SE_LATCH];
1215 }
1216
1217 /* Record exit edge in this copy. */
1218 if (orig && TEST_BIT (wont_exit, j + 1))
1219 {
1220 if (to_remove)
1221 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
1222 set_zero_probability (new_spec_edges[SE_ORIG]);
1223
1224 /* Scale the frequencies of the blocks dominated by the exit. */
1225 if (bbs_to_scale)
1226 {
1227 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1228 {
1229 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1230 REG_BR_PROB_BASE);
1231 }
1232 }
1233 }
1234
1235 /* Record the first copy in the control flow order if it is not
1236 the original loop (i.e. in case of peeling). */
1237 if (!first_active_latch)
1238 {
1239 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1240 first_active_latch = new_bbs[n - 1];
1241 }
1242
1243 /* Set counts and frequencies. */
1244 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1245 {
1246 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1247 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1248 }
1249 }
1250 free (new_bbs);
1251 free (orig_loops);
1252
1253 /* Record the exit edge in the original loop body, and update the frequencies. */
1254 if (orig && TEST_BIT (wont_exit, 0))
1255 {
1256 if (to_remove)
1257 VEC_safe_push (edge, heap, *to_remove, orig);
1258 set_zero_probability (orig);
1259
1260 /* Scale the frequencies of the blocks dominated by the exit. */
1261 if (bbs_to_scale)
1262 {
1263 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1264 {
1265 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1266 REG_BR_PROB_BASE);
1267 }
1268 }
1269 }
1270
1271 /* Update the original loop. */
1272 if (!is_latch)
1273 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1274 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1275 {
1276 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1277 free (scale_step);
1278 }
1279
1280 /* Update dominators of outer blocks if affected. */
1281 for (i = 0; i < n; i++)
1282 {
1283 basic_block dominated, dom_bb;
1284 VEC (basic_block, heap) *dom_bbs;
1285 unsigned j;
1286
1287 bb = bbs[i];
1288 bb->aux = 0;
1289
1290 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1291 FOR_EACH_VEC_ELT (basic_block, dom_bbs, j, dominated)
1292 {
1293 if (flow_bb_inside_loop_p (loop, dominated))
1294 continue;
1295 dom_bb = nearest_common_dominator (
1296 CDI_DOMINATORS, first_active[i], first_active_latch);
1297 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1298 }
1299 VEC_free (basic_block, heap, dom_bbs);
1300 }
1301 free (first_active);
1302
1303 free (bbs);
1304 BITMAP_FREE (bbs_to_scale);
1305
1306 return true;
1307 }
1308
1309 /* A callback for make_forwarder block, to redirect all edges except for
1310 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1311 whether to redirect it. */
1312
1313 edge mfb_kj_edge;
1314 bool
1315 mfb_keep_just (edge e)
1316 {
1317 return e != mfb_kj_edge;
1318 }
1319
1320 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1321
1322 static bool
1323 has_preds_from_loop (basic_block block, struct loop *loop)
1324 {
1325 edge e;
1326 edge_iterator ei;
1327
1328 FOR_EACH_EDGE (e, ei, block->preds)
1329 if (e->src->loop_father == loop)
1330 return true;
1331 return false;
1332 }
1333
1334 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1335 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1336 entry; otherwise we also force preheader block to have only one successor.
1337 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1338 to be a fallthru predecessor to the loop header and to have only
1339 predecessors from outside of the loop.
1340 The function also updates dominators. */
1341
1342 basic_block
1343 create_preheader (struct loop *loop, int flags)
1344 {
1345 edge e, fallthru;
1346 basic_block dummy;
1347 int nentry = 0;
1348 bool irred = false;
1349 bool latch_edge_was_fallthru;
1350 edge one_succ_pred = NULL, single_entry = NULL;
1351 edge_iterator ei;
1352
1353 FOR_EACH_EDGE (e, ei, loop->header->preds)
1354 {
1355 if (e->src == loop->latch)
1356 continue;
1357 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1358 nentry++;
1359 single_entry = e;
1360 if (single_succ_p (e->src))
1361 one_succ_pred = e;
1362 }
1363 gcc_assert (nentry);
1364 if (nentry == 1)
1365 {
1366 bool need_forwarder_block = false;
1367
1368 /* We do not allow entry block to be the loop preheader, since we
1369 cannot emit code there. */
1370 if (single_entry->src == ENTRY_BLOCK_PTR)
1371 need_forwarder_block = true;
1372 else
1373 {
1374 /* If we want simple preheaders, also force the preheader to have
1375 just a single successor. */
1376 if ((flags & CP_SIMPLE_PREHEADERS)
1377 && !single_succ_p (single_entry->src))
1378 need_forwarder_block = true;
1379 /* If we want fallthru preheaders, also create forwarder block when
1380 preheader ends with a jump or has predecessors from loop. */
1381 else if ((flags & CP_FALLTHRU_PREHEADERS)
1382 && (JUMP_P (BB_END (single_entry->src))
1383 || has_preds_from_loop (single_entry->src, loop)))
1384 need_forwarder_block = true;
1385 }
1386 if (! need_forwarder_block)
1387 return NULL;
1388 }
1389
1390 mfb_kj_edge = loop_latch_edge (loop);
1391 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1392 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1393 dummy = fallthru->src;
1394 loop->header = fallthru->dest;
1395
1396 /* Try to be clever in placing the newly created preheader. The idea is to
1397 avoid breaking any "fallthruness" relationship between blocks.
1398
1399 The preheader was created just before the header and all incoming edges
1400 to the header were redirected to the preheader, except the latch edge.
1401 So the only problematic case is when this latch edge was a fallthru
1402 edge: it is not anymore after the preheader creation so we have broken
1403 the fallthruness. We're therefore going to look for a better place. */
1404 if (latch_edge_was_fallthru)
1405 {
1406 if (one_succ_pred)
1407 e = one_succ_pred;
1408 else
1409 e = EDGE_PRED (dummy, 0);
1410
1411 move_block_after (dummy, e->src);
1412 }
1413
1414 if (irred)
1415 {
1416 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1417 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1418 }
1419
1420 if (dump_file)
1421 fprintf (dump_file, "Created preheader block for loop %i\n",
1422 loop->num);
1423
1424 if (flags & CP_FALLTHRU_PREHEADERS)
1425 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1426 && !JUMP_P (BB_END (dummy)));
1427
1428 return dummy;
1429 }
1430
1431 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1432
1433 void
1434 create_preheaders (int flags)
1435 {
1436 loop_iterator li;
1437 struct loop *loop;
1438
1439 if (!current_loops)
1440 return;
1441
1442 FOR_EACH_LOOP (li, loop, 0)
1443 create_preheader (loop, flags);
1444 loops_state_set (LOOPS_HAVE_PREHEADERS);
1445 }
1446
1447 /* Forces all loop latches to have only single successor. */
1448
1449 void
1450 force_single_succ_latches (void)
1451 {
1452 loop_iterator li;
1453 struct loop *loop;
1454 edge e;
1455
1456 FOR_EACH_LOOP (li, loop, 0)
1457 {
1458 if (loop->latch != loop->header && single_succ_p (loop->latch))
1459 continue;
1460
1461 e = find_edge (loop->latch, loop->header);
1462
1463 split_edge (e);
1464 }
1465 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1466 }
1467
1468 /* This function is called from loop_version. It splits the entry edge
1469 of the loop we want to version, adds the versioning condition, and
1470 adjust the edges to the two versions of the loop appropriately.
1471 e is an incoming edge. Returns the basic block containing the
1472 condition.
1473
1474 --- edge e ---- > [second_head]
1475
1476 Split it and insert new conditional expression and adjust edges.
1477
1478 --- edge e ---> [cond expr] ---> [first_head]
1479 |
1480 +---------> [second_head]
1481
1482 THEN_PROB is the probability of then branch of the condition. */
1483
1484 static basic_block
1485 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1486 edge e, void *cond_expr, unsigned then_prob)
1487 {
1488 basic_block new_head = NULL;
1489 edge e1;
1490
1491 gcc_assert (e->dest == second_head);
1492
1493 /* Split edge 'e'. This will create a new basic block, where we can
1494 insert conditional expr. */
1495 new_head = split_edge (e);
1496
1497 lv_add_condition_to_bb (first_head, second_head, new_head,
1498 cond_expr);
1499
1500 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1501 e = single_succ_edge (new_head);
1502 e1 = make_edge (new_head, first_head,
1503 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1504 e1->probability = then_prob;
1505 e->probability = REG_BR_PROB_BASE - then_prob;
1506 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
1507 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
1508
1509 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1510 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1511
1512 /* Adjust loop header phi nodes. */
1513 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1514
1515 return new_head;
1516 }
1517
1518 /* Main entry point for Loop Versioning transformation.
1519
1520 This transformation given a condition and a loop, creates
1521 -if (condition) { loop_copy1 } else { loop_copy2 },
1522 where loop_copy1 is the loop transformed in one way, and loop_copy2
1523 is the loop transformed in another way (or unchanged). 'condition'
1524 may be a run time test for things that were not resolved by static
1525 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1526
1527 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1528 is the ratio by that the frequencies in the original loop should
1529 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1530 new loop should be scaled.
1531
1532 If PLACE_AFTER is true, we place the new loop after LOOP in the
1533 instruction stream, otherwise it is placed before LOOP. */
1534
1535 struct loop *
1536 loop_version (struct loop *loop,
1537 void *cond_expr, basic_block *condition_bb,
1538 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1539 bool place_after)
1540 {
1541 basic_block first_head, second_head;
1542 edge entry, latch_edge, true_edge, false_edge;
1543 int irred_flag;
1544 struct loop *nloop;
1545 basic_block cond_bb;
1546
1547 /* Record entry and latch edges for the loop */
1548 entry = loop_preheader_edge (loop);
1549 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1550 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1551
1552 /* Note down head of loop as first_head. */
1553 first_head = entry->dest;
1554
1555 /* Duplicate loop. */
1556 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1557 NULL, NULL, NULL, 0))
1558 {
1559 entry->flags |= irred_flag;
1560 return NULL;
1561 }
1562
1563 /* After duplication entry edge now points to new loop head block.
1564 Note down new head as second_head. */
1565 second_head = entry->dest;
1566
1567 /* Split loop entry edge and insert new block with cond expr. */
1568 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1569 entry, cond_expr, then_prob);
1570 if (condition_bb)
1571 *condition_bb = cond_bb;
1572
1573 if (!cond_bb)
1574 {
1575 entry->flags |= irred_flag;
1576 return NULL;
1577 }
1578
1579 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1580
1581 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1582 nloop = loopify (latch_edge,
1583 single_pred_edge (get_bb_copy (loop->header)),
1584 cond_bb, true_edge, false_edge,
1585 false /* Do not redirect all edges. */,
1586 then_scale, else_scale);
1587
1588 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1589 lv_flush_pending_stmts (latch_edge);
1590
1591 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1592 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1593 lv_flush_pending_stmts (false_edge);
1594 /* Adjust irreducible flag. */
1595 if (irred_flag)
1596 {
1597 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1598 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1599 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1600 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1601 }
1602
1603 if (place_after)
1604 {
1605 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1606 unsigned i;
1607
1608 after = loop->latch;
1609
1610 for (i = 0; i < nloop->num_nodes; i++)
1611 {
1612 move_block_after (bbs[i], after);
1613 after = bbs[i];
1614 }
1615 free (bbs);
1616 }
1617
1618 /* At this point condition_bb is loop preheader with two successors,
1619 first_head and second_head. Make sure that loop preheader has only
1620 one successor. */
1621 split_edge (loop_preheader_edge (loop));
1622 split_edge (loop_preheader_edge (nloop));
1623
1624 return nloop;
1625 }
1626
1627 /* The structure of loops might have changed. Some loops might get removed
1628 (and their headers and latches were set to NULL), loop exists might get
1629 removed (thus the loop nesting may be wrong), and some blocks and edges
1630 were changed (so the information about bb --> loop mapping does not have
1631 to be correct). But still for the remaining loops the header dominates
1632 the latch, and loops did not get new subloops (new loops might possibly
1633 get created, but we are not interested in them). Fix up the mess.
1634
1635 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1636 marked in it. */
1637
1638 void
1639 fix_loop_structure (bitmap changed_bbs)
1640 {
1641 basic_block bb;
1642 struct loop *loop, *ploop;
1643 loop_iterator li;
1644 bool record_exits = false;
1645 struct loop **superloop = XNEWVEC (struct loop *, number_of_loops ());
1646
1647 /* Remove the old bb -> loop mapping. Remember the depth of the blocks in
1648 the loop hierarchy, so that we can recognize blocks whose loop nesting
1649 relationship has changed. */
1650 FOR_EACH_BB (bb)
1651 {
1652 if (changed_bbs)
1653 bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
1654 bb->loop_father = current_loops->tree_root;
1655 }
1656
1657 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
1658 {
1659 release_recorded_exits ();
1660 record_exits = true;
1661 }
1662
1663 /* Remove the dead loops from structures. We start from the innermost
1664 loops, so that when we remove the loops, we know that the loops inside
1665 are preserved, and do not waste time relinking loops that will be
1666 removed later. */
1667 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1668 {
1669 if (loop->header)
1670 continue;
1671
1672 while (loop->inner)
1673 {
1674 ploop = loop->inner;
1675 flow_loop_tree_node_remove (ploop);
1676 flow_loop_tree_node_add (loop_outer (loop), ploop);
1677 }
1678
1679 /* Remove the loop and free its data. */
1680 delete_loop (loop);
1681 }
1682
1683 /* Rescan the bodies of loops, starting from the outermost ones. We assume
1684 that no optimization interchanges the order of the loops, i.e., it cannot
1685 happen that L1 was superloop of L2 before and it is subloop of L2 now
1686 (without explicitly updating loop information). At the same time, we also
1687 determine the new loop structure. */
1688 current_loops->tree_root->num_nodes = n_basic_blocks;
1689 FOR_EACH_LOOP (li, loop, 0)
1690 {
1691 superloop[loop->num] = loop->header->loop_father;
1692 loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
1693 }
1694
1695 /* Now fix the loop nesting. */
1696 FOR_EACH_LOOP (li, loop, 0)
1697 {
1698 ploop = superloop[loop->num];
1699 if (ploop != loop_outer (loop))
1700 {
1701 flow_loop_tree_node_remove (loop);
1702 flow_loop_tree_node_add (ploop, loop);
1703 }
1704 }
1705 free (superloop);
1706
1707 /* Mark the blocks whose loop has changed. */
1708 if (changed_bbs)
1709 {
1710 FOR_EACH_BB (bb)
1711 {
1712 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
1713 bitmap_set_bit (changed_bbs, bb->index);
1714
1715 bb->aux = NULL;
1716 }
1717 }
1718
1719 if (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
1720 create_preheaders (CP_SIMPLE_PREHEADERS);
1721
1722 if (loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
1723 force_single_succ_latches ();
1724
1725 if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1726 mark_irreducible_loops ();
1727
1728 if (record_exits)
1729 record_loop_exits ();
1730
1731 #ifdef ENABLE_CHECKING
1732 verify_loop_structure ();
1733 #endif
1734 }