Update copyright years.
[gcc.git] / gcc / cfgloopmanip.c
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "predict.h"
26 #include "vec.h"
27 #include "hashtab.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "hard-reg-set.h"
31 #include "input.h"
32 #include "function.h"
33 #include "dominance.h"
34 #include "cfg.h"
35 #include "cfganal.h"
36 #include "basic-block.h"
37 #include "cfgloop.h"
38 #include "tree.h"
39 #include "tree-ssa-alias.h"
40 #include "internal-fn.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "tree-ssa-loop-manip.h"
47 #include "dumpfile.h"
48
/* Forward declarations of helpers local to this file.  */
static void copy_loops_to (struct loop **, int,
			   struct loop *);
static void loop_redirect_edge (edge, basic_block);
static void remove_bbs (basic_block *, int);
static bool rpe_enum_p (const_basic_block, const void *);
static int find_path (edge, basic_block **);
static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *, bitmap);
58
/* Checks whether basic block BB is dominated by DATA.  DATA is really a
   const_basic_block passed as const void * so that this function can be
   used as a predicate for dfs_enumerate_from.  */
static bool
rpe_enum_p (const_basic_block bb, const void *data)
{
  return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
}
65
66 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
67
68 static void
69 remove_bbs (basic_block *bbs, int nbbs)
70 {
71 int i;
72
73 for (i = 0; i < nbbs; i++)
74 delete_basic_block (bbs[i]);
75 }
76
/* Find path -- i.e. the basic blocks dominated by edge E and put them
   into array BBS, that will be allocated large enough to contain them.
   E->dest must have exactly one predecessor for this to work (it is
   easy to achieve and we do not put it here because we do not want to
   alter anything by this function).  The number of basic blocks in the
   path is returned.  The caller is responsible for freeing *BBS.  */
static int
find_path (edge e, basic_block **bbs)
{
  gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);

  /* Find bbs in the path.  Since E->dest has a single predecessor, the
     blocks dominated by E are exactly those dominated by E->dest.  */
  *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
			     n_basic_blocks_for_fn (cfun), e->dest);
}
93
/* Fix placement of basic block BB inside loop hierarchy --
   Let L be a loop to that BB belongs.  Then every successor of BB must either
     1) belong to some superloop of loop L, or
     2) be a header of loop K such that K->outer is superloop of L
   Returns true if we had to move BB into other loop to enforce this condition,
   false if the placement of BB was already correct (provided that placements
   of its successors are correct).  */
static bool
fix_bb_placement (basic_block bb)
{
  edge e;
  edge_iterator ei;
  /* Start from the outermost "loop" (the tree root, i.e. the whole
     function) and narrow it down to the innermost loop that still
     satisfies the condition above for every successor.  */
  struct loop *loop = current_loops->tree_root, *act;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	continue;

      act = e->dest->loop_father;
      /* A successor that is a loop header may legally live one level
	 deeper; consider its enclosing loop instead (condition 2).  */
      if (act->header == e->dest)
	act = loop_outer (act);

      if (flow_loop_nested_p (loop, act))
	loop = act;
    }

  /* Placement was already correct; nothing to do.  */
  if (loop == bb->loop_father)
    return false;

  remove_bb_from_loops (bb);
  add_bb_to_loop (bb, loop);

  return true;
}
129
/* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
   of LOOP to that leads at least one exit edge of LOOP, and set it
   as the immediate superloop of LOOP.  Return true if the immediate superloop
   of LOOP changed.

   IRRED_INVALIDATED is set to true if a change in the loop structures might
   invalidate the information about irreducible regions.  */

static bool
fix_loop_placement (struct loop *loop, bool *irred_invalidated)
{
  unsigned i;
  edge e;
  vec<edge> exits = get_loop_exit_edges (loop);
  struct loop *father = current_loops->tree_root, *act;
  bool ret = false;

  /* The correct parent is the innermost loop containing LOOP and the
     destination of every exit edge, i.e. the deepest common ancestor
     of all those loops.  */
  FOR_EACH_VEC_ELT (exits, i, e)
    {
      act = find_common_loop (loop, e->dest->loop_father);
      if (flow_loop_nested_p (father, act))
	father = act;
    }

  if (father != loop_outer (loop))
    {
      /* The loops between LOOP's old parent and FATHER no longer
	 contain LOOP's blocks; fix up their node counts.  */
      for (act = loop_outer (loop); act != father; act = loop_outer (act))
	act->num_nodes -= loop->num_nodes;
      flow_loop_tree_node_remove (loop);
      flow_loop_tree_node_add (father, loop);

      /* The exit edges of LOOP no longer exits its original immediate
	 superloops; remove them from the appropriate exit lists.  */
      FOR_EACH_VEC_ELT (exits, i, e)
	{
	  /* We may need to recompute irreducible loops.  */
	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
	    *irred_invalidated = true;
	  rescan_loop_exit (e, false, false);
	}

      ret = true;
    }

  exits.release ();
  return ret;
}
177
/* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
   enforce condition stated in description of fix_bb_placement.  We
   start from basic block FROM that had some of its successors removed, so that
   its placement no longer has to be correct, and iteratively fix placement of
   its predecessors that may change if placement of FROM changed.  Also fix
   placement of subloops of FROM->loop_father, that might also be altered due
   to this change; the condition for them is similar, except that instead of
   successors we consider edges coming out of the loops.

   If the changes may invalidate the information about irreducible regions,
   IRRED_INVALIDATED is set to true.

   If LOOP_CLOSED_SSA_INVALIDATED is non-zero then all basic blocks with
   changed loop_father are collected there.  */

static void
fix_bb_placements (basic_block from,
		   bool *irred_invalidated,
		   bitmap loop_closed_ssa_invalidated)
{
  sbitmap in_queue;
  /* QUEUE is used as a circular buffer of size num_nodes + 1; QBEG and
     QEND wrap around at QTOP.  */
  basic_block *queue, *qtop, *qbeg, *qend;
  struct loop *base_loop, *target_loop;
  edge e;

  /* We pass through blocks back-reachable from FROM, testing whether some
     of their successors moved to outer loop.  It may be necessary to
     iterate several times, but it is finite, as we stop unless we move
     the basic block up the loop structure.  The whole story is a bit
     more complicated due to presence of subloops, those are moved using
     fix_loop_placement.  */

  base_loop = from->loop_father;
  /* If we are already in the outermost loop, the basic blocks cannot be moved
     outside of it.  If FROM is the header of the base loop, it cannot be moved
     outside of it, either.  In both cases, we can end now.  */
  if (base_loop == current_loops->tree_root
      || from == base_loop->header)
    return;

  in_queue = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (in_queue);
  bitmap_set_bit (in_queue, from->index);
  /* Prevent us from going out of the base_loop.  */
  bitmap_set_bit (in_queue, base_loop->header->index);

  queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
  qtop = queue + base_loop->num_nodes + 1;
  qbeg = queue;
  qend = queue + 1;
  *qbeg = from;

  while (qbeg != qend)
    {
      edge_iterator ei;
      /* Dequeue the next block, wrapping around the circular buffer.  */
      from = *qbeg;
      qbeg++;
      if (qbeg == qtop)
	qbeg = queue;
      bitmap_clear_bit (in_queue, from->index);

      if (from->loop_father->header == from)
	{
	  /* Subloop header, maybe move the loop upward.  */
	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
	    continue;
	  target_loop = loop_outer (from->loop_father);
	  if (loop_closed_ssa_invalidated)
	    {
	      /* All blocks of the moved loop changed their loop_father.  */
	      basic_block *bbs = get_loop_body (from->loop_father);
	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
	      free (bbs);
	    }
	}
      else
	{
	  /* Ordinary basic block.  */
	  if (!fix_bb_placement (from))
	    continue;
	  target_loop = from->loop_father;
	  if (loop_closed_ssa_invalidated)
	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
	}

      FOR_EACH_EDGE (e, ei, from->succs)
	{
	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
	    *irred_invalidated = true;
	}

      /* Something has changed, insert predecessors into queue.  */
      FOR_EACH_EDGE (e, ei, from->preds)
	{
	  basic_block pred = e->src;
	  struct loop *nca;

	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
	    *irred_invalidated = true;

	  if (bitmap_bit_p (in_queue, pred->index))
	    continue;

	  /* If it is subloop, then it either was not moved, or
	     the path up the loop tree from base_loop do not contain
	     it.  */
	  nca = find_common_loop (pred->loop_father, base_loop);
	  if (pred->loop_father != base_loop
	      && (nca == base_loop
		  || nca != pred->loop_father))
	    pred = pred->loop_father->header;
	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
	    {
	      /* If PRED is already higher in the loop hierarchy than the
		 TARGET_LOOP to that we moved FROM, the change of the position
		 of FROM does not affect the position of PRED, so there is no
		 point in processing it.  */
	      continue;
	    }

	  /* Re-check after PRED may have been replaced by its loop's
	     header above.  */
	  if (bitmap_bit_p (in_queue, pred->index))
	    continue;

	  /* Schedule the basic block.  */
	  *qend = pred;
	  qend++;
	  if (qend == qtop)
	    qend = queue;
	  bitmap_set_bit (in_queue, pred->index);
	}
    }
  free (in_queue);
  free (queue);
}
312
/* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
   and update loop structures and dominators.  Return true if we were able
   to remove the path, false otherwise (and nothing is affected then).  */
bool
remove_path (edge e)
{
  edge ae;
  basic_block *rem_bbs, *bord_bbs, from, bb;
  vec<basic_block> dom_bbs;
  int i, nrem, n_bord_bbs;
  sbitmap seen;
  bool irred_invalidated = false;
  edge_iterator ei;
  struct loop *l, *f;

  if (!can_remove_branch_p (e))
    return false;

  /* Keep track of whether we need to update information about irreducible
     regions.  This is the case if the removed area is a part of the
     irreducible region, or if the set of basic blocks that belong to a loop
     that is inside an irreducible region is changed, or if such a loop is
     removed.  */
  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
    irred_invalidated = true;

  /* We need to check whether basic blocks are dominated by the edge
     e, but we only have basic block dominators.  This is easy to
     fix -- when e->dest has exactly one predecessor, this corresponds
     to blocks dominated by e->dest, if not, split the edge.  */
  if (!single_pred_p (e->dest))
    e = single_pred_edge (split_edge (e));

  /* It may happen that by removing path we remove one or more loops
     we belong to.  In this case first unloop the loops, then proceed
     normally.   We may assume that e->dest is not a header of any loop,
     as it now has exactly one predecessor.  */
  for (l = e->src->loop_father; loop_outer (l); l = f)
    {
      f = loop_outer (l);
      /* If the latch is dominated by the removed path, the loop's
	 back edge disappears and the loop must be dissolved.  */
      if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
	unloop (l, &irred_invalidated, NULL);
    }

  /* Identify the path.  */
  nrem = find_path (e, &rem_bbs);

  n_bord_bbs = 0;
  bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  seen = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (seen);

  /* Find "border" blocks -- i.e. those with predecessor in removed path.  */
  for (i = 0; i < nrem; i++)
    bitmap_set_bit (seen, rem_bbs[i]->index);
  if (!irred_invalidated)
    FOR_EACH_EDGE (ae, ei, e->src->succs)
      if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && !bitmap_bit_p (seen, ae->dest->index)
	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
	{
	  irred_invalidated = true;
	  break;
	}

  for (i = 0; i < nrem; i++)
    {
      bb = rem_bbs[i];
      FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !bitmap_bit_p (seen, ae->dest->index))
	  {
	    bitmap_set_bit (seen, ae->dest->index);
	    bord_bbs[n_bord_bbs++] = ae->dest;

	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
	      irred_invalidated = true;
	  }
    }

  /* Remove the path.  */
  from = e->src;
  remove_branch (e);
  dom_bbs.create (0);

  /* Cancel loops contained in the path.  */
  for (i = 0; i < nrem; i++)
    if (rem_bbs[i]->loop_father->header == rem_bbs[i])
      cancel_loop_tree (rem_bbs[i]->loop_father);

  remove_bbs (rem_bbs, nrem);
  free (rem_bbs);

  /* Find blocks whose dominators may be affected.  */
  bitmap_clear (seen);
  for (i = 0; i < n_bord_bbs; i++)
    {
      basic_block ldom;

      bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
      if (bitmap_bit_p (seen, bb->index))
	continue;
      bitmap_set_bit (seen, bb->index);

      for (ldom = first_dom_son (CDI_DOMINATORS, bb);
	   ldom;
	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
	  dom_bbs.safe_push (ldom);
    }

  free (seen);

  /* Recount dominators.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
  dom_bbs.release ();
  free (bord_bbs);

  /* Fix placements of basic blocks inside loops and the placement of
     loops in the loop tree.  */
  fix_bb_placements (from, &irred_invalidated, NULL);
  fix_loop_placements (from->loop_father, &irred_invalidated);

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return true;
}
442
/* Creates place for a new LOOP in loops structure of FN.  Assigns LOOP
   the next free loop number and appends it to FN's loop array.  */

void
place_new_loop (struct function *fn, struct loop *loop)
{
  loop->num = number_of_loops (fn);
  vec_safe_push (loops_for_fn (fn)->larray, loop);
}
451
/* Given LOOP structure with filled header and latch, find the body of the
   corresponding loop and add it to loops tree.  Insert the LOOP as a son of
   OUTER.  */

void
add_loop (struct loop *loop, struct loop *outer)
{
  basic_block *bbs;
  int i, n;
  struct loop *subloop;
  edge e;
  edge_iterator ei;

  /* Add it to loop structure.  */
  place_new_loop (cfun, loop);
  flow_loop_tree_node_add (outer, loop);

  /* Find its nodes.  */
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));

  for (i = 0; i < n; i++)
    {
      /* Blocks that directly belonged to OUTER now belong to LOOP.  */
      if (bbs[i]->loop_father == outer)
	{
	  remove_bb_from_loops (bbs[i]);
	  add_bb_to_loop (bbs[i], loop);
	  continue;
	}

      loop->num_nodes++;

      /* If we find a direct subloop of OUTER, move it to LOOP.  */
      subloop = bbs[i]->loop_father;
      if (loop_outer (subloop) == outer
	  && subloop->header == bbs[i])
	{
	  flow_loop_tree_node_remove (subloop);
	  flow_loop_tree_node_add (loop, subloop);
	}
    }

  /* Update the information about loop exit edges.  */
  for (i = 0; i < n; i++)
    {
      FOR_EACH_EDGE (e, ei, bbs[i]->succs)
	{
	  rescan_loop_exit (e, false, false);
	}
    }

  free (bbs);
}
505
506 /* Multiply all frequencies in LOOP by NUM/DEN. */
507
508 void
509 scale_loop_frequencies (struct loop *loop, int num, int den)
510 {
511 basic_block *bbs;
512
513 bbs = get_loop_body (loop);
514 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
515 free (bbs);
516 }
517
/* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
   If ITERATION_BOUND is non-zero, scale even further if loop is predicted
   to iterate too many times.  */

void
scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound)
{
  gcov_type iterations = expected_loop_iterations_unbounded (loop);
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ";; Scaling loop %i with scale %f, "
	     "bounding iterations to %i from guessed %i\n",
	     loop->num, (double)scale / REG_BR_PROB_BASE,
	     (int)iteration_bound, (int)iterations);

  /* See if loop is predicted to iterate too many times.  */
  if (iteration_bound && iterations > 0
      && apply_probability (iterations, scale) > iteration_bound)
    {
      /* Fixing loop profile for different trip count is not trivial; the exit
	 probabilities has to be updated to match and frequencies propagated down
	 to the loop body.

	 We fully update only the simple case of loop with single exit that is
	 either from the latch or BB just before latch and leads from BB with
	 simple conditional jump.   This is OK for use in vectorizer.  */
      e = single_exit (loop);
      if (e)
	{
	  edge other_e;
	  int freq_delta;
	  gcov_type count_delta;

	  /* Find the non-exit edge out of the exit block (the edge that
	     stays in the loop), ignoring abnormal/fake edges.  */
	  FOR_EACH_EDGE (other_e, ei, e->src->succs)
	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
		&& e != other_e)
	      break;

	  /* Probability of exit must be 1/iterations.  */
	  freq_delta = EDGE_FREQUENCY (e);
	  e->probability = REG_BR_PROB_BASE / iteration_bound;
	  other_e->probability = inverse_probability (e->probability);
	  freq_delta -= EDGE_FREQUENCY (e);

	  /* Adjust counts accordingly.  */
	  count_delta = e->count;
	  e->count = apply_probability (e->src->count, e->probability);
	  other_e->count = apply_probability (e->src->count, other_e->probability);
	  count_delta -= e->count;

	  /* If latch exists, change its frequency and count, since we changed
	     probability of exit.  Theoretically we should update everything from
	     source of exit edge to latch, but for vectorizer this is enough.  */
	  if (loop->latch
	      && loop->latch != e->src)
	    {
	      loop->latch->frequency += freq_delta;
	      if (loop->latch->frequency < 0)
		loop->latch->frequency = 0;
	      loop->latch->count += count_delta;
	      if (loop->latch->count < 0)
		loop->latch->count = 0;
	    }
	}

      /* Roughly speaking we want to reduce the loop body profile by the
	 difference of loop iterations.  We however can do better if
	 we look at the actual profile, if it is available.  */
      scale = RDIV (iteration_bound * scale, iterations);
      if (loop->header->count)
	{
	  /* Profile counts available: derive the scale from the real
	     count of entries into the loop.  */
	  gcov_type count_in = 0;

	  FOR_EACH_EDGE (e, ei, loop->header->preds)
	    if (e->src != loop->latch)
	      count_in += e->count;

	  if (count_in != 0)
	    scale = GCOV_COMPUTE_SCALE (count_in * iteration_bound,
					loop->header->count);
	}
      else if (loop->header->frequency)
	{
	  /* No counts; fall back to estimated frequencies.  */
	  int freq_in = 0;

	  FOR_EACH_EDGE (e, ei, loop->header->preds)
	    if (e->src != loop->latch)
	      freq_in += EDGE_FREQUENCY (e);

	  if (freq_in != 0)
	    scale = GCOV_COMPUTE_SCALE (freq_in * iteration_bound,
					loop->header->frequency);
	}
      /* Never scale to zero; that would erase the profile entirely.  */
      if (!scale)
	scale = 1;
    }

  if (scale == REG_BR_PROB_BASE)
    return;

  /* Scale the actual probabilities.  */
  scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ";; guessed iterations are now %i\n",
	     (int)expected_loop_iterations_unbounded (loop));
}
626
627 /* Recompute dominance information for basic blocks outside LOOP. */
628
629 static void
630 update_dominators_in_loop (struct loop *loop)
631 {
632 vec<basic_block> dom_bbs = vNULL;
633 sbitmap seen;
634 basic_block *body;
635 unsigned i;
636
637 seen = sbitmap_alloc (last_basic_block_for_fn (cfun));
638 bitmap_clear (seen);
639 body = get_loop_body (loop);
640
641 for (i = 0; i < loop->num_nodes; i++)
642 bitmap_set_bit (seen, body[i]->index);
643
644 for (i = 0; i < loop->num_nodes; i++)
645 {
646 basic_block ldom;
647
648 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
649 ldom;
650 ldom = next_dom_son (CDI_DOMINATORS, ldom))
651 if (!bitmap_bit_p (seen, ldom->index))
652 {
653 bitmap_set_bit (seen, ldom->index);
654 dom_bbs.safe_push (ldom);
655 }
656 }
657
658 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
659 free (body);
660 free (seen);
661 dom_bbs.release ();
662 }
663
/* Creates an if region as shown above.  CONDITION is used to create
   the test for the if.

   |
   |     -------------                 -------------
   |     |  pred_bb  |                 |  pred_bb  |
   |     -------------                 -------------
   |           |                             |
   |           |                             | ENTRY_EDGE
   |           | ENTRY_EDGE                  V
   |           |             ====>     -------------
   |           |                       |  cond_bb  |
   |           |                       | CONDITION |
   |           |                       -------------
   |           V                        /         \
   |     -------------         e_false /           \ e_true
   |     |  succ_bb  |                V             V
   |     -------------         -----------       -----------
   |                           | false_bb |      | true_bb |
   |                           -----------       -----------
   |                                   \           /
   |                                    \         /
   |                                     V       V
   |                                   -------------
   |                                   |  join_bb  |
   |                                   -------------
   |                                         | exit_edge (result)
   |                                         V
   |                                    -----------
   |                                    | succ_bb |
   |                                    -----------
   |
 */

edge
create_empty_if_region_on_edge (edge entry_edge, tree condition)
{

  basic_block cond_bb, true_bb, false_bb, join_bb;
  edge e_true, e_false, exit_edge;
  gcond *cond_stmt;
  tree simple_cond;
  gimple_stmt_iterator gsi;

  /* The new condition block is created by splitting the entry edge.  */
  cond_bb = split_edge (entry_edge);

  /* Insert condition in cond_bb.  CONDITION is gimplified first, which
     may insert statements, hence the iterator is re-fetched below.  */
  gsi = gsi_last_bb (cond_bb);
  simple_cond =
    force_gimple_operand_gsi (&gsi, condition, true, NULL,
			      false, GSI_NEW_STMT);
  cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Carve out the diamond: join block, then the true and false arms.  */
  join_bb = split_edge (single_succ_edge (cond_bb));

  e_true = single_succ_edge (cond_bb);
  true_bb = split_edge (e_true);

  e_false = make_edge (cond_bb, join_bb, 0);
  false_bb = split_edge (e_false);

  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;
  e_false->flags &= ~EDGE_FALLTHRU;
  e_false->flags |= EDGE_FALSE_VALUE;

  set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
  set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
  set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
  set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);

  exit_edge = single_succ_edge (join_bb);

  if (single_pred_p (exit_edge->dest))
    set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);

  return exit_edge;
}
744
/* create_empty_loop_on_edge
   |
   |    - pred_bb -                   ------ pred_bb ------
   |   |           |                 | iv0 = initial_value |
   |    -----|-----                   ---------|-----------
   |         |                       ______    | entry_edge
   |         | entry_edge           /      |   |
   |         |             ====>   |      -V---V- loop_header -------------
   |         V                     |     | iv_before = phi (iv0, iv_after) |
   |    - succ_bb -                |      ---|-----------------------------
   |   |           |               |         |
   |    -----------                |      ---V--- loop_body ---------------
   |                               |     | iv_after = iv_before + stride   |
   |                               |     | if (iv_before < upper_bound)    |
   |                               |      ---|--------------\--------------
   |                               |         |               \ exit_e
   |                               |         V                \
   |                               |       - loop_latch -      V- succ_bb -
   |                               |      |              |     |           |
   |                               |       /-------------       -----------
   |                                \ ___ /

   Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
   that is used before the increment of IV.  IV_BEFORE should be used for
   adding code to the body that uses the IV.  OUTER is the outer loop in
   which the new loop should be inserted.

   Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
   inserted on the loop entry edge.  This implies that this function
   should be used only when the UPPER_BOUND expression is a loop
   invariant.  */

struct loop *
create_empty_loop_on_edge (edge entry_edge,
			   tree initial_value,
			   tree stride, tree upper_bound,
			   tree iv,
			   tree *iv_before,
			   tree *iv_after,
			   struct loop *outer)
{
  basic_block loop_header, loop_latch, succ_bb, pred_bb;
  struct loop *loop;
  gimple_stmt_iterator gsi;
  gimple_seq stmts;
  gcond *cond_expr;
  tree exit_test;
  edge exit_e;
  int prob;

  gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);

  /* Create header, latch and wire up the loop.  */
  pred_bb = entry_edge->src;
  loop_header = split_edge (entry_edge);
  loop_latch = split_edge (single_succ_edge (loop_header));
  succ_bb = single_succ (loop_latch);
  make_edge (loop_header, succ_bb, 0);
  redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);

  /* Set immediate dominator information.  */
  set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
  set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
  set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);

  /* Initialize a loop structure and put it in a loop hierarchy.  */
  loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = loop_latch;
  add_loop (loop, outer);

  /* TODO: Fix frequencies and counts.  */
  prob = REG_BR_PROB_BASE / 2;

  scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);

  /* Update dominators.  */
  update_dominators_in_loop (loop);

  /* Modify edge flags.  */
  exit_e = single_exit (loop);
  exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
  single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;

  /* Construct IV code in loop.  The gimplified initializers go on the
     preheader edge, hence the loop-invariance requirement above.  */
  initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
  if (stmts)
    {
      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
      gsi_commit_edge_inserts ();
    }

  upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
  if (stmts)
    {
      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
      gsi_commit_edge_inserts ();
    }

  gsi = gsi_last_bb (loop_header);
  create_iv (initial_value, stride, iv, loop, &gsi, false,
	     iv_before, iv_after);

  /* Insert loop exit condition.  */
  cond_expr = gimple_build_cond
    (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);

  exit_test = gimple_cond_lhs (cond_expr);
  exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
					false, GSI_NEW_STMT);
  gimple_cond_set_lhs (cond_expr, exit_test);
  gsi = gsi_last_bb (exit_e->src);
  gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);

  split_block_after_labels (loop_header);

  return loop;
}
863
/* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
   latch to header and update loop tree and dominators
   accordingly.  Everything between them plus LATCH_EDGE destination must
   be dominated by HEADER_EDGE destination, and back-reachable from
   LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
   FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
   TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
   Returns the newly created loop.  Frequencies and counts in the new loop
   are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */

struct loop *
loopify (edge latch_edge, edge header_edge,
	 basic_block switch_bb, edge true_edge, edge false_edge,
	 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
{
  basic_block succ_bb = latch_edge->dest;
  basic_block pred_bb = header_edge->src;
  struct loop *loop = alloc_loop ();
  struct loop *outer = loop_outer (succ_bb->loop_father);
  int freq;
  gcov_type cnt;
  edge e;
  edge_iterator ei;

  loop->header = header_edge->dest;
  loop->latch = latch_edge->src;

  /* Record the profile of the former header edge before redirecting;
     it becomes the profile of SWITCH_BB below.  */
  freq = EDGE_FREQUENCY (header_edge);
  cnt = header_edge->count;

  /* Redirect edges.  */
  loop_redirect_edge (latch_edge, loop->header);
  loop_redirect_edge (true_edge, succ_bb);

  /* During loop versioning, one of the switch_bb edge is already properly
     set.  Do not redirect it again unless redirect_all_edges is true.  */
  if (redirect_all_edges)
    {
      loop_redirect_edge (header_edge, switch_bb);
      loop_redirect_edge (false_edge, loop->header);

      /* Update dominators.  */
      set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
      set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
    }

  set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);

  /* Compute new loop.  */
  add_loop (loop, outer);

  /* Add switch_bb to appropriate loop.  */
  if (switch_bb->loop_father)
    remove_bb_from_loops (switch_bb);
  add_bb_to_loop (switch_bb, outer);

  /* Fix frequencies.  */
  if (redirect_all_edges)
    {
      switch_bb->frequency = freq;
      switch_bb->count = cnt;
      FOR_EACH_EDGE (e, ei, switch_bb->succs)
	{
	  e->count = apply_probability (switch_bb->count, e->probability);
	}
    }
  scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
  scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
  update_dominators_in_loop (loop);

  return loop;
}
936
/* Remove the latch edge of a LOOP and update loops to indicate that
   the LOOP was removed.  After this function, original loop latch will
   have no successor, which caller is expected to fix somehow.

   If this may cause the information about irreducible regions to become
   invalid, IRRED_INVALIDATED is set to true.

   LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
   basic blocks that had non-trivial update on their loop_father.  */

void
unloop (struct loop *loop, bool *irred_invalidated,
	bitmap loop_closed_ssa_invalidated)
{
  basic_block *body;
  struct loop *ploop;
  unsigned i, n;
  basic_block latch = loop->latch;
  bool dummy = false;

  if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
    *irred_invalidated = true;

  /* This is relatively straightforward.  The dominators are unchanged, as
     loop header dominates loop latch, so the only thing we have to care of
     is the placement of loops and basic blocks inside the loop tree.  We
     move them all to the loop->outer, and then let fix_bb_placements do
     its work.  */

  body = get_loop_body (loop);
  n = loop->num_nodes;
  for (i = 0; i < n; i++)
    if (body[i]->loop_father == loop)
      {
	remove_bb_from_loops (body[i]);
	add_bb_to_loop (body[i], loop_outer (loop));
      }
  free (body);

  /* Re-parent all immediate subloops to LOOP's parent.  */
  while (loop->inner)
    {
      ploop = loop->inner;
      flow_loop_tree_node_remove (ploop);
      flow_loop_tree_node_add (loop_outer (loop), ploop);
    }

  /* Remove the loop and free its data.  */
  delete_loop (loop);

  remove_edge (single_succ_edge (latch));

  /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
     there is an irreducible region inside the cancelled loop, the flags will
     be still correct.  */
  fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
}
993
994 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
995 condition stated in description of fix_loop_placement holds for them.
996 It is used in case when we removed some edges coming out of LOOP, which
997 may cause the right placement of LOOP inside loop tree to change.
998
999 IRRED_INVALIDATED is set to true if a change in the loop structures might
1000 invalidate the information about irreducible regions. */
1001
1002 static void
1003 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
1004 {
1005 struct loop *outer;
1006
1007 while (loop_outer (loop))
1008 {
1009 outer = loop_outer (loop);
1010 if (!fix_loop_placement (loop, irred_invalidated))
1011 break;
1012
1013 /* Changing the placement of a loop in the loop tree may alter the
1014 validity of condition 2) of the description of fix_bb_placement
1015 for its preheader, because the successor is the header and belongs
1016 to the loop. So call fix_bb_placements to fix up the placement
1017 of the preheader and (possibly) of its predecessors. */
1018 fix_bb_placements (loop_preheader_edge (loop)->src,
1019 irred_invalidated, NULL);
1020 loop = outer;
1021 }
1022 }
1023
1024 /* Duplicate loop bounds and other information we store about
1025 the loop into its duplicate. */
1026
1027 void
1028 copy_loop_info (struct loop *loop, struct loop *target)
1029 {
1030 gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1031 target->any_upper_bound = loop->any_upper_bound;
1032 target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1033 target->any_estimate = loop->any_estimate;
1034 target->nb_iterations_estimate = loop->nb_iterations_estimate;
1035 target->estimate_state = loop->estimate_state;
1036 target->warned_aggressive_loop_optimizations
1037 |= loop->warned_aggressive_loop_optimizations;
1038 }
1039
1040 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1041 created loop into loops structure. */
1042 struct loop *
1043 duplicate_loop (struct loop *loop, struct loop *target)
1044 {
1045 struct loop *cloop;
1046 cloop = alloc_loop ();
1047 place_new_loop (cfun, cloop);
1048
1049 copy_loop_info (loop, cloop);
1050
1051 /* Mark the new loop as copy of LOOP. */
1052 set_loop_copy (loop, cloop);
1053
1054 /* Add it to target. */
1055 flow_loop_tree_node_add (target, cloop);
1056
1057 return cloop;
1058 }
1059
1060 /* Copies structure of subloops of LOOP into TARGET loop, placing
1061 newly created loops into loop tree. */
1062 void
1063 duplicate_subloops (struct loop *loop, struct loop *target)
1064 {
1065 struct loop *aloop, *cloop;
1066
1067 for (aloop = loop->inner; aloop; aloop = aloop->next)
1068 {
1069 cloop = duplicate_loop (aloop, target);
1070 duplicate_subloops (aloop, cloop);
1071 }
1072 }
1073
/* Duplicates each of the N loops in the array COPIED_LOOPS, together with
   their subloop structure, as subloops of TARGET.  */
static void
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
{
  struct loop *copy;
  int ix;

  for (ix = 0; ix < n; ix++)
    {
      copy = duplicate_loop (copied_loops[ix], target);
      duplicate_subloops (copied_loops[ix], copy);
    }
}
1088
1089 /* Redirects edge E to basic block DEST. */
1090 static void
1091 loop_redirect_edge (edge e, basic_block dest)
1092 {
1093 if (e->dest == dest)
1094 return;
1095
1096 redirect_edge_and_branch_force (e, dest);
1097 }
1098
1099 /* Check whether LOOP's body can be duplicated. */
1100 bool
1101 can_duplicate_loop_p (const struct loop *loop)
1102 {
1103 int ret;
1104 basic_block *bbs = get_loop_body (loop);
1105
1106 ret = can_copy_bbs_p (bbs, loop->num_nodes);
1107 free (bbs);
1108
1109 return ret;
1110 }
1111
1112 /* Sets probability and count of edge E to zero. The probability and count
1113 is redistributed evenly to the remaining edges coming from E->src. */
1114
1115 static void
1116 set_zero_probability (edge e)
1117 {
1118 basic_block bb = e->src;
1119 edge_iterator ei;
1120 edge ae, last = NULL;
1121 unsigned n = EDGE_COUNT (bb->succs);
1122 gcov_type cnt = e->count, cnt1;
1123 unsigned prob = e->probability, prob1;
1124
1125 gcc_assert (n > 1);
1126 cnt1 = cnt / (n - 1);
1127 prob1 = prob / (n - 1);
1128
1129 FOR_EACH_EDGE (ae, ei, bb->succs)
1130 {
1131 if (ae == e)
1132 continue;
1133
1134 ae->probability += prob1;
1135 ae->count += cnt1;
1136 last = ae;
1137 }
1138
1139 /* Move the rest to one of the edges. */
1140 last->probability += prob % (n - 1);
1141 last->count += cnt % (n - 1);
1142
1143 e->probability = 0;
1144 e->count = 0;
1145 }
1146
/* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
   loop structure and dominators.  E's destination must be LOOP header for
   this to work, i.e. it must be entry or latch edge of this loop; these are
   unique, as the loops must have preheaders for this function to work
   correctly (in case E is latch, the function unrolls the loop, if E is entry
   edge, it peels the loop).  Store edges created by copying ORIG edge from
   copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
   original LOOP body, the other copies are numbered in order given by control
   flow through them) into TO_REMOVE array.  Returns false if duplication is
   impossible.  */

bool
duplicate_loop_to_header_edge (struct loop *loop, edge e,
			       unsigned int ndupl, sbitmap wont_exit,
			       edge orig, vec<edge> *to_remove,
			       int flags)
{
  struct loop *target, *aloop;
  struct loop **orig_loops;
  unsigned n_orig_loops;
  basic_block header = loop->header, latch = loop->latch;
  basic_block *new_bbs, *bbs, *first_active;
  basic_block new_bb, bb, first_active_latch = NULL;
  edge ae, latch_edge;
  edge spec_edges[2], new_spec_edges[2];
#define SE_LATCH 0
#define SE_ORIG 1
  unsigned i, j, n;
  int is_latch = (latch == e->src);
  /* Frequency scaling factors (in units of REG_BR_PROB_BASE):
     SCALE_ACT for the copy currently being created, SCALE_STEP per-copy
     continuation probabilities, SCALE_MAIN for the original body.  */
  int scale_act = 0, *scale_step = NULL, scale_main = 0;
  int scale_after_exit = 0;
  int p, freq_in, freq_le, freq_out_orig;
  int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
  int add_irreducible_flag;
  basic_block place_after;
  bitmap bbs_to_scale = NULL;
  bitmap_iterator bi;

  gcc_assert (e->dest == loop->header);
  gcc_assert (ndupl > 0);

  if (orig)
    {
      /* Orig must be edge out of the loop.  */
      gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
      gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
    }

  n = loop->num_nodes;
  bbs = get_loop_body_in_dom_order (loop);
  gcc_assert (bbs[0] == loop->header);
  gcc_assert (bbs[n - 1] == loop->latch);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return false;
    }
  new_bbs = XNEWVEC (basic_block, loop->num_nodes);

  /* In case we are doing loop peeling and the loop is in the middle of
     irreducible region, the peeled copies will be inside it too.  */
  add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
  gcc_assert (!is_latch || !add_irreducible_flag);

  /* Find edge from latch.  */
  latch_edge = loop_latch_edge (loop);

  if (flags & DLTHE_FLAG_UPDATE_FREQ)
    {
      /* Calculate coefficients by that we have to scale frequencies
	 of duplicated loop bodies.  */
      freq_in = header->frequency;
      freq_le = EDGE_FREQUENCY (latch_edge);
      /* Clamp so the derived probabilities stay in [0, REG_BR_PROB_BASE].  */
      if (freq_in == 0)
	freq_in = 1;
      if (freq_in < freq_le)
	freq_in = freq_le;
      freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
      if (freq_out_orig > freq_in - freq_le)
	freq_out_orig = freq_in - freq_le;
      /* Probability of continuing through a copy that keeps its exit,
	 resp. a copy whose exit will be removed.  */
      prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
      prob_pass_wont_exit =
	      RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);

      if (orig
	  && REG_BR_PROB_BASE - orig->probability != 0)
	{
	  /* The blocks that are dominated by a removed exit edge ORIG have
	     frequencies scaled by this.  */
	  scale_after_exit
              = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE,
                                    REG_BR_PROB_BASE - orig->probability);
	  bbs_to_scale = BITMAP_ALLOC (NULL);
	  for (i = 0; i < n; i++)
	    {
	      if (bbs[i] != orig->src
		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
		bitmap_set_bit (bbs_to_scale, i);
	    }
	}

      scale_step = XNEWVEC (int, ndupl);

      /* SCALE_STEP[I] is the probability of reaching copy I+1 from copy I;
	 WONT_EXIT bit I+1 tells whether copy I+1 loses its exit edge.  */
      for (i = 1; i <= ndupl; i++)
	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
				? prob_pass_wont_exit
				: prob_pass_thru;

      /* Complete peeling is special as the probability of exit in last
	 copy becomes 1.  */
      if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
	{
	  int wanted_freq = EDGE_FREQUENCY (e);

	  if (wanted_freq > freq_in)
	    wanted_freq = freq_in;

	  gcc_assert (!is_latch);
	  /* First copy has frequency of incoming edge.  Each subsequent
	     frequency should be reduced by prob_pass_wont_exit.  Caller
	     should've managed the flags so all except for original loop
	     has won't exist set.  */
	  scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
	  /* Now simulate the duplication adjustments and compute header
	     frequency of the last copy.  */
	  for (i = 0; i < ndupl; i++)
	    wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
	  scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
	}
      else if (is_latch)
	{
	  /* Unrolling: the original body keeps only the iterations that do
	     not continue into the copies; sum the geometric-like series of
	     per-copy continuation probabilities to rescale it.  */
	  prob_pass_main = bitmap_bit_p (wont_exit, 0)
				? prob_pass_wont_exit
				: prob_pass_thru;
	  p = prob_pass_main;
	  scale_main = REG_BR_PROB_BASE;
	  for (i = 0; i < ndupl; i++)
	    {
	      scale_main += p;
	      p = combine_probabilities (p, scale_step[i]);
	    }
	  scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
	  scale_act = combine_probabilities (scale_main, prob_pass_main);
	}
      else
	{
	  /* Peeling: the original body runs only after all copies were
	     passed through.  */
	  scale_main = REG_BR_PROB_BASE;
	  for (i = 0; i < ndupl; i++)
	    scale_main = combine_probabilities (scale_main, scale_step[i]);
	  scale_act = REG_BR_PROB_BASE - prob_pass_thru;
	}
      for (i = 0; i < ndupl; i++)
	gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
      gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
		  && scale_act >= 0  && scale_act <= REG_BR_PROB_BASE);
    }

  /* Loop the new bbs will belong to.  */
  target = e->src->loop_father;

  /* Original loops.  */
  n_orig_loops = 0;
  for (aloop = loop->inner; aloop; aloop = aloop->next)
    n_orig_loops++;
  orig_loops = XNEWVEC (struct loop *, n_orig_loops);
  for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
    orig_loops[i] = aloop;

  set_loop_copy (loop, target);

  /* FIRST_ACTIVE[I] is the first copy of BBS[I] in control-flow order;
     used below to recompute dominators of blocks outside the loop.  */
  first_active = XNEWVEC (basic_block, n);
  if (is_latch)
    {
      memcpy (first_active, bbs, n * sizeof (basic_block));
      first_active_latch = latch;
    }

  spec_edges[SE_ORIG] = orig;
  spec_edges[SE_LATCH] = latch_edge;

  place_after = e->src;
  for (j = 0; j < ndupl; j++)
    {
      /* Copy loops.  */
      copy_loops_to (orig_loops, n_orig_loops, target);

      /* Copy bbs.  */
      copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
		place_after, true);
      place_after = new_spec_edges[SE_LATCH]->src;

      if (flags & DLTHE_RECORD_COPY_NUMBER)
	for (i = 0; i < n; i++)
	  {
	    gcc_assert (!new_bbs[i]->aux);
	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
	  }

      /* Note whether the blocks and edges belong to an irreducible loop.  */
      if (add_irreducible_flag)
	{
	  /* BB_DUPLICATED temporarily marks the fresh copies so that only
	     edges internal to this copy get the irreducible flag.  */
	  for (i = 0; i < n; i++)
	    new_bbs[i]->flags |= BB_DUPLICATED;
	  for (i = 0; i < n; i++)
	    {
	      edge_iterator ei;
	      new_bb = new_bbs[i];
	      if (new_bb->loop_father == target)
		new_bb->flags |= BB_IRREDUCIBLE_LOOP;

	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
		if ((ae->dest->flags & BB_DUPLICATED)
		    && (ae->src->loop_father == target
			|| ae->dest->loop_father == target))
		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
	    }
	  for (i = 0; i < n; i++)
	    new_bbs[i]->flags &= ~BB_DUPLICATED;
	}

      /* Redirect the special edges.  */
      if (is_latch)
	{
	  /* Unrolling: splice the copy between the old latch and the
	     header; the copy's latch becomes the loop latch.  */
	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
					  loop->header);
	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
	  latch = loop->latch = new_bbs[n - 1];
	  e = latch_edge = new_spec_edges[SE_LATCH];
	}
      else
	{
	  /* Peeling: splice the copy between E's source and the header.  */
	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
					  loop->header);
	  redirect_edge_and_branch_force (e, new_bbs[0]);
	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
	  e = new_spec_edges[SE_LATCH];
	}

      /* Record exit edge in this copy.  */
      if (orig && bitmap_bit_p (wont_exit, j + 1))
	{
	  if (to_remove)
	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
	  set_zero_probability (new_spec_edges[SE_ORIG]);

	  /* Scale the frequencies of the blocks dominated by the exit.  */
	  if (bbs_to_scale)
	    {
	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
		{
		  scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
					     REG_BR_PROB_BASE);
		}
	    }
	}

      /* Record the first copy in the control flow order if it is not
	 the original loop (i.e. in case of peeling).  */
      if (!first_active_latch)
	{
	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
	  first_active_latch = new_bbs[n - 1];
	}

      /* Set counts and frequencies.  */
      if (flags & DLTHE_FLAG_UPDATE_FREQ)
	{
	  scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
	  scale_act = combine_probabilities (scale_act, scale_step[j]);
	}
    }
  free (new_bbs);
  free (orig_loops);

  /* Record the exit edge in the original loop body, and update the frequencies.  */
  if (orig && bitmap_bit_p (wont_exit, 0))
    {
      if (to_remove)
	to_remove->safe_push (orig);
      set_zero_probability (orig);

      /* Scale the frequencies of the blocks dominated by the exit.  */
      if (bbs_to_scale)
	{
	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
	    {
	      scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
					 REG_BR_PROB_BASE);
	    }
	}
    }

  /* Update the original loop.  */
  if (!is_latch)
    set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
  if (flags & DLTHE_FLAG_UPDATE_FREQ)
    {
      scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
      free (scale_step);
    }

  /* Update dominators of outer blocks if affected.  */
  for (i = 0; i < n; i++)
    {
      basic_block dominated, dom_bb;
      vec<basic_block> dom_bbs;
      unsigned j;

      bb = bbs[i];
      bb->aux = 0;

      dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
      FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
	{
	  if (flow_bb_inside_loop_p (loop, dominated))
	    continue;
	  dom_bb = nearest_common_dominator (
			CDI_DOMINATORS, first_active[i], first_active_latch);
	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
	}
      dom_bbs.release ();
    }
  free (first_active);

  free (bbs);
  BITMAP_FREE (bbs_to_scale);

  return true;
}
1479
1480 /* A callback for make_forwarder block, to redirect all edges except for
1481 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1482 whether to redirect it. */
1483
1484 edge mfb_kj_edge;
1485 bool
1486 mfb_keep_just (edge e)
1487 {
1488 return e != mfb_kj_edge;
1489 }
1490
1491 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1492
1493 static bool
1494 has_preds_from_loop (basic_block block, struct loop *loop)
1495 {
1496 edge e;
1497 edge_iterator ei;
1498
1499 FOR_EACH_EDGE (e, ei, block->preds)
1500 if (e->src->loop_father == loop)
1501 return true;
1502 return false;
1503 }
1504
/* Creates a pre-header for a LOOP.  Returns newly created block.  Unless
   CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
   entry; otherwise we also force preheader block to have only one successor.
   When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
   to be a fallthru predecessor to the loop header and to have only
   predecessors from outside of the loop.
   The function also updates dominators.  Returns NULL when no new block
   needed to be created.  */

basic_block
create_preheader (struct loop *loop, int flags)
{
  edge e, fallthru;
  basic_block dummy;
  int nentry = 0;
  bool irred = false;
  bool latch_edge_was_fallthru;
  edge one_succ_pred = NULL, single_entry = NULL;
  edge_iterator ei;

  /* Count the entry edges (header predecessors other than the latch),
     remembering the last one and one whose source has a single successor.  */
  FOR_EACH_EDGE (e, ei, loop->header->preds)
    {
      if (e->src == loop->latch)
	continue;
      irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
      nentry++;
      single_entry = e;
      if (single_succ_p (e->src))
	one_succ_pred = e;
    }
  gcc_assert (nentry);
  if (nentry == 1)
    {
      /* A single entry already exists; decide whether it can serve as the
	 preheader or a forwarder block must still be inserted.  */
      bool need_forwarder_block = false;

      /* We do not allow entry block to be the loop preheader, since we
	 cannot emit code there.  */
      if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        need_forwarder_block = true;
      else
	{
	  /* If we want simple preheaders, also force the preheader to have
	     just a single successor.  */
	  if ((flags & CP_SIMPLE_PREHEADERS)
	      && !single_succ_p (single_entry->src))
	    need_forwarder_block = true;
	  /* If we want fallthru preheaders, also create forwarder block when
	     preheader ends with a jump or has predecessors from loop.  */
	  else if ((flags & CP_FALLTHRU_PREHEADERS)
		   && (JUMP_P (BB_END (single_entry->src))
		       || has_preds_from_loop (single_entry->src, loop)))
	    need_forwarder_block = true;
	}
      if (! need_forwarder_block)
	return NULL;
    }

  /* Split off a forwarder block that keeps only the latch edge; it becomes
     the new header, and the old header (DUMMY) becomes the preheader.  */
  mfb_kj_edge = loop_latch_edge (loop);
  latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
  fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
  dummy = fallthru->src;
  loop->header = fallthru->dest;

  /* Try to be clever in placing the newly created preheader.  The idea is to
     avoid breaking any "fallthruness" relationship between blocks.

     The preheader was created just before the header and all incoming edges
     to the header were redirected to the preheader, except the latch edge.
     So the only problematic case is when this latch edge was a fallthru
     edge: it is not anymore after the preheader creation so we have broken
     the fallthruness.  We're therefore going to look for a better place.  */
  if (latch_edge_was_fallthru)
    {
      if (one_succ_pred)
	e = one_succ_pred;
      else
	e = EDGE_PRED (dummy, 0);

      move_block_after (dummy, e->src);
    }

  if (irred)
    {
      /* The preheader inherits membership in the irreducible region.  */
      dummy->flags |= BB_IRREDUCIBLE_LOOP;
      single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }

  if (dump_file)
    fprintf (dump_file, "Created preheader block for loop %i\n",
	     loop->num);

  if (flags & CP_FALLTHRU_PREHEADERS)
    gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
                && !JUMP_P (BB_END (dummy)));

  return dummy;
}
1601
1602 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1603
1604 void
1605 create_preheaders (int flags)
1606 {
1607 struct loop *loop;
1608
1609 if (!current_loops)
1610 return;
1611
1612 FOR_EACH_LOOP (loop, 0)
1613 create_preheader (loop, flags);
1614 loops_state_set (LOOPS_HAVE_PREHEADERS);
1615 }
1616
1617 /* Forces all loop latches to have only single successor. */
1618
1619 void
1620 force_single_succ_latches (void)
1621 {
1622 struct loop *loop;
1623 edge e;
1624
1625 FOR_EACH_LOOP (loop, 0)
1626 {
1627 if (loop->latch != loop->header && single_succ_p (loop->latch))
1628 continue;
1629
1630 e = find_edge (loop->latch, loop->header);
1631 gcc_checking_assert (e != NULL);
1632
1633 split_edge (e);
1634 }
1635 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1636 }
1637
/* This function is called from loop_version.  It splits the entry edge
   of the loop we want to version, adds the versioning condition, and
   adjust the edges to the two versions of the loop appropriately.
   e is an incoming edge. Returns the basic block containing the
   condition.

   --- edge e ---- > [second_head]

   Split it and insert new conditional expression and adjust edges.

    --- edge e ---> [cond expr] ---> [first_head]
                        |
                        +---------> [second_head]

   THEN_PROB is the probability of then branch of the condition.  */

static basic_block
lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
			   edge e, void *cond_expr, unsigned then_prob)
{
  basic_block new_head = NULL;
  edge e1;

  gcc_assert (e->dest == second_head);

  /* Split edge 'e'. This will create a new basic block, where we can
     insert conditional expr.  */
  new_head = split_edge (e);

  lv_add_condition_to_bb (first_head, second_head, new_head,
			  cond_expr);

  /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
  /* E is the (false/fallthrough) edge to SECOND_HEAD, E1 the new (true)
     edge to FIRST_HEAD.  Note the order: both counts are derived from the
     full count of E before E->count itself is scaled.  */
  e = single_succ_edge (new_head);
  e1 = make_edge (new_head, first_head,
		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
  e1->probability = then_prob;
  e->probability = REG_BR_PROB_BASE - then_prob;
  e1->count = apply_probability (e->count, e1->probability);
  e->count = apply_probability (e->count, e->probability);

  /* The new condition block now dominates both loop heads.  */
  set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
  set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);

  /* Adjust loop header phi nodes.  */
  lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);

  return new_head;
}
1687
/* Main entry point for Loop Versioning transformation.

   This transformation given a condition and a loop, creates
   -if (condition) { loop_copy1 } else { loop_copy2 },
   where loop_copy1 is the loop transformed in one way, and loop_copy2
   is the loop transformed in another way (or unchanged). 'condition'
   may be a run time test for things that were not resolved by static
   analysis (overlapping ranges (anti-aliasing), alignment, etc.).

   THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
   is the ratio by that the frequencies in the original loop should
   be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
   new loop should be scaled.

   If PLACE_AFTER is true, we place the new loop after LOOP in the
   instruction stream, otherwise it is placed before LOOP.

   Returns the new loop (the "else" version), or NULL when duplication
   or condition insertion failed.  If CONDITION_BB is non-NULL, the block
   holding the condition is stored in it.  */

struct loop *
loop_version (struct loop *loop,
	      void *cond_expr, basic_block *condition_bb,
	      unsigned then_prob, unsigned then_scale, unsigned else_scale,
	      bool place_after)
{
  basic_block first_head, second_head;
  edge entry, latch_edge, true_edge, false_edge;
  int irred_flag;
  struct loop *nloop;
  basic_block cond_bb;

  /* Record entry and latch edges for the loop.  The irreducible flag is
     temporarily cleared on the entry edge so the duplication machinery
     does not propagate it; it is restored below.  */
  entry = loop_preheader_edge (loop);
  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;

  /* Note down head of loop as first_head.  */
  first_head = entry->dest;

  /* Duplicate loop.  */
  if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
					       NULL, NULL, NULL, 0))
    {
      entry->flags |= irred_flag;
      return NULL;
    }

  /* After duplication entry edge now points to new loop head block.
     Note down new head as second_head.  */
  second_head = entry->dest;

  /* Split loop entry edge and insert new block with cond expr.  */
  cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
				       entry, cond_expr, then_prob);
  if (condition_bb)
    *condition_bb = cond_bb;

  if (!cond_bb)
    {
      entry->flags |= irred_flag;
      return NULL;
    }

  latch_edge = single_succ_edge (get_bb_copy (loop->latch));

  /* Turn the duplicated body into a proper loop in the loop tree.  */
  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
  nloop = loopify (latch_edge,
		   single_pred_edge (get_bb_copy (loop->header)),
		   cond_bb, true_edge, false_edge,
		   false /* Do not redirect all edges.  */,
		   then_scale, else_scale);

  copy_loop_info (loop, nloop);

  /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
  lv_flush_pending_stmts (latch_edge);

  /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS.  */
  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
  lv_flush_pending_stmts (false_edge);
  /* Adjust irreducible flag.  */
  if (irred_flag)
    {
      cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
      loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
      loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
      single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }

  if (place_after)
    {
      /* Move the blocks of the new loop right after the original loop's
	 latch in the instruction stream.  */
      basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
      unsigned i;

      after = loop->latch;

      for (i = 0; i < nloop->num_nodes; i++)
	{
	  move_block_after (bbs[i], after);
	  after = bbs[i];
	}
      free (bbs);
    }

  /* At this point condition_bb is loop preheader with two successors,
     first_head and second_head.   Make sure that loop preheader has only
     one successor.  */
  split_edge (loop_preheader_edge (loop));
  split_edge (loop_preheader_edge (nloop));

  return nloop;
}