b093a7de081a2644a86b926facef9511a2da4e3e
[gcc.git] / gcc / loop-unroll.c
1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "cfglayout.h"
30 #include "params.h"
31 #include "output.h"
32 #include "expr.h"
33
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for further passes and in some cases
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
41
42 What we do:
43
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
60
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
63
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
67 */
68
69 static void decide_unrolling_and_peeling (struct loops *, int);
70 static void peel_loops_completely (struct loops *, int);
71 static void decide_peel_simple (struct loop *, int);
72 static void decide_peel_once_rolling (struct loop *, int);
73 static void decide_peel_completely (struct loop *, int);
74 static void decide_unroll_stupid (struct loop *, int);
75 static void decide_unroll_constant_iterations (struct loop *, int);
76 static void decide_unroll_runtime_iterations (struct loop *, int);
77 static void peel_loop_simple (struct loops *, struct loop *);
78 static void peel_loop_completely (struct loops *, struct loop *);
79 static void unroll_loop_stupid (struct loops *, struct loop *);
80 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
81 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
82
83 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
84 void
85 unroll_and_peel_loops (struct loops *loops, int flags)
86 {
87 struct loop *loop, *next;
88 bool check;
89
90 /* First perform complete loop peeling (it is almost surely a win,
91 and affects parameters for further decision a lot). */
92 peel_loops_completely (loops, flags);
93
94 /* Now decide rest of unrolling and peeling. */
95 decide_unrolling_and_peeling (loops, flags);
96
97 loop = loops->tree_root;
98 while (loop->inner)
99 loop = loop->inner;
100
101 /* Scan the loops, inner ones first. */
102 while (loop != loops->tree_root)
103 {
104 if (loop->next)
105 {
106 next = loop->next;
107 while (next->inner)
108 next = next->inner;
109 }
110 else
111 next = loop->outer;
112
113 check = true;
114 /* And perform the appropriate transformations. */
115 switch (loop->lpt_decision.decision)
116 {
117 case LPT_PEEL_COMPLETELY:
118 /* Already done. */
119 abort ();
120 case LPT_PEEL_SIMPLE:
121 peel_loop_simple (loops, loop);
122 break;
123 case LPT_UNROLL_CONSTANT:
124 unroll_loop_constant_iterations (loops, loop);
125 break;
126 case LPT_UNROLL_RUNTIME:
127 unroll_loop_runtime_iterations (loops, loop);
128 break;
129 case LPT_UNROLL_STUPID:
130 unroll_loop_stupid (loops, loop);
131 break;
132 case LPT_NONE:
133 check = false;
134 break;
135 default:
136 abort ();
137 }
138 if (check)
139 {
140 #ifdef ENABLE_CHECKING
141 verify_dominators (CDI_DOMINATORS);
142 verify_loop_structure (loops);
143 #endif
144 }
145 loop = next;
146 }
147
148 iv_analysis_done ();
149 }
150
151 /* Check whether exit of the LOOP is at the end of loop body. */
152
153 static bool
154 loop_exit_at_end_p (struct loop *loop)
155 {
156 struct niter_desc *desc = get_simple_loop_desc (loop);
157 rtx insn;
158
159 if (desc->in_edge->dest != loop->latch)
160 return false;
161
162 /* Check that the latch is empty. */
163 FOR_BB_INSNS (loop->latch, insn)
164 {
165 if (INSN_P (insn))
166 return false;
167 }
168
169 return true;
170 }
171
172 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
173 static void
174 peel_loops_completely (struct loops *loops, int flags)
175 {
176 struct loop *loop, *next;
177
178 loop = loops->tree_root;
179 while (loop->inner)
180 loop = loop->inner;
181
182 while (loop != loops->tree_root)
183 {
184 if (loop->next)
185 {
186 next = loop->next;
187 while (next->inner)
188 next = next->inner;
189 }
190 else
191 next = loop->outer;
192
193 loop->lpt_decision.decision = LPT_NONE;
194
195 if (rtl_dump_file)
196 fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
197 loop->num);
198
199 loop->ninsns = num_loop_insns (loop);
200
201 decide_peel_once_rolling (loop, flags);
202 if (loop->lpt_decision.decision == LPT_NONE)
203 decide_peel_completely (loop, flags);
204
205 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
206 {
207 peel_loop_completely (loops, loop);
208 #ifdef ENABLE_CHECKING
209 verify_dominators (CDI_DOMINATORS);
210 verify_loop_structure (loops);
211 #endif
212 }
213 loop = next;
214 }
215 }
216
217 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
218 static void
219 decide_unrolling_and_peeling (struct loops *loops, int flags)
220 {
221 struct loop *loop = loops->tree_root, *next;
222
223 while (loop->inner)
224 loop = loop->inner;
225
226 /* Scan the loops, inner ones first. */
227 while (loop != loops->tree_root)
228 {
229 if (loop->next)
230 {
231 next = loop->next;
232 while (next->inner)
233 next = next->inner;
234 }
235 else
236 next = loop->outer;
237
238 loop->lpt_decision.decision = LPT_NONE;
239
240 if (rtl_dump_file)
241 fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
242
243 /* Do not peel cold areas. */
244 if (!maybe_hot_bb_p (loop->header))
245 {
246 if (rtl_dump_file)
247 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
248 loop = next;
249 continue;
250 }
251
252 /* Can the loop be manipulated? */
253 if (!can_duplicate_loop_p (loop))
254 {
255 if (rtl_dump_file)
256 fprintf (rtl_dump_file,
257 ";; Not considering loop, cannot duplicate\n");
258 loop = next;
259 continue;
260 }
261
262 /* Skip non-innermost loops. */
263 if (loop->inner)
264 {
265 if (rtl_dump_file)
266 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
267 loop = next;
268 continue;
269 }
270
271 loop->ninsns = num_loop_insns (loop);
272 loop->av_ninsns = average_num_loop_insns (loop);
273
274 /* Try transformations one by one in decreasing order of
275 priority. */
276
277 decide_unroll_constant_iterations (loop, flags);
278 if (loop->lpt_decision.decision == LPT_NONE)
279 decide_unroll_runtime_iterations (loop, flags);
280 if (loop->lpt_decision.decision == LPT_NONE)
281 decide_unroll_stupid (loop, flags);
282 if (loop->lpt_decision.decision == LPT_NONE)
283 decide_peel_simple (loop, flags);
284
285 loop = next;
286 }
287 }
288
289 /* Decide whether the LOOP is once rolling and suitable for complete
290 peeling. */
291 static void
292 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
293 {
294 struct niter_desc *desc;
295
296 if (rtl_dump_file)
297 fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
298
299 /* Is the loop small enough? */
300 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
301 {
302 if (rtl_dump_file)
303 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
304 return;
305 }
306
307 /* Check for simple loops. */
308 desc = get_simple_loop_desc (loop);
309
310 /* Check number of iterations. */
311 if (!desc->simple_p
312 || desc->assumptions
313 || !desc->const_iter
314 || desc->niter != 0)
315 {
316 if (rtl_dump_file)
317 fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
318 return;
319 }
320
321 /* Success. */
322 if (rtl_dump_file)
323 fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n");
324 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
325 }
326
327 /* Decide whether the LOOP is suitable for complete peeling. */
328 static void
329 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
330 {
331 unsigned npeel;
332 struct niter_desc *desc;
333
334 if (rtl_dump_file)
335 fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
336
337 /* Skip non-innermost loops. */
338 if (loop->inner)
339 {
340 if (rtl_dump_file)
341 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
342 return;
343 }
344
345 /* Do not peel cold areas. */
346 if (!maybe_hot_bb_p (loop->header))
347 {
348 if (rtl_dump_file)
349 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
350 return;
351 }
352
353 /* Can the loop be manipulated? */
354 if (!can_duplicate_loop_p (loop))
355 {
356 if (rtl_dump_file)
357 fprintf (rtl_dump_file,
358 ";; Not considering loop, cannot duplicate\n");
359 return;
360 }
361
362 /* npeel = number of iterations to peel. */
363 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
364 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
365 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
366
367 /* Is the loop small enough? */
368 if (!npeel)
369 {
370 if (rtl_dump_file)
371 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
372 return;
373 }
374
375 /* Check for simple loops. */
376 desc = get_simple_loop_desc (loop);
377
378 /* Check number of iterations. */
379 if (!desc->simple_p
380 || desc->assumptions
381 || !desc->const_iter)
382 {
383 if (rtl_dump_file)
384 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
385 return;
386 }
387
388 if (desc->niter > npeel - 1)
389 {
390 if (rtl_dump_file)
391 {
392 fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
393 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
394 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
395 }
396 return;
397 }
398
399 /* Success. */
400 if (rtl_dump_file)
401 fprintf (rtl_dump_file, ";; Decided to peel loop completely\n");
402 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
403 }
404
405 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
406 completely. The transformation done:
407
408 for (i = 0; i < 4; i++)
409 body;
410
411 ==>
412
413 i = 0;
414 body; i++;
415 body; i++;
416 body; i++;
417 body; i++;
418 */
419 static void
420 peel_loop_completely (struct loops *loops, struct loop *loop)
421 {
422 sbitmap wont_exit;
423 unsigned HOST_WIDE_INT npeel;
424 unsigned n_remove_edges, i;
425 edge *remove_edges, ei;
426 struct niter_desc *desc = get_simple_loop_desc (loop);
427
428 npeel = desc->niter;
429
430 if (npeel)
431 {
432 wont_exit = sbitmap_alloc (npeel + 1);
433 sbitmap_ones (wont_exit);
434 RESET_BIT (wont_exit, 0);
435 if (desc->noloop_assumptions)
436 RESET_BIT (wont_exit, 1);
437
438 remove_edges = xcalloc (npeel, sizeof (edge));
439 n_remove_edges = 0;
440
441 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
442 loops, npeel,
443 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
444 DLTHE_FLAG_UPDATE_FREQ))
445 abort ();
446
447 free (wont_exit);
448
449 /* Remove the exit edges. */
450 for (i = 0; i < n_remove_edges; i++)
451 remove_path (loops, remove_edges[i]);
452 free (remove_edges);
453 }
454
455 ei = desc->in_edge;
456 free_simple_loop_desc (loop);
457
458 /* Now remove the unreachable part of the last iteration and cancel
459 the loop. */
460 remove_path (loops, ei);
461
462 if (rtl_dump_file)
463 fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
464 }
465
466 /* Decide whether to unroll LOOP iterating constant number of times and how much. */
467
468 static void
469 decide_unroll_constant_iterations (struct loop *loop, int flags)
470 {
471 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
472 struct niter_desc *desc;
473
474 if (!(flags & UAP_UNROLL))
475 {
476 /* We were not asked to, just return back silently. */
477 return;
478 }
479
480 if (rtl_dump_file)
481 fprintf (rtl_dump_file,
482 "\n;; Considering unrolling loop with constant number of iterations\n");
483
484 /* nunroll = total number of copies of the original loop body in
485 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
486 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
487 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
488 if (nunroll > nunroll_by_av)
489 nunroll = nunroll_by_av;
490 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
491 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
492
493 /* Skip big loops. */
494 if (nunroll <= 1)
495 {
496 if (rtl_dump_file)
497 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
498 return;
499 }
500
501 /* Check for simple loops. */
502 desc = get_simple_loop_desc (loop);
503
504 /* Check number of iterations. */
505 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
506 {
507 if (rtl_dump_file)
508 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
509 return;
510 }
511
512 /* Check whether the loop rolls enough to consider. */
513 if (desc->niter < 2 * nunroll)
514 {
515 if (rtl_dump_file)
516 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
517 return;
518 }
519
520 /* Success; now compute number of iterations to unroll. We alter
521 nunroll so that as few as possible copies of loop body are
522 necessary, while still not decreasing the number of unrollings
523 too much (at most by 1). */
524 best_copies = 2 * nunroll + 10;
525
526 i = 2 * nunroll + 2;
527 if (i - 1 >= desc->niter)
528 i = desc->niter - 2;
529
530 for (; i >= nunroll - 1; i--)
531 {
532 unsigned exit_mod = desc->niter % (i + 1);
533
534 if (!loop_exit_at_end_p (loop))
535 n_copies = exit_mod + i + 1;
536 else if (exit_mod != (unsigned) i
537 || desc->noloop_assumptions != NULL_RTX)
538 n_copies = exit_mod + i + 2;
539 else
540 n_copies = i + 1;
541
542 if (n_copies < best_copies)
543 {
544 best_copies = n_copies;
545 best_unroll = i;
546 }
547 }
548
549 if (rtl_dump_file)
550 fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
551 best_unroll + 1, best_copies, nunroll);
552
553 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
554 loop->lpt_decision.times = best_unroll;
555
556 if (rtl_dump_file)
557 fprintf (rtl_dump_file,
558 ";; Decided to unroll the constant times rolling loop, %d times.\n",
559 loop->lpt_decision.times);
560 }
561
562 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
563 times. The transformation does this:
564
565 for (i = 0; i < 102; i++)
566 body;
567
568 ==>
569
570 i = 0;
571 body; i++;
572 body; i++;
573 while (i < 102)
574 {
575 body; i++;
576 body; i++;
577 body; i++;
578 body; i++;
579 }
580 */
581 static void
582 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
583 {
584 unsigned HOST_WIDE_INT niter;
585 unsigned exit_mod;
586 sbitmap wont_exit;
587 unsigned n_remove_edges, i;
588 edge *remove_edges;
589 unsigned max_unroll = loop->lpt_decision.times;
590 struct niter_desc *desc = get_simple_loop_desc (loop);
591 bool exit_at_end = loop_exit_at_end_p (loop);
592
593 niter = desc->niter;
594
595 if (niter <= max_unroll + 1)
596 abort (); /* Should not get here (such loop should be peeled instead). */
597
598 exit_mod = niter % (max_unroll + 1);
599
600 wont_exit = sbitmap_alloc (max_unroll + 1);
601 sbitmap_ones (wont_exit);
602
603 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
604 n_remove_edges = 0;
605
606 if (!exit_at_end)
607 {
608 /* The exit is not at the end of the loop; leave exit test
609 in the first copy, so that the loops that start with test
610 of exit condition have continuous body after unrolling. */
611
612 if (rtl_dump_file)
613 fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n");
614
615 /* Peel exit_mod iterations. */
616 RESET_BIT (wont_exit, 0);
617 if (desc->noloop_assumptions)
618 RESET_BIT (wont_exit, 1);
619
620 if (exit_mod)
621 {
622 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
623 loops, exit_mod,
624 wont_exit, desc->out_edge,
625 remove_edges, &n_remove_edges,
626 DLTHE_FLAG_UPDATE_FREQ))
627 abort ();
628
629 desc->noloop_assumptions = NULL_RTX;
630 desc->niter -= exit_mod;
631 desc->niter_max -= exit_mod;
632 }
633
634 SET_BIT (wont_exit, 1);
635 }
636 else
637 {
638 /* Leave exit test in last copy, for the same reason as above if
639 the loop tests the condition at the end of loop body. */
640
641 if (rtl_dump_file)
642 fprintf (rtl_dump_file, ";; Condition on end of loop.\n");
643
644 /* We know that niter >= max_unroll + 2; so we do not need to care of
645 case when we would exit before reaching the loop. So just peel
646 exit_mod + 1 iterations. */
647 if (exit_mod != max_unroll
648 || desc->noloop_assumptions)
649 {
650 RESET_BIT (wont_exit, 0);
651 if (desc->noloop_assumptions)
652 RESET_BIT (wont_exit, 1);
653
654 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
655 loops, exit_mod + 1,
656 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
657 DLTHE_FLAG_UPDATE_FREQ))
658 abort ();
659
660 desc->niter -= exit_mod + 1;
661 desc->niter_max -= exit_mod + 1;
662 desc->noloop_assumptions = NULL_RTX;
663
664 SET_BIT (wont_exit, 0);
665 SET_BIT (wont_exit, 1);
666 }
667
668 RESET_BIT (wont_exit, max_unroll);
669 }
670
671 /* Now unroll the loop. */
672 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
673 loops, max_unroll,
674 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
675 DLTHE_FLAG_UPDATE_FREQ))
676 abort ();
677
678 free (wont_exit);
679
680 if (exit_at_end)
681 {
682 basic_block exit_block = desc->in_edge->src->rbi->copy;
683 /* Find a new in and out edge; they are in the last copy we have made. */
684
685 if (exit_block->succ->dest == desc->out_edge->dest)
686 {
687 desc->out_edge = exit_block->succ;
688 desc->in_edge = exit_block->succ->succ_next;
689 }
690 else
691 {
692 desc->out_edge = exit_block->succ->succ_next;
693 desc->in_edge = exit_block->succ;
694 }
695 }
696
697 desc->niter /= max_unroll + 1;
698 desc->niter_max /= max_unroll + 1;
699 desc->niter_expr = GEN_INT (desc->niter);
700
701 /* Remove the edges. */
702 for (i = 0; i < n_remove_edges; i++)
703 remove_path (loops, remove_edges[i]);
704 free (remove_edges);
705
706 if (rtl_dump_file)
707 fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop));
708 }
709
710 /* Decide whether to unroll LOOP iterating runtime computable number of times
711 and how much. */
712 static void
713 decide_unroll_runtime_iterations (struct loop *loop, int flags)
714 {
715 unsigned nunroll, nunroll_by_av, i;
716 struct niter_desc *desc;
717
718 if (!(flags & UAP_UNROLL))
719 {
720 /* We were not asked to, just return back silently. */
721 return;
722 }
723
724 if (rtl_dump_file)
725 fprintf (rtl_dump_file,
726 "\n;; Considering unrolling loop with runtime computable number of iterations\n");
727
728 /* nunroll = total number of copies of the original loop body in
729 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
730 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
731 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
732 if (nunroll > nunroll_by_av)
733 nunroll = nunroll_by_av;
734 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
735 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
736
737 /* Skip big loops. */
738 if (nunroll <= 1)
739 {
740 if (rtl_dump_file)
741 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
742 return;
743 }
744
745 /* Check for simple loops. */
746 desc = get_simple_loop_desc (loop);
747
748 /* Check simpleness. */
749 if (!desc->simple_p || desc->assumptions)
750 {
751 if (rtl_dump_file)
752 fprintf (rtl_dump_file,
753 ";; Unable to prove that the number of iterations can be counted in runtime\n");
754 return;
755 }
756
757 if (desc->const_iter)
758 {
759 if (rtl_dump_file)
760 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
761 return;
762 }
763
764 /* If we have profile feedback, check whether the loop rolls. */
765 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
766 {
767 if (rtl_dump_file)
768 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
769 return;
770 }
771
772 /* Success; now force nunroll to be power of 2, as we are unable to
773 cope with overflows in computation of number of iterations. */
774 for (i = 1; 2 * i <= nunroll; i *= 2)
775 continue;
776
777 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
778 loop->lpt_decision.times = i - 1;
779
780 if (rtl_dump_file)
781 fprintf (rtl_dump_file,
782 ";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
783 loop->lpt_decision.times);
784 }
785
786 /* Unroll LOOP for that we are able to count number of iterations in runtime
787 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
788 extra care for case n < 0):
789
790 for (i = 0; i < n; i++)
791 body;
792
793 ==>
794
795 i = 0;
796 mod = n % 4;
797
798 switch (mod)
799 {
800 case 3:
801 body; i++;
802 case 2:
803 body; i++;
804 case 1:
805 body; i++;
806 case 0: ;
807 }
808
809 while (i < n)
810 {
811 body; i++;
812 body; i++;
813 body; i++;
814 body; i++;
815 }
816 */
817 static void
818 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
819 {
820 rtx old_niter, niter, init_code, branch_code, tmp;
821 unsigned i, j, p;
822 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
823 unsigned n_dom_bbs;
824 sbitmap wont_exit;
825 int may_exit_copy;
826 unsigned n_peel, n_remove_edges;
827 edge *remove_edges, e;
828 bool extra_zero_check, last_may_exit;
829 unsigned max_unroll = loop->lpt_decision.times;
830 struct niter_desc *desc = get_simple_loop_desc (loop);
831 bool exit_at_end = loop_exit_at_end_p (loop);
832
833 /* Remember blocks whose dominators will have to be updated. */
834 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
835 n_dom_bbs = 0;
836
837 body = get_loop_body (loop);
838 for (i = 0; i < loop->num_nodes; i++)
839 {
840 unsigned nldom;
841 basic_block *ldom;
842
843 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
844 for (j = 0; j < nldom; j++)
845 if (!flow_bb_inside_loop_p (loop, ldom[j]))
846 dom_bbs[n_dom_bbs++] = ldom[j];
847
848 free (ldom);
849 }
850 free (body);
851
852 if (!exit_at_end)
853 {
854 /* Leave exit in first copy (for explanation why see comment in
855 unroll_loop_constant_iterations). */
856 may_exit_copy = 0;
857 n_peel = max_unroll - 1;
858 extra_zero_check = true;
859 last_may_exit = false;
860 }
861 else
862 {
863 /* Leave exit in last copy (for explanation why see comment in
864 unroll_loop_constant_iterations). */
865 may_exit_copy = max_unroll;
866 n_peel = max_unroll;
867 extra_zero_check = false;
868 last_may_exit = true;
869 }
870
871 /* Get expression for number of iterations. */
872 start_sequence ();
873 old_niter = niter = gen_reg_rtx (desc->mode);
874 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
875 if (tmp != niter)
876 emit_move_insn (niter, tmp);
877
878 /* Count modulo by ANDing it with max_unroll; we use the fact that
879 the number of unrollings is a power of two, and thus this is correct
880 even if there is overflow in the computation. */
881 niter = expand_simple_binop (desc->mode, AND,
882 niter,
883 GEN_INT (max_unroll),
884 NULL_RTX, 0, OPTAB_LIB_WIDEN);
885
886 init_code = get_insns ();
887 end_sequence ();
888
889 /* Precondition the loop. */
890 loop_split_edge_with (loop_preheader_edge (loop), init_code);
891
892 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
893 n_remove_edges = 0;
894
895 wont_exit = sbitmap_alloc (max_unroll + 2);
896
897 /* Peel the first copy of loop body (almost always we must leave exit test
898 here; the only exception is when we have extra zero check and the number
899 of iterations is reliable. Also record the place of (possible) extra
900 zero check. */
901 sbitmap_zero (wont_exit);
902 if (extra_zero_check
903 && !desc->noloop_assumptions)
904 SET_BIT (wont_exit, 1);
905 ezc_swtch = loop_preheader_edge (loop)->src;
906 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
907 loops, 1,
908 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
909 DLTHE_FLAG_UPDATE_FREQ))
910 abort ();
911
912 /* Record the place where switch will be built for preconditioning. */
913 swtch = loop_split_edge_with (loop_preheader_edge (loop),
914 NULL_RTX);
915
916 for (i = 0; i < n_peel; i++)
917 {
918 /* Peel the copy. */
919 sbitmap_zero (wont_exit);
920 if (i != n_peel - 1 || !last_may_exit)
921 SET_BIT (wont_exit, 1);
922 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
923 loops, 1,
924 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
925 DLTHE_FLAG_UPDATE_FREQ))
926 abort ();
927
928 /* Create item for switch. */
929 j = n_peel - i - (extra_zero_check ? 0 : 1);
930 p = REG_BR_PROB_BASE / (i + 2);
931
932 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
933 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
934 block_label (preheader), p, NULL_RTX);
935
936 swtch = loop_split_edge_with (swtch->pred, branch_code);
937 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
938 swtch->succ->probability = REG_BR_PROB_BASE - p;
939 e = make_edge (swtch, preheader,
940 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
941 e->probability = p;
942 }
943
944 if (extra_zero_check)
945 {
946 /* Add branch for zero iterations. */
947 p = REG_BR_PROB_BASE / (max_unroll + 1);
948 swtch = ezc_swtch;
949 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
950 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
951 block_label (preheader), p, NULL_RTX);
952
953 swtch = loop_split_edge_with (swtch->succ, branch_code);
954 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
955 swtch->succ->probability = REG_BR_PROB_BASE - p;
956 e = make_edge (swtch, preheader,
957 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
958 e->probability = p;
959 }
960
961 /* Recount dominators for outer blocks. */
962 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
963
964 /* And unroll loop. */
965
966 sbitmap_ones (wont_exit);
967 RESET_BIT (wont_exit, may_exit_copy);
968
969 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
970 loops, max_unroll,
971 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
972 DLTHE_FLAG_UPDATE_FREQ))
973 abort ();
974
975 free (wont_exit);
976
977 if (exit_at_end)
978 {
979 basic_block exit_block = desc->in_edge->src->rbi->copy;
980 /* Find a new in and out edge; they are in the last copy we have made. */
981
982 if (exit_block->succ->dest == desc->out_edge->dest)
983 {
984 desc->out_edge = exit_block->succ;
985 desc->in_edge = exit_block->succ->succ_next;
986 }
987 else
988 {
989 desc->out_edge = exit_block->succ->succ_next;
990 desc->in_edge = exit_block->succ;
991 }
992 }
993
994 /* Remove the edges. */
995 for (i = 0; i < n_remove_edges; i++)
996 remove_path (loops, remove_edges[i]);
997 free (remove_edges);
998
999 /* We must be careful when updating the number of iterations due to
1000 preconditioning and the fact that the value must be valid at entry
1001 of the loop. After passing through the above code, we see that
1002 the correct new number of iterations is this: */
1003 if (desc->const_iter)
1004 abort ();
1005 desc->niter_expr =
1006 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1007 desc->niter_max /= max_unroll + 1;
1008 if (exit_at_end)
1009 {
1010 desc->niter_expr =
1011 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1012 desc->noloop_assumptions = NULL_RTX;
1013 desc->niter_max--;
1014 }
1015
1016 if (rtl_dump_file)
1017 fprintf (rtl_dump_file,
1018 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
1019 max_unroll, num_loop_insns (loop));
1020 }
1021
1022 /* Decide whether to simply peel LOOP and how much. */
1023 static void
1024 decide_peel_simple (struct loop *loop, int flags)
1025 {
1026 unsigned npeel;
1027 struct niter_desc *desc;
1028
1029 if (!(flags & UAP_PEEL))
1030 {
1031 /* We were not asked to, just return back silently. */
1032 return;
1033 }
1034
1035 if (rtl_dump_file)
1036 fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
1037
1038 /* npeel = number of iterations to peel. */
1039 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1040 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1041 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1042
1043 /* Skip big loops. */
1044 if (!npeel)
1045 {
1046 if (rtl_dump_file)
1047 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1048 return;
1049 }
1050
1051 /* Check for simple loops. */
1052 desc = get_simple_loop_desc (loop);
1053
1054 /* Check number of iterations. */
1055 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1056 {
1057 if (rtl_dump_file)
1058 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
1059 return;
1060 }
1061
1062 /* Do not simply peel loops with branches inside -- it increases number
1063 of mispredicts. */
1064 if (num_loop_branches (loop) > 1)
1065 {
1066 if (rtl_dump_file)
1067 fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
1068 return;
1069 }
1070
1071 if (loop->header->count)
1072 {
1073 unsigned niter = expected_loop_iterations (loop);
1074 if (niter + 1 > npeel)
1075 {
1076 if (rtl_dump_file)
1077 {
1078 fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much (");
1079 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1));
1080 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
1081 }
1082 return;
1083 }
1084 npeel = niter + 1;
1085 }
1086 else
1087 {
1088 /* For now we have no good heuristics to decide whether loop peeling
1089 will be effective, so disable it. */
1090 if (rtl_dump_file)
1091 fprintf (rtl_dump_file,
1092 ";; Not peeling loop, no evidence it will be profitable\n");
1093 return;
1094 }
1095
1096 /* Success. */
1097 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1098 loop->lpt_decision.times = npeel;
1099
1100 if (rtl_dump_file)
1101 fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
1102 loop->lpt_decision.times);
1103 }
1104
1105 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1106 while (cond)
1107 body;
1108
1109 ==>
1110
1111 if (!cond) goto end;
1112 body;
1113 if (!cond) goto end;
1114 body;
1115 while (cond)
1116 body;
1117 end: ;
1118 */
1119 static void
1120 peel_loop_simple (struct loops *loops, struct loop *loop)
1121 {
1122 sbitmap wont_exit;
1123 unsigned npeel = loop->lpt_decision.times;
1124 struct niter_desc *desc = get_simple_loop_desc (loop);
1125
1126 wont_exit = sbitmap_alloc (npeel + 1);
1127 sbitmap_zero (wont_exit);
1128
1129 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1130 loops, npeel, wont_exit, NULL, NULL, NULL,
1131 DLTHE_FLAG_UPDATE_FREQ))
1132 abort ();
1133
1134 free (wont_exit);
1135
1136 if (desc->simple_p)
1137 {
1138 if (desc->const_iter)
1139 {
1140 desc->niter -= npeel;
1141 desc->niter_expr = GEN_INT (desc->niter);
1142 desc->noloop_assumptions = NULL_RTX;
1143 }
1144 else
1145 {
1146 /* We cannot just update niter_expr, as its value might be clobbered
1147 inside loop. We could handle this by counting the number into
1148 temporary just like we do in runtime unrolling, but it does not
1149 seem worthwhile. */
1150 free_simple_loop_desc (loop);
1151 }
1152 }
1153 if (rtl_dump_file)
1154 fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
1155 }
1156
1157 /* Decide whether to unroll LOOP stupidly and how much. */
1158 static void
1159 decide_unroll_stupid (struct loop *loop, int flags)
1160 {
1161 unsigned nunroll, nunroll_by_av, i;
1162 struct niter_desc *desc;
1163
1164 if (!(flags & UAP_UNROLL_ALL))
1165 {
1166 /* We were not asked to, just return back silently. */
1167 return;
1168 }
1169
1170 if (rtl_dump_file)
1171 fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
1172
1173 /* nunroll = total number of copies of the original loop body in
1174 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1175 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1176 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1177 if (nunroll > nunroll_by_av)
1178 nunroll = nunroll_by_av;
1179 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1180 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1181
1182 /* Skip big loops. */
1183 if (nunroll <= 1)
1184 {
1185 if (rtl_dump_file)
1186 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1187 return;
1188 }
1189
1190 /* Check for simple loops. */
1191 desc = get_simple_loop_desc (loop);
1192
1193 /* Check simpleness. */
1194 if (desc->simple_p && !desc->assumptions)
1195 {
1196 if (rtl_dump_file)
1197 fprintf (rtl_dump_file, ";; The loop is simple\n");
1198 return;
1199 }
1200
1201 /* Do not unroll loops with branches inside -- it increases number
1202 of mispredicts. */
1203 if (num_loop_branches (loop) > 1)
1204 {
1205 if (rtl_dump_file)
1206 fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
1207 return;
1208 }
1209
1210 /* If we have profile feedback, check whether the loop rolls. */
1211 if (loop->header->count
1212 && expected_loop_iterations (loop) < 2 * nunroll)
1213 {
1214 if (rtl_dump_file)
1215 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
1216 return;
1217 }
1218
1219 /* Success. Now force nunroll to be power of 2, as it seems that this
1220 improves results (partially because of better alignments, partially
1221 because of some dark magic). */
1222 for (i = 1; 2 * i <= nunroll; i *= 2)
1223 continue;
1224
1225 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1226 loop->lpt_decision.times = i - 1;
1227
1228 if (rtl_dump_file)
1229 fprintf (rtl_dump_file,
1230 ";; Decided to unroll the loop stupidly, %d times.\n",
1231 loop->lpt_decision.times);
1232 }
1233
1234 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1235 while (cond)
1236 body;
1237
1238 ==>
1239
1240 while (cond)
1241 {
1242 body;
1243 if (!cond) break;
1244 body;
1245 if (!cond) break;
1246 body;
1247 if (!cond) break;
1248 body;
1249 }
1250 */
1251 static void
1252 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1253 {
1254 sbitmap wont_exit;
1255 unsigned nunroll = loop->lpt_decision.times;
1256 struct niter_desc *desc = get_simple_loop_desc (loop);
1257
1258 wont_exit = sbitmap_alloc (nunroll + 1);
1259 sbitmap_zero (wont_exit);
1260
1261 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1262 loops, nunroll, wont_exit, NULL, NULL, NULL,
1263 DLTHE_FLAG_UPDATE_FREQ))
1264 abort ();
1265
1266 free (wont_exit);
1267
1268 if (desc->simple_p)
1269 {
1270 /* We indeed may get here provided that there are nontrivial assumptions
1271 for a loop to be really simple. We could update the counts, but the
1272 problem is that we are unable to decide which exit will be taken
1273 (not really true in case the number of iterations is constant,
1274 but noone will do anything with this information, so we do not
1275 worry about it). */
1276 desc->simple_p = false;
1277 }
1278
1279 if (rtl_dump_file)
1280 fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
1281 nunroll, num_loop_insns (loop));
1282 }