1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "cfglayout.h"
30 #include "params.h"
31 #include "output.h"
32 #include "expr.h"
33 #include "hashtab.h"
34 #include "recog.h"
35
36 /* This pass performs loop unrolling and peeling. We only perform these
37 optimizations on innermost loops (with a single exception) because
38 the impact on performance is greatest here, and we want to avoid
39 unnecessary code size growth. The gain comes from greater sequentiality
40 of code, better code for further optimization passes to work on, and in
41 some cases fewer evaluations of exit conditions. The main problem is
42 code growth, which hurts performance through its effect on caches.
43
44 What we do:
45
46 -- complete peeling of once-rolling loops; this is the above-mentioned
47 exception, as it cancels the loop completely and does not cause
48 code growth
49 -- complete peeling of loops that roll a (small) constant number of times
50 -- simple peeling of the first iterations of loops that do not roll much
51 (according to profile feedback)
52 -- unrolling of loops that roll a constant number of times; this is
53 almost always a win, as we get rid of exit condition tests
54 -- unrolling of loops that roll a number of times that we can compute
55 at runtime; we also get rid of exit condition tests here, but there
56 is the extra expense of computing the number of iterations
57 -- simple unrolling of the remaining loops; this is performed only if we
58 are asked to, as the gain is questionable in this case and it may
59 even slow down the code
60 For more detailed descriptions of each of these, see the comments at
61 the appropriate functions below.
62
63 There are a number of parameters (defined and described in params.def)
64 that control how much we unroll/peel.
65
66 ??? A big problem is that we do not have a good way to determine how
67 many times we should unroll a loop; the experiments I have made
68 showed that this choice may affect performance by several percent.
69 */
70
71 /* Information about induction variables to split. */
72
73 struct iv_to_split
74 {
75 rtx insn; /* The insn in which the induction variable occurs. */
76 rtx base_var; /* The variable on which the values in the further
77 iterations are based. */
78 rtx step; /* Step of the induction variable. */
79 unsigned n_loc;
80 unsigned loc[3]; /* Location where the definition of the induction
81 variable occurs in the insn. For example if
82 N_LOC is 2, the expression is located at
83 XEXP (XEXP (single_set, loc[0]), loc[1]). */
84 };
85
86 struct split_ivs_info
87 {
88 htab_t insns_to_split; /* A hashtable of insns to split. */
89 unsigned first_new_block; /* The first basic block that was
90 duplicated. */
91 };
92
93 static void decide_unrolling_and_peeling (struct loops *, int);
94 static void peel_loops_completely (struct loops *, int);
95 static void decide_peel_simple (struct loop *, int);
96 static void decide_peel_once_rolling (struct loop *, int);
97 static void decide_peel_completely (struct loop *, int);
98 static void decide_unroll_stupid (struct loop *, int);
99 static void decide_unroll_constant_iterations (struct loop *, int);
100 static void decide_unroll_runtime_iterations (struct loop *, int);
101 static void peel_loop_simple (struct loops *, struct loop *);
102 static void peel_loop_completely (struct loops *, struct loop *);
103 static void unroll_loop_stupid (struct loops *, struct loop *);
104 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
105 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
106 static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
107 static void si_info_start_duplication (struct split_ivs_info *);
108 static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
109 static void free_si_info (struct split_ivs_info *);
110
111 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
112 void
113 unroll_and_peel_loops (struct loops *loops, int flags)
114 {
115 struct loop *loop, *next;
116 bool check;
117
118 /* First perform complete loop peeling (it is almost surely a win,
119 and it strongly affects the parameters for further decisions). */
120 peel_loops_completely (loops, flags);
121
122 /* Now decide on the rest of the unrolling and peeling. */
123 decide_unrolling_and_peeling (loops, flags);
124
125 loop = loops->tree_root;
126 while (loop->inner)
127 loop = loop->inner;
128
129 /* Scan the loops, inner ones first. */
130 while (loop != loops->tree_root)
131 {
132 if (loop->next)
133 {
134 next = loop->next;
135 while (next->inner)
136 next = next->inner;
137 }
138 else
139 next = loop->outer;
140
141 check = true;
142 /* And perform the appropriate transformations. */
143 switch (loop->lpt_decision.decision)
144 {
145 case LPT_PEEL_COMPLETELY:
146 /* Already done. */
147 gcc_unreachable ();
148 case LPT_PEEL_SIMPLE:
149 peel_loop_simple (loops, loop);
150 break;
151 case LPT_UNROLL_CONSTANT:
152 unroll_loop_constant_iterations (loops, loop);
153 break;
154 case LPT_UNROLL_RUNTIME:
155 unroll_loop_runtime_iterations (loops, loop);
156 break;
157 case LPT_UNROLL_STUPID:
158 unroll_loop_stupid (loops, loop);
159 break;
160 case LPT_NONE:
161 check = false;
162 break;
163 default:
164 gcc_unreachable ();
165 }
166 if (check)
167 {
168 #ifdef ENABLE_CHECKING
169 verify_dominators (CDI_DOMINATORS);
170 verify_loop_structure (loops);
171 #endif
172 }
173 loop = next;
174 }
175
176 iv_analysis_done ();
177 }
178
179 /* Check whether the exit of LOOP is at the end of the loop body. */
180
181 static bool
182 loop_exit_at_end_p (struct loop *loop)
183 {
184 struct niter_desc *desc = get_simple_loop_desc (loop);
185 rtx insn;
186
187 if (desc->in_edge->dest != loop->latch)
188 return false;
189
190 /* Check that the latch is empty. */
191 FOR_BB_INSNS (loop->latch, insn)
192 {
193 if (INSN_P (insn))
194 return false;
195 }
196
197 return true;
198 }
199
200 /* Decide whether to peel LOOPS (depending on FLAGS) completely, and do so. */
201 static void
202 peel_loops_completely (struct loops *loops, int flags)
203 {
204 struct loop *loop, *next;
205
206 loop = loops->tree_root;
207 while (loop->inner)
208 loop = loop->inner;
209
210 while (loop != loops->tree_root)
211 {
212 if (loop->next)
213 {
214 next = loop->next;
215 while (next->inner)
216 next = next->inner;
217 }
218 else
219 next = loop->outer;
220
221 loop->lpt_decision.decision = LPT_NONE;
222
223 if (dump_file)
224 fprintf (dump_file,
225 "\n;; *** Considering loop %d for complete peeling ***\n",
226 loop->num);
227
228 loop->ninsns = num_loop_insns (loop);
229
230 decide_peel_once_rolling (loop, flags);
231 if (loop->lpt_decision.decision == LPT_NONE)
232 decide_peel_completely (loop, flags);
233
234 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
235 {
236 peel_loop_completely (loops, loop);
237 #ifdef ENABLE_CHECKING
238 verify_dominators (CDI_DOMINATORS);
239 verify_loop_structure (loops);
240 #endif
241 }
242 loop = next;
243 }
244 }
245
246 /* Decide whether to unroll or peel LOOPS (depending on FLAGS) and by how much. */
247 static void
248 decide_unrolling_and_peeling (struct loops *loops, int flags)
249 {
250 struct loop *loop = loops->tree_root, *next;
251
252 while (loop->inner)
253 loop = loop->inner;
254
255 /* Scan the loops, inner ones first. */
256 while (loop != loops->tree_root)
257 {
258 if (loop->next)
259 {
260 next = loop->next;
261 while (next->inner)
262 next = next->inner;
263 }
264 else
265 next = loop->outer;
266
267 loop->lpt_decision.decision = LPT_NONE;
268
269 if (dump_file)
270 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
271
272 /* Do not peel cold areas. */
273 if (!maybe_hot_bb_p (loop->header))
274 {
275 if (dump_file)
276 fprintf (dump_file, ";; Not considering loop, cold area\n");
277 loop = next;
278 continue;
279 }
280
281 /* Can the loop be manipulated? */
282 if (!can_duplicate_loop_p (loop))
283 {
284 if (dump_file)
285 fprintf (dump_file,
286 ";; Not considering loop, cannot duplicate\n");
287 loop = next;
288 continue;
289 }
290
291 /* Skip non-innermost loops. */
292 if (loop->inner)
293 {
294 if (dump_file)
295 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
296 loop = next;
297 continue;
298 }
299
300 loop->ninsns = num_loop_insns (loop);
301 loop->av_ninsns = average_num_loop_insns (loop);
302
303 /* Try transformations one by one in decreasing order of
304 priority. */
305
306 decide_unroll_constant_iterations (loop, flags);
307 if (loop->lpt_decision.decision == LPT_NONE)
308 decide_unroll_runtime_iterations (loop, flags);
309 if (loop->lpt_decision.decision == LPT_NONE)
310 decide_unroll_stupid (loop, flags);
311 if (loop->lpt_decision.decision == LPT_NONE)
312 decide_peel_simple (loop, flags);
313
314 loop = next;
315 }
316 }
317
318 /* Decide whether the LOOP is once rolling and suitable for complete
319 peeling. */
320 static void
321 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
322 {
323 struct niter_desc *desc;
324
325 if (dump_file)
326 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
327
328 /* Is the loop small enough? */
329 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
330 {
331 if (dump_file)
332 fprintf (dump_file, ";; Not considering loop, is too big\n");
333 return;
334 }
335
336 /* Check for simple loops. */
337 desc = get_simple_loop_desc (loop);
338
339 /* Check number of iterations. */
340 if (!desc->simple_p
341 || desc->assumptions
342 || !desc->const_iter
343 || desc->niter != 0)
344 {
345 if (dump_file)
346 fprintf (dump_file,
347 ";; Unable to prove that the loop rolls exactly once\n");
348 return;
349 }
350
351 /* Success. */
352 if (dump_file)
353 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
354 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
355 }
356
357 /* Decide whether the LOOP is suitable for complete peeling. */
358 static void
359 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
360 {
361 unsigned npeel;
362 struct niter_desc *desc;
363
364 if (dump_file)
365 fprintf (dump_file, "\n;; Considering peeling completely\n");
366
367 /* Skip non-innermost loops. */
368 if (loop->inner)
369 {
370 if (dump_file)
371 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
372 return;
373 }
374
375 /* Do not peel cold areas. */
376 if (!maybe_hot_bb_p (loop->header))
377 {
378 if (dump_file)
379 fprintf (dump_file, ";; Not considering loop, cold area\n");
380 return;
381 }
382
383 /* Can the loop be manipulated? */
384 if (!can_duplicate_loop_p (loop))
385 {
386 if (dump_file)
387 fprintf (dump_file,
388 ";; Not considering loop, cannot duplicate\n");
389 return;
390 }
391
392 /* npeel = number of iterations to peel. */
393 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
394 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
395 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
396
397 /* Is the loop small enough? */
398 if (!npeel)
399 {
400 if (dump_file)
401 fprintf (dump_file, ";; Not considering loop, is too big\n");
402 return;
403 }
404
405 /* Check for simple loops. */
406 desc = get_simple_loop_desc (loop);
407
408 /* Check number of iterations. */
409 if (!desc->simple_p
410 || desc->assumptions
411 || !desc->const_iter)
412 {
413 if (dump_file)
414 fprintf (dump_file,
415 ";; Unable to prove that the loop iterates constant times\n");
416 return;
417 }
418
419 if (desc->niter > npeel - 1)
420 {
421 if (dump_file)
422 {
423 fprintf (dump_file,
424 ";; Not peeling loop completely, rolls too much (");
425 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
426 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
427 }
428 return;
429 }
430
431 /* Success. */
432 if (dump_file)
433 fprintf (dump_file, ";; Decided to peel loop completely\n");
434 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
435 }
436
437 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
438 completely. The transformation done:
439
440 for (i = 0; i < 4; i++)
441 body;
442
443 ==>
444
445 i = 0;
446 body; i++;
447 body; i++;
448 body; i++;
449 body; i++;
450 */
451 static void
452 peel_loop_completely (struct loops *loops, struct loop *loop)
453 {
454 sbitmap wont_exit;
455 unsigned HOST_WIDE_INT npeel;
456 unsigned n_remove_edges, i;
457 edge *remove_edges, ei;
458 struct niter_desc *desc = get_simple_loop_desc (loop);
459 struct split_ivs_info *si_info = NULL;
460
461 npeel = desc->niter;
462
463 if (npeel)
464 {
465 wont_exit = sbitmap_alloc (npeel + 1);
466 sbitmap_ones (wont_exit);
467 RESET_BIT (wont_exit, 0);
468 if (desc->noloop_assumptions)
469 RESET_BIT (wont_exit, 1);
470
471 remove_edges = xcalloc (npeel, sizeof (edge));
472 n_remove_edges = 0;
473
474 if (flag_split_ivs_in_unroller)
475 si_info = analyze_ivs_to_split (loop);
476
477 si_info_start_duplication (si_info);
478 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
479 loops, npeel,
480 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
481 DLTHE_FLAG_UPDATE_FREQ))
482 abort ();
483
484 free (wont_exit);
485
486 if (si_info)
487 {
488 split_ivs_in_copies (si_info, npeel, false, true);
489 free_si_info (si_info);
490 }
491
492 /* Remove the exit edges. */
493 for (i = 0; i < n_remove_edges; i++)
494 remove_path (loops, remove_edges[i]);
495 free (remove_edges);
496 }
497
498 ei = desc->in_edge;
499 free_simple_loop_desc (loop);
500
501 /* Now remove the unreachable part of the last iteration and cancel
502 the loop. */
503 remove_path (loops, ei);
504
505 if (dump_file)
506 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
507 }
508
509 /* Decide whether to unroll LOOP, which iterates a constant number of
510 times, and by how much. */
511
512 static void
513 decide_unroll_constant_iterations (struct loop *loop, int flags)
514 {
515 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
516 struct niter_desc *desc;
517
518 if (!(flags & UAP_UNROLL))
519 {
520 /* We were not asked to, so just return silently. */
521 return;
522 }
523
524 if (dump_file)
525 fprintf (dump_file,
526 "\n;; Considering unrolling loop with constant "
527 "number of iterations\n");
528
529 /* nunroll = total number of copies of the original loop body in the
530 unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
531 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
532 nunroll_by_av
533 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
534 if (nunroll > nunroll_by_av)
535 nunroll = nunroll_by_av;
536 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
537 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
538
539 /* Skip big loops. */
540 if (nunroll <= 1)
541 {
542 if (dump_file)
543 fprintf (dump_file, ";; Not considering loop, is too big\n");
544 return;
545 }
546
547 /* Check for simple loops. */
548 desc = get_simple_loop_desc (loop);
549
550 /* Check number of iterations. */
551 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
552 {
553 if (dump_file)
554 fprintf (dump_file,
555 ";; Unable to prove that the loop iterates constant times\n");
556 return;
557 }
558
559 /* Check whether the loop rolls enough to consider. */
560 if (desc->niter < 2 * nunroll)
561 {
562 if (dump_file)
563 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
564 return;
565 }
566
567 /* Success; now compute the number of iterations to unroll. We alter
568 nunroll so that as few copies of the loop body as possible are
569 necessary, while still not decreasing the number of unrollings
570 too much (at most by 1). */
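/* A hypothetical example (numbers chosen for illustration only): with
niter == 100, nunroll == 4 and the exit not at the end of the body,
the search below finds that an unroll factor of 4 (i == 3) needs only
exit_mod + i + 1 == 4 copies, since 100 % 4 == 0, so best_unroll
ends up as 3. */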
571 best_copies = 2 * nunroll + 10;
572
573 i = 2 * nunroll + 2;
574 if (i - 1 >= desc->niter)
575 i = desc->niter - 2;
576
577 for (; i >= nunroll - 1; i--)
578 {
579 unsigned exit_mod = desc->niter % (i + 1);
580
581 if (!loop_exit_at_end_p (loop))
582 n_copies = exit_mod + i + 1;
583 else if (exit_mod != (unsigned) i
584 || desc->noloop_assumptions != NULL_RTX)
585 n_copies = exit_mod + i + 2;
586 else
587 n_copies = i + 1;
588
589 if (n_copies < best_copies)
590 {
591 best_copies = n_copies;
592 best_unroll = i;
593 }
594 }
595
596 if (dump_file)
597 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
598 best_unroll + 1, best_copies, nunroll);
599
600 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
601 loop->lpt_decision.times = best_unroll;
602
603 if (dump_file)
604 fprintf (dump_file,
605 ";; Decided to unroll the constant times rolling loop, %d times.\n",
606 loop->lpt_decision.times);
607 }
608
609 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
610 times. The transformation does this:
611
612 for (i = 0; i < 102; i++)
613 body;
614
615 ==>
616
617 i = 0;
618 body; i++;
619 body; i++;
620 while (i < 102)
621 {
622 body; i++;
623 body; i++;
624 body; i++;
625 body; i++;
626 }
627 */
628 static void
629 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
630 {
631 unsigned HOST_WIDE_INT niter;
632 unsigned exit_mod;
633 sbitmap wont_exit;
634 unsigned n_remove_edges, i;
635 edge *remove_edges;
636 unsigned max_unroll = loop->lpt_decision.times;
637 struct niter_desc *desc = get_simple_loop_desc (loop);
638 bool exit_at_end = loop_exit_at_end_p (loop);
639 struct split_ivs_info *si_info = NULL;
640
641 niter = desc->niter;
642
643 /* Should not get here (such a loop should be peeled instead). */
644 gcc_assert (niter > max_unroll + 1);
645
646 exit_mod = niter % (max_unroll + 1);
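/* E.g. for the 102-iteration loop sketched in the comment above with
max_unroll == 3 (four copies of the body), exit_mod == 102 % 4 == 2;
when the exit test is not at the end of the body, those two iterations
are peeled in front of the unrolled loop. */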
647
648 wont_exit = sbitmap_alloc (max_unroll + 1);
649 sbitmap_ones (wont_exit);
650
651 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
652 n_remove_edges = 0;
653
654 if (flag_split_ivs_in_unroller)
655 si_info = analyze_ivs_to_split (loop);
656
657 if (!exit_at_end)
658 {
659 /* The exit is not at the end of the loop; leave the exit test
660 in the first copy, so that loops that start with a test
661 of the exit condition have a contiguous body after unrolling. */
662
663 if (dump_file)
664 fprintf (dump_file, ";; Condition on beginning of loop.\n");
665
666 /* Peel exit_mod iterations. */
667 RESET_BIT (wont_exit, 0);
668 if (desc->noloop_assumptions)
669 RESET_BIT (wont_exit, 1);
670
671 if (exit_mod)
672 {
673 si_info_start_duplication (si_info);
674 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
675 loops, exit_mod,
676 wont_exit, desc->out_edge,
677 remove_edges, &n_remove_edges,
678 DLTHE_FLAG_UPDATE_FREQ))
679 abort ();
680
681 if (si_info && exit_mod > 1)
682 split_ivs_in_copies (si_info, exit_mod, false, false);
683
684 desc->noloop_assumptions = NULL_RTX;
685 desc->niter -= exit_mod;
686 desc->niter_max -= exit_mod;
687 }
688
689 SET_BIT (wont_exit, 1);
690 }
691 else
692 {
693 /* Leave the exit test in the last copy, for the same reason as above,
694 as the loop tests the condition at the end of the loop body. */
695
696 if (dump_file)
697 fprintf (dump_file, ";; Condition on end of loop.\n");
698
699 /* We know that niter >= max_unroll + 2, so we do not need to worry
700 about the case in which we would exit before reaching the loop.
701 Just peel exit_mod + 1 iterations. */
702 if (exit_mod != max_unroll
703 || desc->noloop_assumptions)
704 {
705 RESET_BIT (wont_exit, 0);
706 if (desc->noloop_assumptions)
707 RESET_BIT (wont_exit, 1);
708
709 si_info_start_duplication (si_info);
710 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
711 loops, exit_mod + 1,
712 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
713 DLTHE_FLAG_UPDATE_FREQ))
714 abort ();
715
716 if (si_info && exit_mod > 0)
717 split_ivs_in_copies (si_info, exit_mod + 1, false, false);
718
719 desc->niter -= exit_mod + 1;
720 desc->niter_max -= exit_mod + 1;
721 desc->noloop_assumptions = NULL_RTX;
722
723 SET_BIT (wont_exit, 0);
724 SET_BIT (wont_exit, 1);
725 }
726
727 RESET_BIT (wont_exit, max_unroll);
728 }
729
730 /* Now unroll the loop. */
731 si_info_start_duplication (si_info);
732 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
733 loops, max_unroll,
734 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
735 DLTHE_FLAG_UPDATE_FREQ))
736 abort ();
737
738 if (si_info)
739 {
740 split_ivs_in_copies (si_info, max_unroll, true, true);
741 free_si_info (si_info);
742 }
743
744 free (wont_exit);
745
746 if (exit_at_end)
747 {
748 basic_block exit_block = desc->in_edge->src->rbi->copy;
749 /* Find the new in and out edges; they are in the last copy we have made. */
750
751 if (exit_block->succ->dest == desc->out_edge->dest)
752 {
753 desc->out_edge = exit_block->succ;
754 desc->in_edge = exit_block->succ->succ_next;
755 }
756 else
757 {
758 desc->out_edge = exit_block->succ->succ_next;
759 desc->in_edge = exit_block->succ;
760 }
761 }
762
763 desc->niter /= max_unroll + 1;
764 desc->niter_max /= max_unroll + 1;
765 desc->niter_expr = GEN_INT (desc->niter);
766
767 /* Remove the edges. */
768 for (i = 0; i < n_remove_edges; i++)
769 remove_path (loops, remove_edges[i]);
770 free (remove_edges);
771
772 if (dump_file)
773 fprintf (dump_file,
774 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
775 max_unroll, num_loop_insns (loop));
776 }
777
778 /* Decide whether to unroll LOOP, which iterates a number of times that is
779 computable at runtime, and by how much. */
780 static void
781 decide_unroll_runtime_iterations (struct loop *loop, int flags)
782 {
783 unsigned nunroll, nunroll_by_av, i;
784 struct niter_desc *desc;
785
786 if (!(flags & UAP_UNROLL))
787 {
788 /* We were not asked to, so just return silently. */
789 return;
790 }
791
792 if (dump_file)
793 fprintf (dump_file,
794 "\n;; Considering unrolling loop with runtime "
795 "computable number of iterations\n");
796
797 /* nunroll = total number of copies of the original loop body in the
798 unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
799 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
800 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
801 if (nunroll > nunroll_by_av)
802 nunroll = nunroll_by_av;
803 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
804 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
805
806 /* Skip big loops. */
807 if (nunroll <= 1)
808 {
809 if (dump_file)
810 fprintf (dump_file, ";; Not considering loop, is too big\n");
811 return;
812 }
813
814 /* Check for simple loops. */
815 desc = get_simple_loop_desc (loop);
816
817 /* Check whether the loop is simple. */
818 if (!desc->simple_p || desc->assumptions)
819 {
820 if (dump_file)
821 fprintf (dump_file,
822 ";; Unable to prove that the number of iterations "
823 "can be counted in runtime\n");
824 return;
825 }
826
827 if (desc->const_iter)
828 {
829 if (dump_file)
830 fprintf (dump_file, ";; Loop iterates constant times\n");
831 return;
832 }
833
834 /* If we have profile feedback, check whether the loop rolls. */
835 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
836 {
837 if (dump_file)
838 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
839 return;
840 }
841
842 /* Success; now force nunroll to be a power of 2, as we are unable to
843 cope with overflows in the computation of the number of iterations. */
844 for (i = 1; 2 * i <= nunroll; i *= 2)
845 continue;
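/* For instance, nunroll == 6 leaves i == 4 here, so we unroll four
times (lpt_decision.times == 3). */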
846
847 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
848 loop->lpt_decision.times = i - 1;
849
850 if (dump_file)
851 fprintf (dump_file,
852 ";; Decided to unroll the runtime computable "
853 "times rolling loop, %d times.\n",
854 loop->lpt_decision.times);
855 }
856
857 /* Unroll LOOP, for which we are able to count the number of iterations at
858 runtime, LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this
859 (with some extra care for the case n < 0):
860
861 for (i = 0; i < n; i++)
862 body;
863
864 ==>
865
866 i = 0;
867 mod = n % 4;
868
869 switch (mod)
870 {
871 case 3:
872 body; i++;
873 case 2:
874 body; i++;
875 case 1:
876 body; i++;
877 case 0: ;
878 }
879
880 while (i < n)
881 {
882 body; i++;
883 body; i++;
884 body; i++;
885 body; i++;
886 }
887 */
888 static void
889 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
890 {
891 rtx old_niter, niter, init_code, branch_code, tmp;
892 unsigned i, j, p;
893 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
894 unsigned n_dom_bbs;
895 sbitmap wont_exit;
896 int may_exit_copy;
897 unsigned n_peel, n_remove_edges;
898 edge *remove_edges, e;
899 bool extra_zero_check, last_may_exit;
900 unsigned max_unroll = loop->lpt_decision.times;
901 struct niter_desc *desc = get_simple_loop_desc (loop);
902 bool exit_at_end = loop_exit_at_end_p (loop);
903 struct split_ivs_info *si_info = NULL;
904
905 if (flag_split_ivs_in_unroller)
906 si_info = analyze_ivs_to_split (loop);
907
908 /* Remember blocks whose dominators will have to be updated. */
909 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
910 n_dom_bbs = 0;
911
912 body = get_loop_body (loop);
913 for (i = 0; i < loop->num_nodes; i++)
914 {
915 unsigned nldom;
916 basic_block *ldom;
917
918 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
919 for (j = 0; j < nldom; j++)
920 if (!flow_bb_inside_loop_p (loop, ldom[j]))
921 dom_bbs[n_dom_bbs++] = ldom[j];
922
923 free (ldom);
924 }
925 free (body);
926
927 if (!exit_at_end)
928 {
929 /* Leave the exit in the first copy (for an explanation why, see the
930 comment in unroll_loop_constant_iterations). */
931 may_exit_copy = 0;
932 n_peel = max_unroll - 1;
933 extra_zero_check = true;
934 last_may_exit = false;
935 }
936 else
937 {
938 /* Leave the exit in the last copy (for an explanation why, see the
939 comment in unroll_loop_constant_iterations). */
940 may_exit_copy = max_unroll;
941 n_peel = max_unroll;
942 extra_zero_check = false;
943 last_may_exit = true;
944 }
945
946 /* Get expression for number of iterations. */
947 start_sequence ();
948 old_niter = niter = gen_reg_rtx (desc->mode);
949 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
950 if (tmp != niter)
951 emit_move_insn (niter, tmp);
952
953 /* Compute the modulo by ANDing niter with max_unroll; we use the fact
954 that the number of unrollings is a power of two, and thus this is
955 correct even if the computation overflows. */
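/* For example, with max_unroll == 3 (four copies of the body) this
computes niter & 3, which equals niter % 4. */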
956 niter = expand_simple_binop (desc->mode, AND,
957 niter,
958 GEN_INT (max_unroll),
959 NULL_RTX, 0, OPTAB_LIB_WIDEN);
960
961 init_code = get_insns ();
962 end_sequence ();
963
964 /* Precondition the loop. */
965 loop_split_edge_with (loop_preheader_edge (loop), init_code);
966
967 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
968 n_remove_edges = 0;
969
970 wont_exit = sbitmap_alloc (max_unroll + 2);
971
972 /* Peel the first copy of the loop body (almost always we must leave the exit
973 test here; the only exception is when we have an extra zero check and the
974 number of iterations is reliable). Also record the place of the (possible)
975 extra zero check. */
976 sbitmap_zero (wont_exit);
977 if (extra_zero_check
978 && !desc->noloop_assumptions)
979 SET_BIT (wont_exit, 1);
980 ezc_swtch = loop_preheader_edge (loop)->src;
981 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
982 loops, 1,
983 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
984 DLTHE_FLAG_UPDATE_FREQ))
985 abort ();
986
987 /* Record the place where the switch will be built for preconditioning. */
988 swtch = loop_split_edge_with (loop_preheader_edge (loop),
989 NULL_RTX);
990
991 for (i = 0; i < n_peel; i++)
992 {
993 /* Peel the copy. */
994 sbitmap_zero (wont_exit);
995 if (i != n_peel - 1 || !last_may_exit)
996 SET_BIT (wont_exit, 1);
997 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
998 loops, 1,
999 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1000 DLTHE_FLAG_UPDATE_FREQ))
1001 abort ();
1002
1003 /* Create item for switch. */
1004 j = n_peel - i - (extra_zero_check ? 0 : 1);
1005 p = REG_BR_PROB_BASE / (i + 2);
1006
1007 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1008 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1009 block_label (preheader), p, NULL_RTX);
1010
1011 swtch = loop_split_edge_with (swtch->pred, branch_code);
1012 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1013 swtch->succ->probability = REG_BR_PROB_BASE - p;
1014 e = make_edge (swtch, preheader,
1015 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
1016 e->probability = p;
1017 }
1018
1019 if (extra_zero_check)
1020 {
1021 /* Add branch for zero iterations. */
1022 p = REG_BR_PROB_BASE / (max_unroll + 1);
1023 swtch = ezc_swtch;
1024 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1025 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1026 block_label (preheader), p, NULL_RTX);
1027
1028 swtch = loop_split_edge_with (swtch->succ, branch_code);
1029 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1030 swtch->succ->probability = REG_BR_PROB_BASE - p;
1031 e = make_edge (swtch, preheader,
1032 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
1033 e->probability = p;
1034 }
1035
1036 /* Recount dominators for outer blocks. */
1037 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1038
1039 /* And unroll loop. */
1040
1041 sbitmap_ones (wont_exit);
1042 RESET_BIT (wont_exit, may_exit_copy);
1043
1044 si_info_start_duplication (si_info);
1045 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1046 loops, max_unroll,
1047 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1048 DLTHE_FLAG_UPDATE_FREQ))
1049 abort ();
1050
1051 if (si_info)
1052 {
1053 split_ivs_in_copies (si_info, max_unroll, true, true);
1054 free_si_info (si_info);
1055 }
1056
1057 free (wont_exit);
1058
1059 if (exit_at_end)
1060 {
1061 basic_block exit_block = desc->in_edge->src->rbi->copy;
1062 /* Find the new in and out edges; they are in the last copy we have made. */
1063
1064 if (exit_block->succ->dest == desc->out_edge->dest)
1065 {
1066 desc->out_edge = exit_block->succ;
1067 desc->in_edge = exit_block->succ->succ_next;
1068 }
1069 else
1070 {
1071 desc->out_edge = exit_block->succ->succ_next;
1072 desc->in_edge = exit_block->succ;
1073 }
1074 }
1075
1076 /* Remove the edges. */
1077 for (i = 0; i < n_remove_edges; i++)
1078 remove_path (loops, remove_edges[i]);
1079 free (remove_edges);
1080
1081 /* We must be careful when updating the number of iterations because of the
1082 preconditioning and the fact that the value must be valid at the entry
1083 of the loop. After passing through the above code, we see that
1084 the correct new number of iterations is this: */
1085 gcc_assert (!desc->const_iter);
1086 desc->niter_expr =
1087 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1088 desc->niter_max /= max_unroll + 1;
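/* Illustrative example (hypothetical numbers, exit test at the start of
the body): if the loop originally iterated 10 times and max_unroll == 3,
the preconditioning code above executes 10 & 3 == 2 iterations and the
unrolled body, now containing 4 copies, then runs 10 / 4 == 2 times. */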
1089 if (exit_at_end)
1090 {
1091 desc->niter_expr =
1092 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1093 desc->noloop_assumptions = NULL_RTX;
1094 desc->niter_max--;
1095 }
1096
1097 if (dump_file)
1098 fprintf (dump_file,
1099 ";; Unrolled loop %d times, counting # of iterations "
1100 "in runtime, %i insns\n",
1101 max_unroll, num_loop_insns (loop));
1102 }
1103
1104 /* Decide whether to simply peel LOOP and by how much. */
1105 static void
1106 decide_peel_simple (struct loop *loop, int flags)
1107 {
1108 unsigned npeel;
1109 struct niter_desc *desc;
1110
1111 if (!(flags & UAP_PEEL))
1112 {
1113 /* We were not asked to, so just return silently. */
1114 return;
1115 }
1116
1117 if (dump_file)
1118 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1119
1120 /* npeel = number of iterations to peel. */
1121 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1122 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1123 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1124
1125 /* Skip big loops. */
1126 if (!npeel)
1127 {
1128 if (dump_file)
1129 fprintf (dump_file, ";; Not considering loop, is too big\n");
1130 return;
1131 }
1132
1133 /* Check for simple loops. */
1134 desc = get_simple_loop_desc (loop);
1135
1136 /* Check number of iterations. */
1137 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1138 {
1139 if (dump_file)
1140 fprintf (dump_file, ";; Loop iterates constant times\n");
1141 return;
1142 }
1143
1144 /* Do not simply peel loops with branches inside -- it increases the
1145 number of mispredicts. */
1146 if (num_loop_branches (loop) > 1)
1147 {
1148 if (dump_file)
1149 fprintf (dump_file, ";; Not peeling, contains branches\n");
1150 return;
1151 }
1152
1153 if (loop->header->count)
1154 {
1155 unsigned niter = expected_loop_iterations (loop);
1156 if (niter + 1 > npeel)
1157 {
1158 if (dump_file)
1159 {
1160 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1161 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1162 (HOST_WIDEST_INT) (niter + 1));
1163 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1164 npeel);
1165 }
1166 return;
1167 }
1168 npeel = niter + 1;
1169 }
1170 else
1171 {
1172 /* For now we have no good heuristics to decide whether loop peeling
1173 will be effective, so disable it. */
1174 if (dump_file)
1175 fprintf (dump_file,
1176 ";; Not peeling loop, no evidence it will be profitable\n");
1177 return;
1178 }
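/* For instance, in the profile-feedback case above: if the loop is
expected to iterate about 3 times and the insn-count limit allows at
least 4 peelings, npeel becomes 4, so the peeled copies usually cover
all iterations. */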
1179
1180 /* Success. */
1181 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1182 loop->lpt_decision.times = npeel;
1183
1184 if (dump_file)
1185 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1186 loop->lpt_decision.times);
1187 }
1188
1189 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1190 while (cond)
1191 body;
1192
1193 ==>
1194
1195 if (!cond) goto end;
1196 body;
1197 if (!cond) goto end;
1198 body;
1199 while (cond)
1200 body;
1201 end: ;
1202 */
1203 static void
1204 peel_loop_simple (struct loops *loops, struct loop *loop)
1205 {
1206 sbitmap wont_exit;
1207 unsigned npeel = loop->lpt_decision.times;
1208 struct niter_desc *desc = get_simple_loop_desc (loop);
1209 struct split_ivs_info *si_info = NULL;
1210
1211 if (flag_split_ivs_in_unroller && npeel > 1)
1212 si_info = analyze_ivs_to_split (loop);
1213
1214 wont_exit = sbitmap_alloc (npeel + 1);
1215 sbitmap_zero (wont_exit);
1216
1217 si_info_start_duplication (si_info);
1218 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1219 loops, npeel, wont_exit, NULL, NULL, NULL,
1220 DLTHE_FLAG_UPDATE_FREQ))
1221 abort ();
1222
1223 free (wont_exit);
1224
1225 if (si_info)
1226 {
1227 split_ivs_in_copies (si_info, npeel, false, false);
1228 free_si_info (si_info);
1229 }
1230
1231 if (desc->simple_p)
1232 {
1233 if (desc->const_iter)
1234 {
1235 desc->niter -= npeel;
1236 desc->niter_expr = GEN_INT (desc->niter);
1237 desc->noloop_assumptions = NULL_RTX;
1238 }
1239 else
1240 {
1241 /* We cannot just update niter_expr, as its value might be clobbered
1242 inside the loop. We could handle this by counting the number of
1243 iterations into a temporary just as we do in runtime unrolling,
1244 but it does not seem worthwhile. */
1245 free_simple_loop_desc (loop);
1246 }
1247 }
1248 if (dump_file)
1249 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1250 }
1251
1252 /* Decide whether to unroll LOOP stupidly and by how much. */
1253 static void
1254 decide_unroll_stupid (struct loop *loop, int flags)
1255 {
1256 unsigned nunroll, nunroll_by_av, i;
1257 struct niter_desc *desc;
1258
1259 if (!(flags & UAP_UNROLL_ALL))
1260 {
1261 /* We were not asked to, so just return silently. */
1262 return;
1263 }
1264
1265 if (dump_file)
1266 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1267
1268 /* nunroll = total number of copies of the original loop body in the
1269 unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
1270 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1271 nunroll_by_av
1272 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1273 if (nunroll > nunroll_by_av)
1274 nunroll = nunroll_by_av;
1275 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1276 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1277
1278 /* Skip big loops. */
1279 if (nunroll <= 1)
1280 {
1281 if (dump_file)
1282 fprintf (dump_file, ";; Not considering loop, is too big\n");
1283 return;
1284 }
1285
1286 /* Check for simple loops. */
1287 desc = get_simple_loop_desc (loop);
1288
1289 /* Check whether the loop is simple. */
1290 if (desc->simple_p && !desc->assumptions)
1291 {
1292 if (dump_file)
1293 fprintf (dump_file, ";; The loop is simple\n");
1294 return;
1295 }
1296
1297 /* Do not unroll loops with branches inside -- it increases the
1298 number of mispredicts. */
1299 if (num_loop_branches (loop) > 1)
1300 {
1301 if (dump_file)
1302 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1303 return;
1304 }
1305
1306 /* If we have profile feedback, check whether the loop rolls. */
1307 if (loop->header->count
1308 && expected_loop_iterations (loop) < 2 * nunroll)
1309 {
1310 if (dump_file)
1311 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1312 return;
1313 }
1314
1315 /* Success. Now force nunroll to be a power of 2, as it seems that this
1316 improves results (partly because of better alignment, partly
1317 because of some dark magic). */
1318 for (i = 1; 2 * i <= nunroll; i *= 2)
1319 continue;
1320
1321 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1322 loop->lpt_decision.times = i - 1;
1323
1324 if (dump_file)
1325 fprintf (dump_file,
1326 ";; Decided to unroll the loop stupidly, %d times.\n",
1327 loop->lpt_decision.times);
1328 }
1329
1330 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1331 while (cond)
1332 body;
1333
1334 ==>
1335
1336 while (cond)
1337 {
1338 body;
1339 if (!cond) break;
1340 body;
1341 if (!cond) break;
1342 body;
1343 if (!cond) break;
1344 body;
1345 }
1346 */
1347 static void
1348 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1349 {
1350 sbitmap wont_exit;
1351 unsigned nunroll = loop->lpt_decision.times;
1352 struct niter_desc *desc = get_simple_loop_desc (loop);
1353 struct split_ivs_info *si_info = NULL;
1354
1355 if (flag_split_ivs_in_unroller)
1356 si_info = analyze_ivs_to_split (loop);
1357
1358 wont_exit = sbitmap_alloc (nunroll + 1);
1359 sbitmap_zero (wont_exit);
1360
1361 si_info_start_duplication (si_info);
1362 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1363 loops, nunroll, wont_exit, NULL, NULL, NULL,
1364 DLTHE_FLAG_UPDATE_FREQ))
1365 abort ();
1366
1367 if (si_info)
1368 {
1369 split_ivs_in_copies (si_info, nunroll, true, true);
1370 free_si_info (si_info);
1371 }
1372
1373 free (wont_exit);
1374
1375 if (desc->simple_p)
1376 {
1377 /* We may indeed get here if there are nontrivial assumptions needed
1378 for a loop to be really simple. We could update the counts, but the
1379 problem is that we are unable to decide which exit will be taken
1380 (not really true when the number of iterations is constant,
1381 but no one will do anything with this information, so we do not
1382 worry about it). */
1383 desc->simple_p = false;
1384 }
1385
1386 if (dump_file)
1387 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1388 nunroll, num_loop_insns (loop));
1389 }
1390
1391 /* A hash function for information about insns to split. */
1392
1393 static hashval_t
1394 si_info_hash (const void *ivts)
1395 {
1396 return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1397 }
1398
1399 /* An equality function for information about insns to split. */
1400
1401 static int
1402 si_info_eq (const void *ivts1, const void *ivts2)
1403 {
1404 const struct iv_to_split *i1 = ivts1;
1405 const struct iv_to_split *i2 = ivts2;
1406
1407 return i1->insn == i2->insn;
1408 }
1409
1410 /* Determine whether there is an induction variable in INSN that
1411 we would like to split during unrolling. Return NULL if INSN
1412 contains no interesting IVs. Otherwise, allocate an IV_TO_SPLIT
1413 structure, fill it with the relevant information and return a
1414 pointer to it. */
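/* A hypothetical example: an insn whose single set is
(set (reg:SI 60) (plus:SI (reg:SI 60) (const_int 4))) would qualify,
since the destination is a register and a biv with nonzero constant
step 4. */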
1415
1416 static struct iv_to_split *
1417 analyze_iv_to_split_insn (rtx insn)
1418 {
1419 rtx set, dest;
1420 struct rtx_iv iv;
1421 struct iv_to_split *ivts;
1422
1423 /* For now we just split the basic induction variables. Later this may be
1424 extended, for example, to also handle addresses of memory references. */
1425 set = single_set (insn);
1426 if (!set)
1427 return NULL;
1428
1429 dest = SET_DEST (set);
1430 if (!REG_P (dest))
1431 return NULL;
1432
1433 if (!biv_p (insn, dest))
1434 return NULL;
1435
1436 if (!iv_analyze (insn, dest, &iv))
1437 abort ();
1438
1439 if (iv.step == const0_rtx
1440 || iv.mode != iv.extend_mode)
1441 return NULL;
1442
1443 /* Record the insn to split. */
1444 ivts = xmalloc (sizeof (struct iv_to_split));
1445 ivts->insn = insn;
1446 ivts->base_var = NULL_RTX;
1447 ivts->step = iv.step;
1448 ivts->n_loc = 1;
1449 ivts->loc[0] = 1;
1450
1451 return ivts;
1452 }
1453
1454 /* Determine which of the induction variables in LOOP to split.
1455 Return a SPLIT_IVS_INFO struct with its hash table filled
1456 with all insns in which IVs are to be split. The FIRST_NEW_BLOCK
1457 field of the return value is undefined. */
1458
1459 static struct split_ivs_info *
1460 analyze_ivs_to_split (struct loop *loop)
1461 {
1462 basic_block *body, bb;
1463 unsigned i;
1464 struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
1465 rtx insn;
1466 struct iv_to_split *ivts;
1467 PTR *slot;
1468
1469 si_info->insns_to_split = htab_create (5 * loop->num_nodes,
1470 si_info_hash, si_info_eq, free);
1471
1472 iv_analysis_loop_init (loop);
1473
1474 body = get_loop_body (loop);
1475 for (i = 0; i < loop->num_nodes; i++)
1476 {
1477 bb = body[i];
1478 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1479 continue;
1480
1481 FOR_BB_INSNS (bb, insn)
1482 {
1483 if (!INSN_P (insn))
1484 continue;
1485
1486 ivts = analyze_iv_to_split_insn (insn);
1487
1488 if (!ivts)
1489 continue;
1490
1491 slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
1492 *slot = ivts;
1493 }
1494 }
1495
1496 free (body);
1497
1498 return si_info;
1499 }
1500
1501 /* Called just before loop duplication. Records the start of the duplicated
1502 area in SI_INFO. */
1503
1504 static void
1505 si_info_start_duplication (struct split_ivs_info *si_info)
1506 {
1507 if (si_info)
1508 si_info->first_new_block = last_basic_block;
1509 }
1510
1511 /* Determine the number of iterations between initialization of the base
1512 variable and the current copy (N_COPY). N_COPIES is the total number
1513 of newly created copies. UNROLLING is true if we are unrolling
1514 (not peeling) the loop. */
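/* For example, with n_copies == 3: when unrolling, the original body is
copy 0 (delta 0) and the new copies 1..3 get deltas 1..3; when peeling,
copy 1 (where the initialization is placed) gets delta 0, copies 2 and 3
get deltas 1 and 2, and the original body (copy 0) gets delta 3. */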
1515
1516 static unsigned
1517 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1518 {
1519 if (unrolling)
1520 {
1521 /* If we are unrolling, initialization is done in the original loop
1522 body (number 0). */
1523 return n_copy;
1524 }
1525 else
1526 {
1527 /* If we are peeling, the copy in which the initialization occurs has
1528 number 1. The original loop (number 0) is the last. */
1529 if (n_copy)
1530 return n_copy - 1;
1531 else
1532 return n_copies;
1533 }
1534 }
1535
1536 /* Locate in EXPR the expression corresponding to the location recorded
1537 in IVTS, and return a pointer to the RTX for this location. */
1538
1539 static rtx *
1540 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1541 {
1542 unsigned i;
1543 rtx *ret = &expr;
1544
1545 for (i = 0; i < ivts->n_loc; i++)
1546 ret = &XEXP (*ret, ivts->loc[i]);
1547
1548 return ret;
1549 }
1550
1551 /* Allocate basic variable for the induction variable chain. Callback for
1552 htab_traverse. */
1553
1554 static int
1555 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1556 {
1557 struct iv_to_split *ivts = *slot;
1558 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1559
1560 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1561
1562 return 1;
1563 }
1564
1565 /* Insert initialization of basic variable of IVTS before INSN, taking
1566 the initial value from INSN. */
1567
1568 static void
1569 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1570 {
1571 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1572 rtx seq;
1573
1574 start_sequence ();
1575 expr = force_operand (expr, ivts->base_var);
1576 if (expr != ivts->base_var)
1577 emit_move_insn (ivts->base_var, expr);
1578 seq = get_insns ();
1579 end_sequence ();
1580
1581 emit_insn_before (seq, insn);
1582 }
1583
1584 /* Replace the use of induction variable described in IVTS in INSN
1585 by base variable + DELTA * step. */
1586
1587 static void
1588 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1589 {
1590 rtx expr, *loc, seq, incr, var;
1591 enum machine_mode mode = GET_MODE (ivts->base_var);
1592 rtx src, dest, set;
1593
1594 /* Construct base + DELTA * step. */
1595 if (!delta)
1596 expr = ivts->base_var;
1597 else
1598 {
1599 incr = simplify_gen_binary (MULT, mode,
1600 ivts->step, gen_int_mode (delta, mode));
1601 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1602 ivts->base_var, incr);
1603 }
1604
1605 /* Figure out where to do the replacement. */
1606 loc = get_ivts_expr (single_set (insn), ivts);
1607
1608 /* If we can make the replacement right away, we're done. */
1609 if (validate_change (insn, loc, expr, 0))
1610 return;
1611
1612 /* Otherwise, force EXPR into a register and try again. */
1613 start_sequence ();
1614 var = gen_reg_rtx (mode);
1615 expr = force_operand (expr, var);
1616 if (expr != var)
1617 emit_move_insn (var, expr);
1618 seq = get_insns ();
1619 end_sequence ();
1620 emit_insn_before (seq, insn);
1621
1622 if (validate_change (insn, loc, var, 0))
1623 return;
1624
1625 /* The last chance. Try recreating the assignment in insn
1626 completely from scratch. */
1627 set = single_set (insn);
1628 gcc_assert (set);
1629
1630 start_sequence ();
1631 *loc = var;
1632 src = copy_rtx (SET_SRC (set));
1633 dest = copy_rtx (SET_DEST (set));
1634 src = force_operand (src, dest);
1635 if (src != dest)
1636 emit_move_insn (dest, src);
1637 seq = get_insns ();
1638 end_sequence ();
1639
1640 emit_insn_before (seq, insn);
1641 delete_insn (insn);
1642 }
1643
1644 /* Split the induction variables (that are marked in SI_INFO) in the copies
1645 of the loop, i.e. replace chains of the form
1646
1647 i = i + 1;
1648 ...
1649 i = i + 1;
1650 ...
1651 i = i + 1;
1652 ...
1653
1654 by chains of the form
1655
1656 i0 = i + 1
1657 ...
1658 i = i0 + 1
1659 ...
1660 i = i0 + 2
1661 ...
1662
1663 UNROLLING is true if we unrolled (not peeled) the loop.
1664 REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
1665 the loop (as should happen in complete unrolling, but not in ordinary
1666 peeling of the loop). */
1667
1668 static void
1669 split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
1670 bool unrolling, bool rewrite_original_loop)
1671 {
1672 unsigned i, delta;
1673 basic_block bb, orig_bb;
1674 rtx insn, orig_insn, next;
1675 struct iv_to_split ivts_templ, *ivts;
1676
1677 /* Sanity check -- we need to put initialization in the original loop
1678 body. */
1679 gcc_assert (!unrolling || rewrite_original_loop);
1680
1681 /* Allocate the basic variables (i0). */
1682 htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
1683
1684 for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1685 {
1686 bb = BASIC_BLOCK (i);
1687 orig_bb = bb->rbi->original;
1688
1689 delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
1690 unrolling);
1691 orig_insn = BB_HEAD (orig_bb);
1692 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
1693 {
1694 next = NEXT_INSN (insn);
1695 if (!INSN_P (insn))
1696 continue;
1697
1698 while (!INSN_P (orig_insn))
1699 orig_insn = NEXT_INSN (orig_insn);
1700
1701 ivts_templ.insn = orig_insn;
1702 ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1703 if (ivts)
1704 {
1705
1706 #ifdef ENABLE_CHECKING
1707 if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
1708 abort ();
1709 #endif
1710
1711 if (!delta)
1712 insert_base_initialization (ivts, insn);
1713 split_iv (ivts, insn, delta);
1714 }
1715 orig_insn = NEXT_INSN (orig_insn);
1716 }
1717 }
1718
1719 if (!rewrite_original_loop)
1720 return;
1721
1722 /* Also rewrite the original loop body. Its blocks are found as the originals
1723 of the blocks in the last copied iteration, i.e. those that have
1724 bb->rbi->original->copy == bb. */
1725 for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1726 {
1727 bb = BASIC_BLOCK (i);
1728 orig_bb = bb->rbi->original;
1729 if (orig_bb->rbi->copy != bb)
1730 continue;
1731
1732 delta = determine_split_iv_delta (0, n_copies, unrolling);
1733 for (orig_insn = BB_HEAD (orig_bb);
1734 orig_insn != NEXT_INSN (BB_END (bb));
1735 orig_insn = next)
1736 {
1737 next = NEXT_INSN (orig_insn);
1738
1739 if (!INSN_P (orig_insn))
1740 continue;
1741
1742 ivts_templ.insn = orig_insn;
1743 ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1744 if (!ivts)
1745 continue;
1746
1747 if (!delta)
1748 insert_base_initialization (ivts, orig_insn);
1749 split_iv (ivts, orig_insn, delta);
1750 }
1751 }
1752 }
1753
1754 /* Release SI_INFO. */
1755
1756 static void
1757 free_si_info (struct split_ivs_info *si_info)
1758 {
1759 htab_delete (si_info->insns_to_split);
1760 free (si_info);
1761 }