1 /* Try to unroll loops, and split induction variables.
2 Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21 /* Try to unroll a loop, and split induction variables.
22
23 Loops for which the number of iterations can be calculated exactly are
24 handled specially. If the number of iterations times the insn_count is
25 less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
 26    Otherwise, we try to unroll the loop by a factor that evenly divides the
 27    number of iterations, so that only one exit test will be needed.  It is unrolled
28 a number of times approximately equal to MAX_UNROLLED_INSNS divided by
29 the insn count.
30
31 Otherwise, if the number of iterations can be calculated exactly at
32 run time, and the loop is always entered at the top, then we try to
33 precondition the loop. That is, at run time, calculate how many times
34 the loop will execute, and then execute the loop body a few times so
35 that the remaining iterations will be some multiple of 4 (or 2 if the
36 loop is large). Then fall through to a loop unrolled 4 (or 2) times,
37 with only one exit test needed at the end of the loop.
38
39 Otherwise, if the number of iterations can not be calculated exactly,
40 not even at run time, then we still unroll the loop a number of times
41 approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
42 but there must be an exit test after each copy of the loop body.
43
 44    For each induction variable that is dead outside the loop (replaceable)
45 or for which we can easily calculate the final value, if we can easily
46 calculate its value at each place where it is set as a function of the
47 current loop unroll count and the variable's value at loop entry, then
48 the induction variable is split into `N' different variables, one for
49 each copy of the loop body. One variable is live across the backward
50 branch, and the others are all calculated as a function of this variable.
51 This helps eliminate data dependencies, and leads to further opportunities
52 for cse. */
53
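/* Editorial illustration (not from the original sources): if a loop whose
   induction variable `i' is incremented by 1 each iteration is unrolled 4
   times, the splitting described above produces, roughly,

	...first copy of the body, using `i'...
	i_1 = i + 1;	...second copy, using i_1...
	i_2 = i + 2;	...third copy, using i_2...
	i_3 = i + 3;	...fourth copy, using i_3...
	i = i + 4;

   so only `i' is live across the backward branch, and the other values are
   simple functions of it.  The names i_1 through i_3 are invented purely
   for exposition.  */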
54 /* Possible improvements follow: */
55
56 /* ??? Add an extra pass somewhere to determine whether unrolling will
57 give any benefit. E.g. after generating all unrolled insns, compute the
58 cost of all insns and compare against cost of insns in rolled loop.
59
60 - On traditional architectures, unrolling a non-constant bound loop
61 is a win if there is a giv whose only use is in memory addresses, the
62 memory addresses can be split, and hence giv increments can be
63 eliminated.
64 - It is also a win if the loop is executed many times, and preconditioning
65 can be performed for the loop.
66 Add code to check for these and similar cases. */
67
68 /* ??? Improve control of which loops get unrolled. Could use profiling
69 info to only unroll the most commonly executed loops. Perhaps have
 70    a user-specifiable option to control the amount of code expansion,
71 or the percent of loops to consider for unrolling. Etc. */
72
73 /* ??? Look at the register copies inside the loop to see if they form a
74 simple permutation. If so, iterate the permutation until it gets back to
75 the start state. This is how many times we should unroll the loop, for
76 best results, because then all register copies can be eliminated.
77 For example, the lisp nreverse function should be unrolled 3 times
78 while (this)
79 {
80 next = this->cdr;
81 this->cdr = prev;
82 prev = this;
83 this = next;
84 }
85
86 ??? The number of times to unroll the loop may also be based on data
87 references in the loop. For example, if we have a loop that references
88 x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
89
90 /* ??? Add some simple linear equation solving capability so that we can
91 determine the number of loop iterations for more complex loops.
92 For example, consider this loop from gdb
93 #define SWAP_TARGET_AND_HOST(buffer,len)
94 {
95 char tmp;
96 char *p = (char *) buffer;
97 char *q = ((char *) buffer) + len - 1;
98 int iterations = (len + 1) >> 1;
99 int i;
100 	  for (p; p < q; p++, q--)
101 {
102 tmp = *q;
103 *q = *p;
104 *p = tmp;
105 }
106 }
107 Note that:
108 start value = p = &buffer + current_iteration
109 end value = q = &buffer + len - 1 - current_iteration
110 Given the loop exit test of "p < q", then there must be "q - p" iterations,
111 set equal to zero and solve for number of iterations:
112 q - p = len - 1 - 2*current_iteration = 0
113 current_iteration = (len - 1) / 2
114 Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
115 iterations of this loop. */
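/* (Editorial check of the algebra above: for len == 6, q - p starts at 5 and
   the body runs while it is 5, 3, and 1, i.e. ceil (5/2) == 3 iterations.)  */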
116
117 /* ??? Currently, no labels are marked as loop invariant when doing loop
118 unrolling. This is because an insn inside the loop, that loads the address
119 of a label inside the loop into a register, could be moved outside the loop
120 by the invariant code motion pass if labels were invariant. If the loop
121 is subsequently unrolled, the code will be wrong because each unrolled
122 body of the loop will use the same address, whereas each actually needs a
123 different address. A case where this happens is when a loop containing
124 a switch statement is unrolled.
125
126 It would be better to let labels be considered invariant. When we
127 unroll loops here, check to see if any insns using a label local to the
128 loop were moved before the loop. If so, then correct the problem, by
129 moving the insn back into the loop, or perhaps replicate the insn before
130 the loop, one copy for each time the loop is unrolled. */
131
132 /* The prime factors looked for when trying to unroll a loop by some
133    number which evenly divides the total number of iterations.  Just checking
134 for these 4 prime factors will find at least one factor for 75% of
135 all numbers theoretically. Practically speaking, this will succeed
136 almost all of the time since loops are generally a multiple of 2
137 and/or 5. */
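/* Editorial worked example, assuming the default MAX_UNROLLED_INSNS of 100:
   for a 20-insn loop known to iterate 40 == 2*2*2*5 times, the factor 5 is
   rejected by the selection code in unroll_loop (20 * 5 is not less than 100)
   but two factors of 2 are accepted (20 -> 40 -> 80 insns), so the loop is
   unrolled 4 times as UNROLL_MODULO and the single remaining exit test
   executes 10 times.  */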
138
139 #define NUM_FACTORS 4
140
141 struct _factor { int factor, count; } factors[NUM_FACTORS]
142 = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
143
144 /* Describes the different types of loop unrolling performed. */
145
146 enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
147
148 #include "config.h"
149 #include "rtl.h"
150 #include "insn-config.h"
151 #include "integrate.h"
152 #include "regs.h"
153 #include "flags.h"
154 #include "expr.h"
155 #include <stdio.h>
156 #include "loop.h"
157
158 /* This controls which loops are unrolled, and by how much we unroll
159 them. */
160
161 #ifndef MAX_UNROLLED_INSNS
162 #define MAX_UNROLLED_INSNS 100
163 #endif
164
165 /* Indexed by register number, if non-zero, then it contains a pointer
166 to a struct induction for a DEST_REG giv which has been combined with
167    one or more address givs.  This is needed because whenever such a DEST_REG
168 giv is modified, we must modify the value of all split address givs
169 that were combined with this DEST_REG giv. */
170
171 static struct induction **addr_combined_regs;
172
173 /* Indexed by register number, if this is a splittable induction variable,
174 then this will hold the current value of the register, which depends on the
175 iteration number. */
176
177 static rtx *splittable_regs;
178
179 /* Indexed by register number, if this is a splittable induction variable,
180 then this will hold the number of instructions in the loop that modify
181 the induction variable. Used to ensure that only the last insn modifying
182 a split iv will update the original iv of the dest. */
183
184 static int *splittable_regs_updates;
185
186 /* Values describing the current loop's iteration variable. These are set up
187 by loop_iterations, and used by precondition_loop_p. */
188
189 static rtx loop_iteration_var;
190 static rtx loop_initial_value;
191 static rtx loop_increment;
192 static rtx loop_final_value;
193
194 /* Forward declarations. */
195
196 static void init_reg_map PROTO((struct inline_remap *, int));
197 static int precondition_loop_p PROTO((rtx *, rtx *, rtx *, rtx, rtx));
198 static rtx calculate_giv_inc PROTO((rtx, rtx, int));
199 static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
200 static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
201 static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
202 enum unroll_types, rtx, rtx, rtx, rtx));
203 static int back_branch_in_range_p PROTO((rtx, rtx, rtx));
204 static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
205 static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
206 static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
207 static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
208 rtx, rtx, rtx, int));
209 static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
210 static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
211 static rtx remap_split_bivs PROTO((rtx));
212
213 /* Try to unroll one loop and split induction variables in the loop.
214
215 The loop is described by the arguments LOOP_END, INSN_COUNT, and
216 LOOP_START. END_INSERT_BEFORE indicates where insns should be added
217 which need to be executed when the loop falls through. STRENGTH_REDUCTION_P
218 indicates whether information generated in the strength reduction pass
219 is available.
220
221 This function is intended to be called from within `strength_reduce'
222 in loop.c. */
223
224 void
225 unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
226 strength_reduce_p)
227 rtx loop_end;
228 int insn_count;
229 rtx loop_start;
230 rtx end_insert_before;
231 int strength_reduce_p;
232 {
233 int i, j, temp;
234 int unroll_number = 1;
235 rtx copy_start, copy_end;
236 rtx insn, copy, sequence, pattern, tem;
237 int max_labelno, max_insnno;
238 rtx insert_before;
239 struct inline_remap *map;
240 char *local_label;
241 char *local_regno;
242 int maxregnum;
243 int new_maxregnum;
244 rtx exit_label = 0;
245 rtx start_label;
246 struct iv_class *bl;
247 int splitting_not_safe = 0;
248 enum unroll_types unroll_type;
249 int loop_preconditioned = 0;
250 rtx safety_label;
251 /* This points to the last real insn in the loop, which should be either
252 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
253 jumps). */
254 rtx last_loop_insn;
255
256 /* Don't bother unrolling huge loops. Since the minimum factor is
257 two, loops greater than one half of MAX_UNROLLED_INSNS will never
258 be unrolled. */
259 if (insn_count > MAX_UNROLLED_INSNS / 2)
260 {
261 if (loop_dump_stream)
262 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
263 return;
264 }
265
266 /* When emitting debugger info, we can't unroll loops with unequal numbers
267 of block_beg and block_end notes, because that would unbalance the block
268 structure of the function. This can happen as a result of the
269 "if (foo) bar; else break;" optimization in jump.c. */
270
271 if (write_symbols != NO_DEBUG)
272 {
273 int block_begins = 0;
274 int block_ends = 0;
275
276 for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
277 {
278 if (GET_CODE (insn) == NOTE)
279 {
280 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
281 block_begins++;
282 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
283 block_ends++;
284 }
285 }
286
287 if (block_begins != block_ends)
288 {
289 if (loop_dump_stream)
290 fprintf (loop_dump_stream,
291 "Unrolling failure: Unbalanced block notes.\n");
292 return;
293 }
294 }
295
296 /* Determine type of unroll to perform. Depends on the number of iterations
297 and the size of the loop. */
298
299 /* If there is no strength reduce info, then set loop_n_iterations to zero.
300 This can happen if strength_reduce can't find any bivs in the loop.
301 A value of zero indicates that the number of iterations could not be
302 calculated. */
303
304 if (! strength_reduce_p)
305 loop_n_iterations = 0;
306
307 if (loop_dump_stream && loop_n_iterations > 0)
308 fprintf (loop_dump_stream,
309 "Loop unrolling: %d iterations.\n", loop_n_iterations);
310
311 /* Find and save a pointer to the last nonnote insn in the loop. */
312
313 last_loop_insn = prev_nonnote_insn (loop_end);
314
315 /* Calculate how many times to unroll the loop. Indicate whether or
316 not the loop is being completely unrolled. */
317
318 if (loop_n_iterations == 1)
319 {
320 /* If number of iterations is exactly 1, then eliminate the compare and
321 branch at the end of the loop since they will never be taken.
322 Then return, since no other action is needed here. */
323
324 /* If the last instruction is not a BARRIER or a JUMP_INSN, then
325 don't do anything. */
326
327 if (GET_CODE (last_loop_insn) == BARRIER)
328 {
329 /* Delete the jump insn. This will delete the barrier also. */
330 delete_insn (PREV_INSN (last_loop_insn));
331 }
332 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
333 {
334 #ifdef HAVE_cc0
335 /* The immediately preceding insn is a compare which must be
336 deleted. */
337 delete_insn (last_loop_insn);
338 delete_insn (PREV_INSN (last_loop_insn));
339 #else
340 /* The immediately preceding insn may not be the compare, so don't
341 delete it. */
342 delete_insn (last_loop_insn);
343 #endif
344 }
345 return;
346 }
347 else if (loop_n_iterations > 0
348 && loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
349 {
350 unroll_number = loop_n_iterations;
351 unroll_type = UNROLL_COMPLETELY;
352 }
353 else if (loop_n_iterations > 0)
354 {
355 /* Try to factor the number of iterations. Don't bother with the
356 	 general case; using only 2, 3, 5, and 7 will get 75% of all
357 numbers theoretically, and almost all in practice. */
358
359 for (i = 0; i < NUM_FACTORS; i++)
360 factors[i].count = 0;
361
362 temp = loop_n_iterations;
363 for (i = NUM_FACTORS - 1; i >= 0; i--)
364 while (temp % factors[i].factor == 0)
365 {
366 factors[i].count++;
367 temp = temp / factors[i].factor;
368 }
369
370 /* Start with the larger factors first so that we generally
371 get lots of unrolling. */
372
373 unroll_number = 1;
374 temp = insn_count;
375 for (i = 3; i >= 0; i--)
376 while (factors[i].count--)
377 {
378 if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
379 {
380 unroll_number *= factors[i].factor;
381 temp *= factors[i].factor;
382 }
383 else
384 break;
385 }
386
387 /* If we couldn't find any factors, then unroll as in the normal
388 case. */
389 if (unroll_number == 1)
390 {
391 if (loop_dump_stream)
392 fprintf (loop_dump_stream,
393 "Loop unrolling: No factors found.\n");
394 }
395 else
396 unroll_type = UNROLL_MODULO;
397 }
398
399
400 /* Default case, calculate number of times to unroll loop based on its
401 size. */
402 if (unroll_number == 1)
403 {
404 if (8 * insn_count < MAX_UNROLLED_INSNS)
405 unroll_number = 8;
406 else if (4 * insn_count < MAX_UNROLLED_INSNS)
407 unroll_number = 4;
408 else
409 unroll_number = 2;
410
411 unroll_type = UNROLL_NAIVE;
412 }
413
414 /* Now we know how many times to unroll the loop. */
415
416 if (loop_dump_stream)
417 fprintf (loop_dump_stream,
418 "Unrolling loop %d times.\n", unroll_number);
419
420
421 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
422 {
423 /* Loops of these types should never start with a jump down to
424 the exit condition test. For now, check for this case just to
425 be sure. UNROLL_NAIVE loops can be of this form, this case is
426 handled below. */
427 insn = loop_start;
428 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
429 insn = NEXT_INSN (insn);
430 if (GET_CODE (insn) == JUMP_INSN)
431 abort ();
432 }
433
434 if (unroll_type == UNROLL_COMPLETELY)
435 {
436 /* Completely unrolling the loop: Delete the compare and branch at
437 	 the end (the last two instructions).  This deletion must be done at the
438 very end of loop unrolling, to avoid problems with calls to
439 back_branch_in_range_p, which is called by find_splittable_regs.
440 All increments of splittable bivs/givs are changed to load constant
441 instructions. */
442
443 copy_start = loop_start;
444
445 /* Set insert_before to the instruction immediately after the JUMP_INSN
446 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
447 the loop will be correctly handled by copy_loop_body. */
448 insert_before = NEXT_INSN (last_loop_insn);
449
450 /* Set copy_end to the insn before the jump at the end of the loop. */
451 if (GET_CODE (last_loop_insn) == BARRIER)
452 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
453 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
454 {
455 #ifdef HAVE_cc0
456 /* The instruction immediately before the JUMP_INSN is a compare
457 instruction which we do not want to copy. */
458 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
459 #else
460 /* The instruction immediately before the JUMP_INSN may not be the
461 compare, so we must copy it. */
462 copy_end = PREV_INSN (last_loop_insn);
463 #endif
464 }
465 else
466 {
467 /* We currently can't unroll a loop if it doesn't end with a
468 JUMP_INSN. There would need to be a mechanism that recognizes
469 this case, and then inserts a jump after each loop body, which
470 jumps to after the last loop body. */
471 if (loop_dump_stream)
472 fprintf (loop_dump_stream,
473 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
474 return;
475 }
476 }
477 else if (unroll_type == UNROLL_MODULO)
478 {
479 /* Partially unrolling the loop: The compare and branch at the end
480 (the last two instructions) must remain. Don't copy the compare
481 and branch instructions at the end of the loop. Insert the unrolled
482 code immediately before the compare/branch at the end so that the
483 code will fall through to them as before. */
484
485 copy_start = loop_start;
486
487 /* Set insert_before to the jump insn at the end of the loop.
488 Set copy_end to before the jump insn at the end of the loop. */
489 if (GET_CODE (last_loop_insn) == BARRIER)
490 {
491 insert_before = PREV_INSN (last_loop_insn);
492 copy_end = PREV_INSN (insert_before);
493 }
494 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
495 {
496 #ifdef HAVE_cc0
497 /* The instruction immediately before the JUMP_INSN is a compare
498 instruction which we do not want to copy or delete. */
499 insert_before = PREV_INSN (last_loop_insn);
500 copy_end = PREV_INSN (insert_before);
501 #else
502 /* The instruction immediately before the JUMP_INSN may not be the
503 compare, so we must copy it. */
504 insert_before = last_loop_insn;
505 copy_end = PREV_INSN (last_loop_insn);
506 #endif
507 }
508 else
509 {
510 /* We currently can't unroll a loop if it doesn't end with a
511 JUMP_INSN. There would need to be a mechanism that recognizes
512 this case, and then inserts a jump after each loop body, which
513 jumps to after the last loop body. */
514 if (loop_dump_stream)
515 fprintf (loop_dump_stream,
516 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
517 return;
518 }
519 }
520 else
521 {
522 /* Normal case: Must copy the compare and branch instructions at the
523 end of the loop. */
524
525 if (GET_CODE (last_loop_insn) == BARRIER)
526 {
527 /* Loop ends with an unconditional jump and a barrier.
528 Handle this like above, don't copy jump and barrier.
529 This is not strictly necessary, but doing so prevents generating
530 unconditional jumps to an immediately following label.
531
532 This will be corrected below if the target of this jump is
533 not the start_label. */
534
535 insert_before = PREV_INSN (last_loop_insn);
536 copy_end = PREV_INSN (insert_before);
537 }
538 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
539 {
540 /* Set insert_before to immediately after the JUMP_INSN, so that
541 NOTEs at the end of the loop will be correctly handled by
542 copy_loop_body. */
543 insert_before = NEXT_INSN (last_loop_insn);
544 copy_end = last_loop_insn;
545 }
546 else
547 {
548 /* We currently can't unroll a loop if it doesn't end with a
549 JUMP_INSN. There would need to be a mechanism that recognizes
550 this case, and then inserts a jump after each loop body, which
551 jumps to after the last loop body. */
552 if (loop_dump_stream)
553 fprintf (loop_dump_stream,
554 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
555 return;
556 }
557
558 /* If copying exit test branches because they can not be eliminated,
559 then must convert the fall through case of the branch to a jump past
560 the end of the loop. Create a label to emit after the loop and save
561 it for later use. Do not use the label after the loop, if any, since
562 it might be used by insns outside the loop, or there might be insns
563 added before it later by final_[bg]iv_value which must be after
564 the real exit label. */
565 exit_label = gen_label_rtx ();
566
567 insn = loop_start;
568 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
569 insn = NEXT_INSN (insn);
570
571 if (GET_CODE (insn) == JUMP_INSN)
572 {
573 /* The loop starts with a jump down to the exit condition test.
574 Start copying the loop after the barrier following this
575 jump insn. */
576 copy_start = NEXT_INSN (insn);
577
578 /* Splitting induction variables doesn't work when the loop is
579 entered via a jump to the bottom, because then we end up doing
580 a comparison against a new register for a split variable, but
581 we did not execute the set insn for the new register because
582 it was skipped over. */
583 splitting_not_safe = 1;
584 if (loop_dump_stream)
585 fprintf (loop_dump_stream,
586 "Splitting not safe, because loop not entered at top.\n");
587 }
588 else
589 copy_start = loop_start;
590 }
591
592 /* This should always be the first label in the loop. */
593 start_label = NEXT_INSN (copy_start);
594 /* There may be a line number note and/or a loop continue note here. */
595 while (GET_CODE (start_label) == NOTE)
596 start_label = NEXT_INSN (start_label);
597 if (GET_CODE (start_label) != CODE_LABEL)
598 {
599 /* This can happen as a result of jump threading. If the first insns in
600 the loop test the same condition as the loop's backward jump, or the
601 opposite condition, then the backward jump will be modified to point
603 	 elsewhere, and the loop's start label is deleted.
603
604 This case currently can not be handled by the loop unrolling code. */
605
606 if (loop_dump_stream)
607 fprintf (loop_dump_stream,
608 "Unrolling failure: unknown insns between BEG note and loop label.\n");
609 return;
610 }
611 if (LABEL_NAME (start_label))
612 {
613 /* The jump optimization pass must have combined the original start label
614 with a named label for a goto. We can't unroll this case because
615 jumps which go to the named label must be handled differently than
616 jumps to the loop start, and it is impossible to differentiate them
617 in this case. */
618 if (loop_dump_stream)
619 fprintf (loop_dump_stream,
620 "Unrolling failure: loop start label is gone\n");
621 return;
622 }
623
624 if (unroll_type == UNROLL_NAIVE
625 && GET_CODE (last_loop_insn) == BARRIER
626 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
627 {
628 /* In this case, we must copy the jump and barrier, because they will
629 not be converted to jumps to an immediately following label. */
630
631 insert_before = NEXT_INSN (last_loop_insn);
632 copy_end = last_loop_insn;
633 }
634
635 /* Allocate a translation table for the labels and insn numbers.
636 They will be filled in as we copy the insns in the loop. */
637
638 max_labelno = max_label_num ();
639 max_insnno = get_max_uid ();
640
641 map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
642
643 map->integrating = 0;
644
645 /* Allocate the label map. */
646
647 if (max_labelno > 0)
648 {
649 map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx));
650
651 local_label = (char *) alloca (max_labelno);
652 bzero (local_label, max_labelno);
653 }
654 else
655 map->label_map = 0;
656
657 /* Search the loop and mark all local labels, i.e. the ones which have to
658 be distinct labels when copied. For all labels which might be
659 non-local, set their label_map entries to point to themselves.
660 If they happen to be local their label_map entries will be overwritten
661 before the loop body is copied. The label_map entries for local labels
662 will be set to a different value each time the loop body is copied. */
663
664 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
665 {
666 if (GET_CODE (insn) == CODE_LABEL)
667 local_label[CODE_LABEL_NUMBER (insn)] = 1;
668 else if (GET_CODE (insn) == JUMP_INSN)
669 {
670 if (JUMP_LABEL (insn))
671 map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))]
672 = JUMP_LABEL (insn);
673 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
674 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
675 {
676 rtx pat = PATTERN (insn);
677 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
678 int len = XVECLEN (pat, diff_vec_p);
679 rtx label;
680
681 for (i = 0; i < len; i++)
682 {
683 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
684 map->label_map[CODE_LABEL_NUMBER (label)] = label;
685 }
686 }
687 }
688 }
689
690 /* Allocate space for the insn map. */
691
692 map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx));
693
694 /* Set this to zero, to indicate that we are doing loop unrolling,
695 not function inlining. */
696 map->inline_target = 0;
697
698 /* The register and constant maps depend on the number of registers
699 present, so the final maps can't be created until after
700 find_splittable_regs is called. However, they are needed for
701 preconditioning, so we create temporary maps when preconditioning
702 is performed. */
703
704 /* The preconditioning code may allocate two new pseudo registers. */
705 maxregnum = max_reg_num ();
706
707 /* Allocate and zero out the splittable_regs and addr_combined_regs
708 arrays. These must be zeroed here because they will be used if
709 loop preconditioning is performed, and must be zero for that case.
710
711 It is safe to do this here, since the extra registers created by the
712 preconditioning code and find_splittable_regs will never be used
713 to access the splittable_regs[] and addr_combined_regs[] arrays. */
714
715 splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx));
716 bzero ((char *) splittable_regs, maxregnum * sizeof (rtx));
717 splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int));
718 bzero ((char *) splittable_regs_updates, maxregnum * sizeof (int));
719 addr_combined_regs
720 = (struct induction **) alloca (maxregnum * sizeof (struct induction *));
721 bzero ((char *) addr_combined_regs, maxregnum * sizeof (struct induction *));
722 local_regno = (char *) alloca (maxregnum);
723 bzero (local_regno, maxregnum);
724
725 /* Mark all local registers, i.e. the ones which are referenced only
726 inside the loop. */
727 {
728 int copy_start_luid = INSN_LUID (copy_start);
729 int copy_end_luid = INSN_LUID (copy_end);
730
731 /* If a register is used in the jump insn, we must not duplicate it
732 since it will also be used outside the loop. */
733 if (GET_CODE (copy_end) == JUMP_INSN)
734 copy_end_luid--;
735
736 for (j = FIRST_PSEUDO_REGISTER; j < maxregnum; ++j)
737 if (regno_first_uid[j] > 0 && regno_first_uid[j] <= max_uid_for_loop
738 && uid_luid[regno_first_uid[j]] >= copy_start_luid
739 && regno_last_uid[j] > 0 && regno_last_uid[j] <= max_uid_for_loop
740 && uid_luid[regno_last_uid[j]] <= copy_end_luid)
741 local_regno[j] = 1;
742 }
743
744 /* If this loop requires exit tests when unrolled, check to see if we
745 can precondition the loop so as to make the exit tests unnecessary.
746 Just like variable splitting, this is not safe if the loop is entered
747 via a jump to the bottom. Also, can not do this if no strength
748 reduce info, because precondition_loop_p uses this info. */
749
750 /* Must copy the loop body for preconditioning before the following
751 find_splittable_regs call since that will emit insns which need to
752 be after the preconditioned loop copies, but immediately before the
753 unrolled loop copies. */
754
755 /* Also, it is not safe to split induction variables for the preconditioned
756 copies of the loop body. If we split induction variables, then the code
757 assumes that each induction variable can be represented as a function
758 of its initial value and the loop iteration number. This is not true
759 in this case, because the last preconditioned copy of the loop body
760 could be any iteration from the first up to the `unroll_number-1'th,
761 depending on the initial value of the iteration variable. Therefore
762 we can not split induction variables here, because we can not calculate
763 their value. Hence, this code must occur before find_splittable_regs
764 is called. */
765
766 if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
767 {
768 rtx initial_value, final_value, increment;
769
770 if (precondition_loop_p (&initial_value, &final_value, &increment,
771 loop_start, loop_end))
772 {
773 register rtx diff, temp;
774 enum machine_mode mode;
775 rtx *labels;
776 int abs_inc, neg_inc;
777
778 map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
779
780 map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx));
781 map->const_age_map = (unsigned *) alloca (maxregnum
782 * sizeof (unsigned));
783 map->const_equiv_map_size = maxregnum;
784 global_const_equiv_map = map->const_equiv_map;
785 global_const_equiv_map_size = maxregnum;
786
787 init_reg_map (map, maxregnum);
788
789 /* Limit loop unrolling to 4, since this will make 7 copies of
790 the loop body. */
791 if (unroll_number > 4)
792 unroll_number = 4;
793
794 /* Save the absolute value of the increment, and also whether or
795 not it is negative. */
796 neg_inc = 0;
797 abs_inc = INTVAL (increment);
798 if (abs_inc < 0)
799 {
800 abs_inc = - abs_inc;
801 neg_inc = 1;
802 }
803
804 start_sequence ();
805
806 /* Decide what mode to do these calculations in. Choose the larger
807 of final_value's mode and initial_value's mode, or a full-word if
808 both are constants. */
809 mode = GET_MODE (final_value);
810 if (mode == VOIDmode)
811 {
812 mode = GET_MODE (initial_value);
813 if (mode == VOIDmode)
814 mode = word_mode;
815 }
816 else if (mode != GET_MODE (initial_value)
817 && (GET_MODE_SIZE (mode)
818 < GET_MODE_SIZE (GET_MODE (initial_value))))
819 mode = GET_MODE (initial_value);
820
821 /* Calculate the difference between the final and initial values.
822 Final value may be a (plus (reg x) (const_int 1)) rtx.
823 Let the following cse pass simplify this if initial value is
824 a constant.
825
826 We must copy the final and initial values here to avoid
827 improperly shared rtl. */
828
829 diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
830 copy_rtx (initial_value), NULL_RTX, 0,
831 OPTAB_LIB_WIDEN);
832
833 /* Now calculate (diff % (unroll * abs (increment))) by using an
834 and instruction. */
835 diff = expand_binop (GET_MODE (diff), and_optab, diff,
836 GEN_INT (unroll_number * abs_inc - 1),
837 NULL_RTX, 0, OPTAB_LIB_WIDEN);
838
839 /* Now emit a sequence of branches to jump to the proper precond
840 loop entry point. */
841
842 labels = (rtx *) alloca (sizeof (rtx) * unroll_number);
843 for (i = 0; i < unroll_number; i++)
844 labels[i] = gen_label_rtx ();
845
846 /* Assuming the unroll_number is 4, and the increment is 2, then
847 for a negative increment: for a positive increment:
848 diff = 0,1 precond 0 diff = 0,7 precond 0
849 diff = 2,3 precond 3 diff = 1,2 precond 1
850 diff = 4,5 precond 2 diff = 3,4 precond 2
851 diff = 6,7 precond 1 diff = 5,6 precond 3 */
852
853 /* We only need to emit (unroll_number - 1) branches here, the
854 last case just falls through to the following code. */
855
856 /* ??? This would give better code if we emitted a tree of branches
857 instead of the current linear list of branches. */
858
859 for (i = 0; i < unroll_number - 1; i++)
860 {
861 int cmp_const;
862
863 /* For negative increments, must invert the constant compared
864 against, except when comparing against zero. */
865 if (i == 0)
866 cmp_const = 0;
867 else if (neg_inc)
868 cmp_const = unroll_number - i;
869 else
870 cmp_const = i;
871
872 emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const),
873 EQ, NULL_RTX, mode, 0, 0);
874
875 if (i == 0)
876 emit_jump_insn (gen_beq (labels[i]));
877 else if (neg_inc)
878 emit_jump_insn (gen_bge (labels[i]));
879 else
880 emit_jump_insn (gen_ble (labels[i]));
881 JUMP_LABEL (get_last_insn ()) = labels[i];
882 LABEL_NUSES (labels[i])++;
883 }
884
885 /* If the increment is greater than one, then we need another branch,
886 to handle other cases equivalent to 0. */
887
888 /* ??? This should be merged into the code above somehow to help
889 simplify the code here, and reduce the number of branches emitted.
890 For the negative increment case, the branch here could easily
891 be merged with the `0' case branch above. For the positive
892 increment case, it is not clear how this can be simplified. */
893
894 if (abs_inc != 1)
895 {
896 int cmp_const;
897
898 if (neg_inc)
899 cmp_const = abs_inc - 1;
900 else
901 cmp_const = abs_inc * (unroll_number - 1) + 1;
902
903 emit_cmp_insn (diff, GEN_INT (cmp_const), EQ, NULL_RTX,
904 mode, 0, 0);
905
906 if (neg_inc)
907 emit_jump_insn (gen_ble (labels[0]));
908 else
909 emit_jump_insn (gen_bge (labels[0]));
910 JUMP_LABEL (get_last_insn ()) = labels[0];
911 LABEL_NUSES (labels[0])++;
912 }
913
914 sequence = gen_sequence ();
915 end_sequence ();
916 emit_insn_before (sequence, loop_start);
917
918 /* Only the last copy of the loop body here needs the exit
919 test, so set copy_end to exclude the compare/branch here,
920 	     and then reset it inside the loop when we get to the last
921 copy. */
922
923 if (GET_CODE (last_loop_insn) == BARRIER)
924 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
925 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
926 {
927 #ifdef HAVE_cc0
928 /* The immediately preceding insn is a compare which we do not
929 want to copy. */
930 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
931 #else
932 /* The immediately preceding insn may not be a compare, so we
933 must copy it. */
934 copy_end = PREV_INSN (last_loop_insn);
935 #endif
936 }
937 else
938 abort ();
939
940 for (i = 1; i < unroll_number; i++)
941 {
942 emit_label_after (labels[unroll_number - i],
943 PREV_INSN (loop_start));
944
945 bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
946 bzero ((char *) map->const_equiv_map, maxregnum * sizeof (rtx));
947 bzero ((char *) map->const_age_map,
948 maxregnum * sizeof (unsigned));
949 map->const_age = 0;
950
951 for (j = 0; j < max_labelno; j++)
952 if (local_label[j])
953 map->label_map[j] = gen_label_rtx ();
954
955 for (j = FIRST_PSEUDO_REGISTER; j < maxregnum; j++)
956 if (local_regno[j])
957 map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
958
959 /* The last copy needs the compare/branch insns at the end,
960 so reset copy_end here if the loop ends with a conditional
961 branch. */
962
963 if (i == unroll_number - 1)
964 {
965 if (GET_CODE (last_loop_insn) == BARRIER)
966 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
967 else
968 copy_end = last_loop_insn;
969 }
970
971 /* None of the copies are the `last_iteration', so just
972 pass zero for that parameter. */
973 copy_loop_body (copy_start, copy_end, map, exit_label, 0,
974 unroll_type, start_label, loop_end,
975 loop_start, copy_end);
976 }
977 emit_label_after (labels[0], PREV_INSN (loop_start));
978
979 if (GET_CODE (last_loop_insn) == BARRIER)
980 {
981 insert_before = PREV_INSN (last_loop_insn);
982 copy_end = PREV_INSN (insert_before);
983 }
984 else
985 {
986 #ifdef HAVE_cc0
987 /* The immediately preceding insn is a compare which we do not
988 want to copy. */
989 insert_before = PREV_INSN (last_loop_insn);
990 copy_end = PREV_INSN (insert_before);
991 #else
992 /* The immediately preceding insn may not be a compare, so we
993 must copy it. */
994 insert_before = last_loop_insn;
995 copy_end = PREV_INSN (last_loop_insn);
996 #endif
997 }
998
999 /* Set unroll type to MODULO now. */
1000 unroll_type = UNROLL_MODULO;
1001 loop_preconditioned = 1;
1002 }
1003 }
1004
1005   /* If we reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1006 the loop unless all loops are being unrolled. */
1007 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1008 {
1009 if (loop_dump_stream)
1010 fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1011 return;
1012 }
1013
1014 /* At this point, we are guaranteed to unroll the loop. */
1015
1016 /* For each biv and giv, determine whether it can be safely split into
1017 a different variable for each unrolled copy of the loop body.
1018 We precalculate and save this info here, since computing it is
1019 expensive.
1020
1021 Do this before deleting any instructions from the loop, so that
1022 back_branch_in_range_p will work correctly. */
1023
1024 if (splitting_not_safe)
1025 temp = 0;
1026 else
1027 temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1028 end_insert_before, unroll_number);
1029
1030 /* find_splittable_regs may have created some new registers, so must
1031 reallocate the reg_map with the new larger size, and must realloc
1032 the constant maps also. */
1033
1034 maxregnum = max_reg_num ();
1035 map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
1036
1037 init_reg_map (map, maxregnum);
1038
1039   /* Space is needed in some of the maps for new registers, so new_maxregnum
1040 is an (over)estimate of how many registers will exist at the end. */
1041 new_maxregnum = maxregnum + (temp * unroll_number * 2);
1042
1043 /* Must realloc space for the constant maps, because the number of registers
1044 may have changed. */
1045
1046 map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx));
1047 map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned));
1048
1049 map->const_equiv_map_size = new_maxregnum;
1050 global_const_equiv_map = map->const_equiv_map;
1051 global_const_equiv_map_size = new_maxregnum;
1052
1053 /* Search the list of bivs and givs to find ones which need to be remapped
1054 when split, and set their reg_map entry appropriately. */
1055
1056 for (bl = loop_iv_list; bl; bl = bl->next)
1057 {
1058 if (REGNO (bl->biv->src_reg) != bl->regno)
1059 map->reg_map[bl->regno] = bl->biv->src_reg;
1060 #if 0
1061 /* Currently, non-reduced/final-value givs are never split. */
1062 for (v = bl->giv; v; v = v->next_iv)
1063 if (REGNO (v->src_reg) != bl->regno)
1064 map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1065 #endif
1066 }
1067
1068 /* If the loop is being partially unrolled, and the iteration variables
1069 are being split, and are being renamed for the split, then must fix up
1070 the compare/jump instruction at the end of the loop to refer to the new
1071 registers. This compare isn't copied, so the registers used in it
1072 will never be replaced if it isn't done here. */
1073
1074 if (unroll_type == UNROLL_MODULO)
1075 {
1076 insn = NEXT_INSN (copy_end);
1077 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1078 PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1079 }
1080
1081   /* For unroll_number times, make a copy of each instruction
1082 between copy_start and copy_end, and insert these new instructions
1083 before the end of the loop. */
1084
1085 for (i = 0; i < unroll_number; i++)
1086 {
1087 bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1088 bzero ((char *) map->const_equiv_map, new_maxregnum * sizeof (rtx));
1089 bzero ((char *) map->const_age_map, new_maxregnum * sizeof (unsigned));
1090 map->const_age = 0;
1091
1092 for (j = 0; j < max_labelno; j++)
1093 if (local_label[j])
1094 map->label_map[j] = gen_label_rtx ();
1095
1096 for (j = FIRST_PSEUDO_REGISTER; j < maxregnum; j++)
1097 if (local_regno[j])
1098 map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1099
1100 /* If loop starts with a branch to the test, then fix it so that
1101 it points to the test of the first unrolled copy of the loop. */
1102 if (i == 0 && loop_start != copy_start)
1103 {
1104 insn = PREV_INSN (copy_start);
1105 pattern = PATTERN (insn);
1106
1107 tem = map->label_map[CODE_LABEL_NUMBER
1108 (XEXP (SET_SRC (pattern), 0))];
1109 SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem);
1110
1111 /* Set the jump label so that it can be used by later loop unrolling
1112 passes. */
1113 JUMP_LABEL (insn) = tem;
1114 LABEL_NUSES (tem)++;
1115 }
1116
1117 copy_loop_body (copy_start, copy_end, map, exit_label,
1118 i == unroll_number - 1, unroll_type, start_label,
1119 loop_end, insert_before, insert_before);
1120 }
1121
1122 /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1123      insn to be deleted.  This prevents any runaway delete_insn call from deleting
1124      more insns than it should, as it always stops at a CODE_LABEL.  */
1125
1126 /* Delete the compare and branch at the end of the loop if completely
1127 unrolling the loop. Deleting the backward branch at the end also
1128 deletes the code label at the start of the loop. This is done at
1129 the very end to avoid problems with back_branch_in_range_p. */
1130
1131 if (unroll_type == UNROLL_COMPLETELY)
1132 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1133 else
1134 safety_label = emit_label_after (gen_label_rtx (), copy_end);
1135
1136 /* Delete all of the original loop instructions. Don't delete the
1137 LOOP_BEG note, or the first code label in the loop. */
1138
1139 insn = NEXT_INSN (copy_start);
1140 while (insn != safety_label)
1141 {
1142 if (insn != start_label)
1143 insn = delete_insn (insn);
1144 else
1145 insn = NEXT_INSN (insn);
1146 }
1147
1148 /* Can now delete the 'safety' label emitted to protect us from runaway
1149 delete_insn calls. */
1150 if (INSN_DELETED_P (safety_label))
1151 abort ();
1152 delete_insn (safety_label);
1153
1154 /* If exit_label exists, emit it after the loop. Doing the emit here
1155 forces it to have a higher INSN_UID than any insn in the unrolled loop.
1156 This is needed so that mostly_true_jump in reorg.c will treat jumps
1157 to this loop end label correctly, i.e. predict that they are usually
1158 not taken. */
1159 if (exit_label)
1160 emit_label_after (exit_label, loop_end);
1161 }
1162 \f
1163 /* Return true if the loop can be safely, and profitably, preconditioned
1164 so that the unrolled copies of the loop body don't need exit tests.
1165
1166 This only works if final_value, initial_value and increment can be
1167 determined, and if increment is a constant power of 2.
1168 If increment is not a power of 2, then the preconditioning modulo
1169    operation would require a real modulo instead of a bitwise AND, and this
1170 is not considered `profitable'. */
1171
1172 /* ??? If the loop is known to be executed very many times, or the machine
1173 has a very cheap divide instruction, then preconditioning is a win even
1174 when the increment is not a power of 2. Use RTX_COST to compute
1175 whether divide is cheap. */
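/* Editorial example of the AND trick referred to above: with an increment of
   1 and an unroll factor of 4, the preconditioning code computes the leftover
   iteration count as (final_value - initial_value) & 3, which is only a
   correct modulo when increment * unroll factor is a power of 2.  */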
1176
1177 static int
1178 precondition_loop_p (initial_value, final_value, increment, loop_start,
1179 loop_end)
1180 rtx *initial_value, *final_value, *increment;
1181 rtx loop_start, loop_end;
1182 {
1183
1184 if (loop_n_iterations > 0)
1185 {
1186 *initial_value = const0_rtx;
1187 *increment = const1_rtx;
1188 *final_value = GEN_INT (loop_n_iterations);
1189
1190 if (loop_dump_stream)
1191 fprintf (loop_dump_stream,
1192 "Preconditioning: Success, number of iterations known, %d.\n",
1193 loop_n_iterations);
1194 return 1;
1195 }
1196
1197 if (loop_initial_value == 0)
1198 {
1199 if (loop_dump_stream)
1200 fprintf (loop_dump_stream,
1201 "Preconditioning: Could not find initial value.\n");
1202 return 0;
1203 }
1204 else if (loop_increment == 0)
1205 {
1206 if (loop_dump_stream)
1207 fprintf (loop_dump_stream,
1208 "Preconditioning: Could not find increment value.\n");
1209 return 0;
1210 }
1211 else if (GET_CODE (loop_increment) != CONST_INT)
1212 {
1213 if (loop_dump_stream)
1214 fprintf (loop_dump_stream,
1215 "Preconditioning: Increment not a constant.\n");
1216 return 0;
1217 }
1218 else if ((exact_log2 (INTVAL (loop_increment)) < 0)
1219 && (exact_log2 (- INTVAL (loop_increment)) < 0))
1220 {
1221 if (loop_dump_stream)
1222 fprintf (loop_dump_stream,
1223 "Preconditioning: Increment not a constant power of 2.\n");
1224 return 0;
1225 }
1226
1227 /* Unsigned_compare and compare_dir can be ignored here, since they do
1228 not matter for preconditioning. */
1229
1230 if (loop_final_value == 0)
1231 {
1232 if (loop_dump_stream)
1233 fprintf (loop_dump_stream,
1234 "Preconditioning: EQ comparison loop.\n");
1235 return 0;
1236 }
1237
1238 /* Must ensure that final_value is invariant, so call invariant_p to
1239 check. Before doing so, must check regno against max_reg_before_loop
1240 to make sure that the register is in the range covered by invariant_p.
1241 If it isn't, then it is most likely a biv/giv which by definition are
1242 not invariant. */
1243 if ((GET_CODE (loop_final_value) == REG
1244 && REGNO (loop_final_value) >= max_reg_before_loop)
1245 || (GET_CODE (loop_final_value) == PLUS
1246 && REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
1247 || ! invariant_p (loop_final_value))
1248 {
1249 if (loop_dump_stream)
1250 fprintf (loop_dump_stream,
1251 "Preconditioning: Final value not invariant.\n");
1252 return 0;
1253 }
1254
1255 /* Fail for floating point values, since the caller of this function
1256 does not have code to deal with them. */
1257 if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
1258 || GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT)
1259 {
1260 if (loop_dump_stream)
1261 fprintf (loop_dump_stream,
1262 "Preconditioning: Floating point final or initial value.\n");
1263 return 0;
1264 }
1265
1266 /* Now set initial_value to be the iteration_var, since that may be a
1267 simpler expression, and is guaranteed to be correct if all of the
1268 above tests succeed.
1269
1270 We can not use the initial_value as calculated, because it will be
1271 one too small for loops of the form "while (i-- > 0)". We can not
1272 emit code before the loop_skip_over insns to fix this problem as this
1273 will then give a number one too large for loops of the form
1274 "while (--i > 0)".
1275
1276 Note that all loops that reach here are entered at the top, because
1277 this function is not called if the loop starts with a jump. */
1278
1279 /* Fail if loop_iteration_var is not live before loop_start, since we need
1280 to test its value in the preconditioning code. */
1281
1282 if (uid_luid[regno_first_uid[REGNO (loop_iteration_var)]]
1283 > INSN_LUID (loop_start))
1284 {
1285 if (loop_dump_stream)
1286 fprintf (loop_dump_stream,
1287 "Preconditioning: Iteration var not live before loop start.\n");
1288 return 0;
1289 }
1290
1291 *initial_value = loop_iteration_var;
1292 *increment = loop_increment;
1293 *final_value = loop_final_value;
1294
1295 /* Success! */
1296 if (loop_dump_stream)
1297 fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1298 return 1;
1299 }
1300
1301
1302 /* All pseudo-registers must be mapped to themselves. Two hard registers
1303 must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1304 REGNUM, to avoid function-inlining specific conversions of these
1305 registers. All other hard regs can not be mapped because they may be
1306 used with different
1307 modes. */
1308
1309 static void
1310 init_reg_map (map, maxregnum)
1311 struct inline_remap *map;
1312 int maxregnum;
1313 {
1314 int i;
1315
1316 for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1317 map->reg_map[i] = regno_reg_rtx[i];
1318 /* Just clear the rest of the entries. */
1319 for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1320 map->reg_map[i] = 0;
1321
1322 map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1323 = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1324 map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1325 = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1326 }
1327 \f
1328 /* Strength-reduction will often emit code for optimized biv/givs which
1329 calculates their value in a temporary register, and then copies the result
1330    to the iv.  This procedure reconstructs the pattern computing the iv,
1331    verifying that all operands are of the proper form.
1332
1333 The return value is the amount that the giv is incremented by. */
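/* Editorial sketch of the simplest shape this routine recognizes:

	(set (reg 100) (plus (reg 100) (const_int 4)))

   or, when strength reduction computed the new value in a temporary,

	(set (reg 101) (plus (reg 100) (const_int 4)))
	(set (reg 100) (reg 101))

   with the increment constant possibly having been loaded into yet another
   register first.  The register numbers are invented for exposition only.  */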
1334
1335 static rtx
1336 calculate_giv_inc (pattern, src_insn, regno)
1337 rtx pattern, src_insn;
1338 int regno;
1339 {
1340 rtx increment;
1341 rtx increment_total = 0;
1342 int tries = 0;
1343
1344 retry:
1345 /* Verify that we have an increment insn here. First check for a plus
1346 as the set source. */
1347 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1348 {
1349 /* SR sometimes computes the new giv value in a temp, then copies it
1350 to the new_reg. */
1351 src_insn = PREV_INSN (src_insn);
1352 pattern = PATTERN (src_insn);
1353 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1354 abort ();
1355
1356 /* The last insn emitted is not needed, so delete it to avoid confusing
1357 the second cse pass. This insn sets the giv unnecessarily. */
1358 delete_insn (get_last_insn ());
1359 }
1360
1361 /* Verify that we have a constant as the second operand of the plus. */
1362 increment = XEXP (SET_SRC (pattern), 1);
1363 if (GET_CODE (increment) != CONST_INT)
1364 {
1365 /* SR sometimes puts the constant in a register, especially if it is
1366 too big to be an add immed operand. */
1367 src_insn = PREV_INSN (src_insn);
1368 increment = SET_SRC (PATTERN (src_insn));
1369
1370 /* SR may have used LO_SUM to compute the constant if it is too large
1371 for a load immed operand. In this case, the constant is in operand
1372 one of the LO_SUM rtx. */
1373 if (GET_CODE (increment) == LO_SUM)
1374 increment = XEXP (increment, 1);
1375 else if (GET_CODE (increment) == IOR)
1376 {
1377 /* The rs6000 port loads some constants with IOR. */
1378 rtx second_part = XEXP (increment, 1);
1379
1380 src_insn = PREV_INSN (src_insn);
1381 increment = SET_SRC (PATTERN (src_insn));
1382 /* Don't need the last insn anymore. */
1383 delete_insn (get_last_insn ());
1384
1385 if (GET_CODE (second_part) != CONST_INT
1386 || GET_CODE (increment) != CONST_INT)
1387 abort ();
1388
1389 increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1390 }
1391
1392 if (GET_CODE (increment) != CONST_INT)
1393 abort ();
1394
1395 /* The insn loading the constant into a register is no longer needed,
1396 so delete it. */
1397 delete_insn (get_last_insn ());
1398 }
1399
1400 if (increment_total)
1401 increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1402 else
1403 increment_total = increment;
1404
1405 /* Check that the source register is the same as the register we expected
1406 to see as the source. If not, something is seriously wrong. */
1407 if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1408 || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1409 {
1410       /* Some machines (e.g. the romp) may emit two add instructions for
1411 	 certain constants, so let's try looking for another add immediately
1412 before this one if we have only seen one add insn so far. */
1413
1414 if (tries == 0)
1415 {
1416 tries++;
1417
1418 src_insn = PREV_INSN (src_insn);
1419 pattern = PATTERN (src_insn);
1420
1421 delete_insn (get_last_insn ());
1422
1423 goto retry;
1424 }
1425
1426 abort ();
1427 }
1428
1429 return increment_total;
1430 }
1431
1432 /* Copy REG_NOTES, except for insn references, because not all insn_map
1433 entries are valid yet. We do need to copy registers now though, because
1434 the reg_map entries can change during copying. */
1435
1436 static rtx
1437 initial_reg_note_copy (notes, map)
1438 rtx notes;
1439 struct inline_remap *map;
1440 {
1441 rtx copy;
1442
1443 if (notes == 0)
1444 return 0;
1445
1446 copy = rtx_alloc (GET_CODE (notes));
1447 PUT_MODE (copy, GET_MODE (notes));
1448
1449 if (GET_CODE (notes) == EXPR_LIST)
1450 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1451 else if (GET_CODE (notes) == INSN_LIST)
1452 /* Don't substitute for these yet. */
1453 XEXP (copy, 0) = XEXP (notes, 0);
1454 else
1455 abort ();
1456
1457 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1458
1459 return copy;
1460 }
1461
1462 /* Fixup insn references in copied REG_NOTES. */
1463
1464 static void
1465 final_reg_note_copy (notes, map)
1466 rtx notes;
1467 struct inline_remap *map;
1468 {
1469 rtx note;
1470
1471 for (note = notes; note; note = XEXP (note, 1))
1472 if (GET_CODE (note) == INSN_LIST)
1473 XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))];
1474 }
1475
1476 /* Copy each instruction in the loop, substituting from map as appropriate.
1477 This is very similar to a loop in expand_inline_function. */
1478
1479 static void
1480 copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1481 unroll_type, start_label, loop_end, insert_before,
1482 copy_notes_from)
1483 rtx copy_start, copy_end;
1484 struct inline_remap *map;
1485 rtx exit_label;
1486 int last_iteration;
1487 enum unroll_types unroll_type;
1488 rtx start_label, loop_end, insert_before, copy_notes_from;
1489 {
1490 rtx insn, pattern;
1491 rtx tem, copy;
1492 int dest_reg_was_split, i;
1493 rtx cc0_insn = 0;
1494 rtx final_label = 0;
1495 rtx giv_inc, giv_dest_reg, giv_src_reg;
1496
1497 /* If this isn't the last iteration, then map any references to the
1498 start_label to final_label. Final label will then be emitted immediately
1499 after the end of this loop body if it was ever used.
1500
1501 If this is the last iteration, then map references to the start_label
1502 to itself. */
1503 if (! last_iteration)
1504 {
1505 final_label = gen_label_rtx ();
1506 map->label_map[CODE_LABEL_NUMBER (start_label)] = final_label;
1507 }
1508 else
1509 map->label_map[CODE_LABEL_NUMBER (start_label)] = start_label;
1510
1511 start_sequence ();
1512
1513 insn = copy_start;
1514 do
1515 {
1516 insn = NEXT_INSN (insn);
1517
1518 map->orig_asm_operands_vector = 0;
1519
1520 switch (GET_CODE (insn))
1521 {
1522 case INSN:
1523 pattern = PATTERN (insn);
1524 copy = 0;
1525 giv_inc = 0;
1526
1527 /* Check to see if this is a giv that has been combined with
1528 some split address givs. (Combined in the sense that
1529 `combine_givs' in loop.c has put two givs in the same register.)
1530 In this case, we must search all givs based on the same biv to
1531 find the address givs. Then split the address givs.
1532 Do this before splitting the giv, since that may map the
1533 SET_DEST to a new register. */
1534
1535 if (GET_CODE (pattern) == SET
1536 && GET_CODE (SET_DEST (pattern)) == REG
1537 && addr_combined_regs[REGNO (SET_DEST (pattern))])
1538 {
1539 struct iv_class *bl;
1540 struct induction *v, *tv;
1541 int regno = REGNO (SET_DEST (pattern));
1542
1543 v = addr_combined_regs[REGNO (SET_DEST (pattern))];
1544 bl = reg_biv_class[REGNO (v->src_reg)];
1545
1546 /* Although the giv_inc amount is not needed here, we must call
1547 calculate_giv_inc here since it might try to delete the
1548 last insn emitted. If we wait until later to call it,
1549 we might accidentally delete insns generated immediately
1550 below by emit_unrolled_add. */
1551
1552 giv_inc = calculate_giv_inc (pattern, insn, regno);
1553
1554 /* Now find all address giv's that were combined with this
1555 giv 'v'. */
1556 for (tv = bl->giv; tv; tv = tv->next_iv)
1557 if (tv->giv_type == DEST_ADDR && tv->same == v)
1558 {
1559 int this_giv_inc = INTVAL (giv_inc);
1560
1561 /* Scale this_giv_inc if the multiplicative factors of
1562 the two givs are different. */
1563 if (tv->mult_val != v->mult_val)
1564 this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1565 * INTVAL (tv->mult_val));
1566
1567 tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1568 *tv->location = tv->dest_reg;
1569
1570 if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1571 {
1572 /* Must emit an insn to increment the split address
1573 giv. Add in the const_adjust field in case there
1574 was a constant eliminated from the address. */
1575 rtx value, dest_reg;
1576
1577 /* tv->dest_reg will be either a bare register,
1578 or else a register plus a constant. */
1579 if (GET_CODE (tv->dest_reg) == REG)
1580 dest_reg = tv->dest_reg;
1581 else
1582 dest_reg = XEXP (tv->dest_reg, 0);
1583
1584 /* Check for shared address givs, and avoid
1585 incrementing the shared pseudo reg more than
1586 once. */
1587 if (! tv->same_insn)
1588 {
1589 /* tv->dest_reg may actually be a (PLUS (REG)
1590 (CONST)) here, so we must call plus_constant
1591 to add the const_adjust amount before calling
1592 emit_unrolled_add below. */
1593 value = plus_constant (tv->dest_reg,
1594 tv->const_adjust);
1595
1596 /* The constant could be too large for an add
1597 immediate, so can't directly emit an insn
1598 here. */
1599 emit_unrolled_add (dest_reg, XEXP (value, 0),
1600 XEXP (value, 1));
1601 }
1602
1603 /* Reset the giv to be just the register again, in case
1604 it is used after the set we have just emitted.
1605 We must subtract the const_adjust factor added
1606 above. */
1607 tv->dest_reg = plus_constant (dest_reg,
1608 - tv->const_adjust);
1609 *tv->location = tv->dest_reg;
1610 }
1611 }
1612 }
1613
1614 /* If this is a setting of a splittable variable, then determine
1615 how to split the variable, create a new set based on this split,
1616 and set up the reg_map so that later uses of the variable will
1617 use the new split variable. */
1618
1619 dest_reg_was_split = 0;
1620
1621 if (GET_CODE (pattern) == SET
1622 && GET_CODE (SET_DEST (pattern)) == REG
1623 && splittable_regs[REGNO (SET_DEST (pattern))])
1624 {
1625 int regno = REGNO (SET_DEST (pattern));
1626
1627 dest_reg_was_split = 1;
1628
1629 /* Compute the increment value for the giv, if it wasn't
1630 already computed above. */
1631
1632 if (giv_inc == 0)
1633 giv_inc = calculate_giv_inc (pattern, insn, regno);
1634 giv_dest_reg = SET_DEST (pattern);
1635 giv_src_reg = SET_DEST (pattern);
1636
1637 if (unroll_type == UNROLL_COMPLETELY)
1638 {
1639 /* Completely unrolling the loop. Set the induction
1640 variable to a known constant value. */
1641
1642 /* The value in splittable_regs may be an invariant
1643 value, so we must use plus_constant here. */
1644 splittable_regs[regno]
1645 = plus_constant (splittable_regs[regno], INTVAL (giv_inc));
1646
1647 if (GET_CODE (splittable_regs[regno]) == PLUS)
1648 {
1649 giv_src_reg = XEXP (splittable_regs[regno], 0);
1650 giv_inc = XEXP (splittable_regs[regno], 1);
1651 }
1652 else
1653 {
1654 /* The splittable_regs value must be a REG or a
1655 CONST_INT, so put the entire value in the giv_src_reg
1656 variable. */
1657 giv_src_reg = splittable_regs[regno];
1658 giv_inc = const0_rtx;
1659 }
1660 }
1661 else
1662 {
1663 /* Partially unrolling the loop. Create a new pseudo
1664 register for the iteration variable, and set it to
1665 be a constant plus the original register. Except
1666 on the last iteration, when the result has to
1667 go back into the original iteration var register. */
1668
1669 /* Handle bivs which must be mapped to a new register
1670 when split. This happens for bivs which need their
1671 final value set before loop entry. The new register
1672 for the biv was stored in the biv's first struct
1673 induction entry by find_splittable_regs. */
1674
1675 if (regno < max_reg_before_loop
1676 && reg_iv_type[regno] == BASIC_INDUCT)
1677 {
1678 giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1679 giv_dest_reg = giv_src_reg;
1680 }
1681
1682 #if 0
1683 /* If non-reduced/final-value givs were split, then
1684 this would have to remap those givs also. See
1685 find_splittable_regs. */
1686 #endif
1687
1688 splittable_regs[regno]
1689 = GEN_INT (INTVAL (giv_inc)
1690 + INTVAL (splittable_regs[regno]));
1691 giv_inc = splittable_regs[regno];
1692
1693 /* Now split the induction variable by changing the dest
1694 of this insn to a new register, and setting its
1695 reg_map entry to point to this new register.
1696
1697 If this is the last iteration, and this is the last insn
1698 that will update the iv, then reuse the original dest,
1699 to ensure that the iv will have the proper value when
1700 the loop exits or repeats.
1701
1702 Using splittable_regs_updates here like this is safe,
1703 because it can only be greater than one if all
1704 instructions modifying the iv are always executed in
1705 order. */
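/* As an illustrative sketch (assumed, not taken from a real dump): for
   a biv I incremented by 1 and an unroll factor of four, the successive
   calls for the copies emit roughly I.1 = I + 1, I.2 = I + 2 and
   I.3 = I + 3 into fresh pseudos recorded in map->reg_map, while the
   copy made with last_iteration set for the final update writes the
   accumulated total back into the original register, I = I + 4.  */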
1706
1707 if (! last_iteration
1708 || (splittable_regs_updates[regno]-- != 1))
1709 {
1710 tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1711 giv_dest_reg = tem;
1712 map->reg_map[regno] = tem;
1713 }
1714 else
1715 map->reg_map[regno] = giv_src_reg;
1716 }
1717
1718 /* The constant being added could be too large for an add
1719 immediate, so can't directly emit an insn here. */
1720 emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1721 copy = get_last_insn ();
1722 pattern = PATTERN (copy);
1723 }
1724 else
1725 {
1726 pattern = copy_rtx_and_substitute (pattern, map);
1727 copy = emit_insn (pattern);
1728 }
1729 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1730
1731 #ifdef HAVE_cc0
1732 /* If this insn is setting CC0, it may need to look at
1733 the insn that uses CC0 to see what type of insn it is.
1734 In that case, the call to recog via validate_change will
1735 fail. So don't substitute constants here. Instead,
1736 do it when we emit the following insn.
1737
1738 For example, see the pyr.md file. That machine has signed and
1739 unsigned compares. The compare patterns must check the
1740 following branch insn to see what kind of compare to
1741 emit.
1742
1743 If the previous insn set CC0, substitute constants on it as
1744 well. */
1745 if (sets_cc0_p (copy) != 0)
1746 cc0_insn = copy;
1747 else
1748 {
1749 if (cc0_insn)
1750 try_constants (cc0_insn, map);
1751 cc0_insn = 0;
1752 try_constants (copy, map);
1753 }
1754 #else
1755 try_constants (copy, map);
1756 #endif
1757
1758 /* Make split induction variable constants `permanent' since we
1759 know there are no backward branches across iteration variable
1760 settings which would invalidate this. */
1761 if (dest_reg_was_split)
1762 {
1763 int regno = REGNO (SET_DEST (pattern));
1764
1765 if (regno < map->const_equiv_map_size
1766 && map->const_age_map[regno] == map->const_age)
1767 map->const_age_map[regno] = -1;
1768 }
1769 break;
1770
1771 case JUMP_INSN:
1772 pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1773 copy = emit_jump_insn (pattern);
1774 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1775
1776 if (JUMP_LABEL (insn) == start_label && insn == copy_end
1777 && ! last_iteration)
1778 {
1779 /* This is a branch to the beginning of the loop; this is the
1780 last insn being copied; and this is not the last iteration.
1781 In this case, we want to change the original fall through
1782 case to be a branch past the end of the loop, and the
1783 original jump label case to fall_through. */
1784
1785 if (invert_exp (pattern, copy))
1786 {
1787 if (! redirect_exp (&pattern,
1788 map->label_map[CODE_LABEL_NUMBER
1789 (JUMP_LABEL (insn))],
1790 exit_label, copy))
1791 abort ();
1792 }
1793 else
1794 {
1795 rtx jmp;
1796 rtx lab = gen_label_rtx ();
1797 /* Can't do it by reversing the jump (probably because we
1798 couldn't reverse the conditions), so emit a new
1799 jump_insn after COPY, and redirect the jump around
1800 that. */
1801 jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
1802 jmp = emit_barrier_after (jmp);
1803 emit_label_after (lab, jmp);
1804 LABEL_NUSES (lab) = 0;
1805 if (! redirect_exp (&pattern,
1806 map->label_map[CODE_LABEL_NUMBER
1807 (JUMP_LABEL (insn))],
1808 lab, copy))
1809 abort ();
1810 }
1811 }
1812
1813 #ifdef HAVE_cc0
1814 if (cc0_insn)
1815 try_constants (cc0_insn, map);
1816 cc0_insn = 0;
1817 #endif
1818 try_constants (copy, map);
1819
1820 /* Set the jump label of COPY correctly to avoid problems with
1821 later passes of unroll_loop, if INSN had a jump label set. */
1822 if (JUMP_LABEL (insn))
1823 {
1824 rtx label = 0;
1825
1826 /* Can't use the label_map for every insn, since this may be
1827 the backward branch, and hence the label was not mapped. */
1828 if (GET_CODE (pattern) == SET)
1829 {
1830 tem = SET_SRC (pattern);
1831 if (GET_CODE (tem) == LABEL_REF)
1832 label = XEXP (tem, 0);
1833 else if (GET_CODE (tem) == IF_THEN_ELSE)
1834 {
1835 if (XEXP (tem, 1) != pc_rtx)
1836 label = XEXP (XEXP (tem, 1), 0);
1837 else
1838 label = XEXP (XEXP (tem, 2), 0);
1839 }
1840 }
1841
1842 if (label && GET_CODE (label) == CODE_LABEL)
1843 JUMP_LABEL (copy) = label;
1844 else
1845 {
1846 /* An unrecognizable jump insn, probably the entry jump
1847 for a switch statement. This label must have been mapped,
1848 so just use the label_map to get the new jump label. */
1849 JUMP_LABEL (copy)
1850 = map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))];
1851 }
1852
1853 /* If this is a non-local jump, then must increase the label
1854 use count so that the label will not be deleted when the
1855 original jump is deleted. */
1856 LABEL_NUSES (JUMP_LABEL (copy))++;
1857 }
1858 else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
1859 || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
1860 {
1861 rtx pat = PATTERN (copy);
1862 int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
1863 int len = XVECLEN (pat, diff_vec_p);
1864 int i;
1865
1866 for (i = 0; i < len; i++)
1867 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
1868 }
1869
1870 /* If this used to be a conditional jump insn but its branch
1871 direction is now known, we must do something special. */
1872 if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
1873 {
1874 #ifdef HAVE_cc0
1875 /* The previous insn set cc0 for us. So delete it. */
1876 delete_insn (PREV_INSN (copy));
1877 #endif
1878
1879 /* If this is now a no-op, delete it. */
1880 if (map->last_pc_value == pc_rtx)
1881 {
1882 /* Don't let delete_insn delete the label referenced here,
1883 because we might possibly need it later for some other
1884 instruction in the loop. */
1885 if (JUMP_LABEL (copy))
1886 LABEL_NUSES (JUMP_LABEL (copy))++;
1887 delete_insn (copy);
1888 if (JUMP_LABEL (copy))
1889 LABEL_NUSES (JUMP_LABEL (copy))--;
1890 copy = 0;
1891 }
1892 else
1893 /* Otherwise, this is an unconditional jump so we must put a
1894 BARRIER after it. We could do some dead code elimination
1895 here, but jump.c will do it just as well. */
1896 emit_barrier ();
1897 }
1898 break;
1899
1900 case CALL_INSN:
1901 pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1902 copy = emit_call_insn (pattern);
1903 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1904
1905 /* Because the USAGE information potentially contains objects other
1906 than hard registers, we need to copy it. */
1907 CALL_INSN_FUNCTION_USAGE (copy) =
1908 copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
1909
1910 #ifdef HAVE_cc0
1911 if (cc0_insn)
1912 try_constants (cc0_insn, map);
1913 cc0_insn = 0;
1914 #endif
1915 try_constants (copy, map);
1916
1917 /* Be lazy and assume CALL_INSNs clobber all hard registers. */
1918 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1919 map->const_equiv_map[i] = 0;
1920 break;
1921
1922 case CODE_LABEL:
1923 /* If this is the loop start label, then we don't need to emit a
1924 copy of this label since no one will use it. */
1925
1926 if (insn != start_label)
1927 {
1928 copy = emit_label (map->label_map[CODE_LABEL_NUMBER (insn)]);
1929 map->const_age++;
1930 }
1931 break;
1932
1933 case BARRIER:
1934 copy = emit_barrier ();
1935 break;
1936
1937 case NOTE:
1938 /* VTOP notes are valid only before the loop exit test. If placed
1939 anywhere else, loop may generate bad code. */
1940
1941 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
1942 && (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
1943 || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
1944 copy = emit_note (NOTE_SOURCE_FILE (insn),
1945 NOTE_LINE_NUMBER (insn));
1946 else
1947 copy = 0;
1948 break;
1949
1950 default:
1951 abort ();
1952 break;
1953 }
1954
1955 map->insn_map[INSN_UID (insn)] = copy;
1956 }
1957 while (insn != copy_end);
1958
1959 /* Now finish copying the REG_NOTES. */
1960 insn = copy_start;
1961 do
1962 {
1963 insn = NEXT_INSN (insn);
1964 if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
1965 || GET_CODE (insn) == CALL_INSN)
1966 && map->insn_map[INSN_UID (insn)])
1967 final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
1968 }
1969 while (insn != copy_end);
1970
1971 /* There may be notes between copy_notes_from and loop_end. Emit a copy of
1972 each of these notes here, since there may be some important ones, such as
1973 NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
1974 iteration, because the original notes won't be deleted.
1975
1976 We can't use insert_before here, because when called from preconditioning code,
1977 insert_before points before the loop. We can't use copy_end, because
1978 there may be insns already inserted after it (which we don't want to
1979 copy) when not from preconditioning code. */
1980
1981 if (! last_iteration)
1982 {
1983 for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
1984 {
1985 if (GET_CODE (insn) == NOTE
1986 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
1987 emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
1988 }
1989 }
1990
1991 if (final_label && LABEL_NUSES (final_label) > 0)
1992 emit_label (final_label);
1993
1994 tem = gen_sequence ();
1995 end_sequence ();
1996 emit_insn_before (tem, insert_before);
1997 }
1998 \f
1999 /* Emit an insn, using expand_binop to ensure that a valid insn is
2000 emitted. This will correctly handle the case where the increment value
2001 won't fit in the immediate field of a PLUS insn. */
2002
2003 void
2004 emit_unrolled_add (dest_reg, src_reg, increment)
2005 rtx dest_reg, src_reg, increment;
2006 {
2007 rtx result;
2008
2009 result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
2010 dest_reg, 0, OPTAB_LIB_WIDEN);
2011
2012 if (dest_reg != result)
2013 emit_move_insn (dest_reg, result);
2014 }
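/* Illustrative usage note (an assumption about typical targets, not a
   statement about any particular machine): because the addition goes
   through expand_binop with OPTAB_LIB_WIDEN, a call such as
   emit_unrolled_add (reg, reg, GEN_INT (100000)) may emit more than one
   insn, e.g. first loading the constant into a scratch register when the
   target's add-immediate field is too narrow to hold it.  */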
2015 \f
2016 /* Searches the insns between INSN and LOOP_END. Returns 1 if there
2017 is a backward branch in that range that branches to somewhere between
2018 LOOP_START and INSN. Returns 0 otherwise. */
2019
2020 /* ??? This is a quadratic algorithm. Could be rewritten to be linear.
2021 In practice, this is not a problem, because this function is seldom called,
2022 and uses a negligible amount of CPU time on average. */
2023
2024 static int
2025 back_branch_in_range_p (insn, loop_start, loop_end)
2026 rtx insn;
2027 rtx loop_start, loop_end;
2028 {
2029 rtx p, q, target_insn;
2030
2031 /* Stop before we get to the backward branch at the end of the loop. */
2032 loop_end = prev_nonnote_insn (loop_end);
2033 if (GET_CODE (loop_end) == BARRIER)
2034 loop_end = PREV_INSN (loop_end);
2035
2036 /* Check in case insn has been deleted; search forward for the first
2037 non-deleted insn following it. */
2038 while (INSN_DELETED_P (insn))
2039 insn = NEXT_INSN (insn);
2040
2041 /* Check for the case where insn is the last insn in the loop. */
2042 if (insn == loop_end)
2043 return 0;
2044
2045 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2046 {
2047 if (GET_CODE (p) == JUMP_INSN)
2048 {
2049 target_insn = JUMP_LABEL (p);
2050
2051 /* Search from loop_start to insn, to see if one of them is
2052 the target_insn. We can't use INSN_LUID comparisons here,
2053 since insn may not have an LUID entry. */
2054 for (q = loop_start; q != insn; q = NEXT_INSN (q))
2055 if (q == target_insn)
2056 return 1;
2057 }
2058 }
2059
2060 return 0;
2061 }
2062
2063 /* Try to generate the simplest rtx for the expression
2064 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2065 value of giv's. */
2066
2067 static rtx
2068 fold_rtx_mult_add (mult1, mult2, add1, mode)
2069 rtx mult1, mult2, add1;
2070 enum machine_mode mode;
2071 {
2072 rtx temp, mult_res;
2073 rtx result;
2074
2075 /* The modes must all be the same. This should always be true. For now,
2076 check to make sure. */
2077 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2078 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2079 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2080 abort ();
2081
2082 /* Ensure that if at least one of mult1/mult2 is constant, then mult2
2083 will be a constant. */
2084 if (GET_CODE (mult1) == CONST_INT)
2085 {
2086 temp = mult2;
2087 mult2 = mult1;
2088 mult1 = temp;
2089 }
2090
2091 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2092 if (! mult_res)
2093 mult_res = gen_rtx (MULT, mode, mult1, mult2);
2094
2095 /* Again, put the constant second. */
2096 if (GET_CODE (add1) == CONST_INT)
2097 {
2098 temp = add1;
2099 add1 = mult_res;
2100 mult_res = temp;
2101 }
2102
2103 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2104 if (! result)
2105 result = gen_rtx (PLUS, mode, add1, mult_res);
2106
2107 return result;
2108 }
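/* Worked examples (illustrative only): with all three operands constant,
   say 2, 3 and 4 in SImode, the result folds all the way to
   (const_int 10); with MULT1 a register it is left as
   (plus (mult (reg) (const_int 3)) (const_int 4)) for later passes to
   simplify.  */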
2109
2110 /* Searches the list of induction struct's for the biv BL, to try to calculate
2111 the total increment value for one iteration of the loop as a constant.
2112
2113 Returns the increment value as an rtx, simplified as much as possible,
2114 if it can be calculated. Otherwise, returns 0. */
2115
2116 rtx
2117 biv_total_increment (bl, loop_start, loop_end)
2118 struct iv_class *bl;
2119 rtx loop_start, loop_end;
2120 {
2121 struct induction *v;
2122 rtx result;
2123
2124 /* For increment, must check every instruction that sets it. Each
2125 instruction must be executed only once each time through the loop.
2126 To verify this, we check that the insn is always executed, and that
2127 there are no backward branches after the insn that branch to before it.
2128 Also, the insn must have a mult_val of one (to make sure it really is
2129 an increment). */
2130
2131 result = const0_rtx;
2132 for (v = bl->biv; v; v = v->next_iv)
2133 {
2134 if (v->always_computable && v->mult_val == const1_rtx
2135 && ! back_branch_in_range_p (v->insn, loop_start, loop_end))
2136 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2137 else
2138 return 0;
2139 }
2140
2141 return result;
2142 }
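/* Illustrative example: a biv that is incremented twice in the body,
   once by 4 and once by 1, with both increments always executed and no
   intervening backward branch, yields (const_int 5) here.  If either
   increment is conditional, 0 is returned and callers such as
   find_splittable_regs simply skip the biv.  */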
2143
2144 /* Determine the initial value of the iteration variable, and the amount
2145 that it is incremented each loop. Use the tables constructed by
2146 the strength reduction pass to calculate these values.
2147
2148 Initial_value and/or increment are set to zero if their values could not
2149 be calculated. */
2150
2151 static void
2152 iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2153 rtx iteration_var, *initial_value, *increment;
2154 rtx loop_start, loop_end;
2155 {
2156 struct iv_class *bl;
2157 struct induction *v, *b;
2158
2159 /* Clear the result values, in case no answer can be found. */
2160 *initial_value = 0;
2161 *increment = 0;
2162
2163 /* The iteration variable can be either a giv or a biv. Check to see
2164 which it is, and compute the variable's initial value, and increment
2165 value if possible. */
2166
2167 /* If this is a new register, can't handle it since we don't have any
2168 reg_iv_type entry for it. */
2169 if (REGNO (iteration_var) >= max_reg_before_loop)
2170 {
2171 if (loop_dump_stream)
2172 fprintf (loop_dump_stream,
2173 "Loop unrolling: No reg_iv_type entry for iteration var.\n");
2174 return;
2175 }
2176 /* Reject iteration variables larger than the host long size, since they
2177 could result in a number of iterations greater than the range of our
2178 `unsigned long' variable loop_n_iterations. */
2179 else if (GET_MODE_BITSIZE (GET_MODE (iteration_var)) > HOST_BITS_PER_LONG)
2180 {
2181 if (loop_dump_stream)
2182 fprintf (loop_dump_stream,
2183 "Loop unrolling: Iteration var rejected because mode larger than host long.\n");
2184 return;
2185 }
2186 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2187 {
2188 if (loop_dump_stream)
2189 fprintf (loop_dump_stream,
2190 "Loop unrolling: Iteration var not an integer.\n");
2191 return;
2192 }
2193 else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
2194 {
2195 /* Grab initial value, only useful if it is a constant. */
2196 bl = reg_biv_class[REGNO (iteration_var)];
2197 *initial_value = bl->initial_value;
2198
2199 *increment = biv_total_increment (bl, loop_start, loop_end);
2200 }
2201 else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
2202 {
2203 #if 1
2204 /* ??? The code below does not work because the incorrect number of
2205 iterations is calculated when the biv is incremented after the giv
2206 is set (which is the usual case). This can probably be accounted
2207 for by biasing the initial_value by subtracting the amount of the
2208 increment that occurs between the giv set and the giv test. However,
2209 a giv as an iterator is very rare, so it does not seem worthwhile
2210 to handle this. */
2211 /* ??? An example failure is: i = 6; do {;} while (i++ < 9). */
2212 if (loop_dump_stream)
2213 fprintf (loop_dump_stream,
2214 "Loop unrolling: Giv iterators are not handled.\n");
2215 return;
2216 #else
2217 /* Initial value is mult_val times the biv's initial value plus
2218 add_val. Only useful if it is a constant. */
2219 v = reg_iv_info[REGNO (iteration_var)];
2220 bl = reg_biv_class[REGNO (v->src_reg)];
2221 *initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
2222 v->add_val, v->mode);
2223
2224 /* Increment value is mult_val times the increment value of the biv. */
2225
2226 *increment = biv_total_increment (bl, loop_start, loop_end);
2227 if (*increment)
2228 *increment = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx,
2229 v->mode);
2230 #endif
2231 }
2232 else
2233 {
2234 if (loop_dump_stream)
2235 fprintf (loop_dump_stream,
2236 "Loop unrolling: Not basic or general induction var.\n");
2237 return;
2238 }
2239 }
2240
2241 /* Calculate the approximate final value of the iteration variable
2242 which has a loop exit test with code COMPARISON_CODE and comparison value
2243 of COMPARISON_VALUE. Also returns an indication of whether the comparison
2244 was signed or unsigned, and the direction of the comparison. This info is
2245 needed to calculate the number of loop iterations. */
2246
2247 static rtx
2248 approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
2249 enum rtx_code comparison_code;
2250 rtx comparison_value;
2251 int *unsigned_p;
2252 int *compare_dir;
2253 {
2254 /* Calculate the final value of the induction variable.
2255 The exact final value depends on the branch operator, and increment sign.
2256 This is only an approximate value. It will be wrong if the iteration
2257 variable is not incremented by one each time through the loop, and
2258 (approx final value - start value) % increment != 0. */
2259
2260 *unsigned_p = 0;
2261 switch (comparison_code)
2262 {
2263 case LEU:
2264 *unsigned_p = 1;
2265 case LE:
2266 *compare_dir = 1;
2267 return plus_constant (comparison_value, 1);
2268 case GEU:
2269 *unsigned_p = 1;
2270 case GE:
2271 *compare_dir = -1;
2272 return plus_constant (comparison_value, -1);
2273 case EQ:
2274 /* Can not calculate a final value for this case. */
2275 *compare_dir = 0;
2276 return 0;
2277 case LTU:
2278 *unsigned_p = 1;
2279 case LT:
2280 *compare_dir = 1;
2281 return comparison_value;
2282 break;
2283 case GTU:
2284 *unsigned_p = 1;
2285 case GT:
2286 *compare_dir = -1;
2287 return comparison_value;
2288 case NE:
2289 *compare_dir = 0;
2290 return comparison_value;
2291 default:
2292 abort ();
2293 }
2294 }
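/* Worked example (illustrative): for a loop ending in `while (i < n)' the
   comparison code is LT, so N itself is returned with *compare_dir = 1;
   for `while (i <= n)' (LE) the result is N + 1.  As the comment above
   warns, this is only an approximation: with i starting at 0 and stepping
   by 3 while i < 10, the difference 10 - 0 is not a multiple of the
   increment, so a count derived from it is off by one (the loop actually
   runs for i = 0, 3, 6 and 9).  */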
2295
2296 /* For each biv and giv, determine whether it can be safely split into
2297 a different variable for each unrolled copy of the loop body. If it
2298 is safe to split, then indicate that by saving some useful info
2299 in the splittable_regs array.
2300
2301 If the loop is being completely unrolled, then splittable_regs will hold
2302 the current value of the induction variable while the loop is unrolled.
2303 It must be set to the initial value of the induction variable here.
2304 Otherwise, splittable_regs will hold the difference between the current
2305 value of the induction variable and the value the induction variable had
2306 at the top of the loop. It must be set to the value 0 here.
2307
2308 Returns the total number of instructions that set registers that are
2309 splittable. */
2310
2311 /* ?? If the loop is only unrolled twice, then most of the restrictions on
2312 constant values are unnecessary, since we can easily calculate increment
2313 values in this case even if nothing is constant. The increment value
2314 should not involve a multiply however. */
2315
2316 /* ?? Even if the biv/giv increment values aren't constant, it may still
2317 be beneficial to split the variable if the loop is only unrolled a few
2318 times, since multiplies by small integers (1,2,3,4) are very cheap. */
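/* A concrete reading of the two cases above (illustrative, not from an
   actual compilation): for `for (i = 0; i < 4; i++)' unrolled completely,
   the splittable_regs entry for I tracks the current value of I, so the
   four body copies see I as the constants 0, 1, 2 and 3.  For the same
   loop unrolled four times but not completely, the entry tracks the
   offset from the value at the loop top, so the copies use I, I+1, I+2
   and I+3 and only one register stays live across the backward branch.  */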
2319
2320 static int
2321 find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2322 unroll_number)
2323 enum unroll_types unroll_type;
2324 rtx loop_start, loop_end;
2325 rtx end_insert_before;
2326 int unroll_number;
2327 {
2328 struct iv_class *bl;
2329 struct induction *v;
2330 rtx increment, tem;
2331 rtx biv_final_value;
2332 int biv_splittable;
2333 int result = 0;
2334
2335 for (bl = loop_iv_list; bl; bl = bl->next)
2336 {
2337 /* Biv_total_increment must return a constant value,
2338 otherwise we can not calculate the split values. */
2339
2340 increment = biv_total_increment (bl, loop_start, loop_end);
2341 if (! increment || GET_CODE (increment) != CONST_INT)
2342 continue;
2343
2344 /* The loop must be unrolled completely, or else have a known number
2345 of iterations and only one exit, or else the biv must be dead
2346 outside the loop, or else the final value must be known. Otherwise,
2347 it is unsafe to split the biv since it may not have the proper
2348 value on loop exit. */
2349
2350 /* loop_number_exit_labels is non-zero if the loop has an exit other than
2351 a fall through at the end. */
2352
2353 biv_splittable = 1;
2354 biv_final_value = 0;
2355 if (unroll_type != UNROLL_COMPLETELY
2356 && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
2357 || unroll_type == UNROLL_NAIVE)
2358 && (uid_luid[regno_last_uid[bl->regno]] >= INSN_LUID (loop_end)
2359 || ! bl->init_insn
2360 || INSN_UID (bl->init_insn) >= max_uid_for_loop
2361 || (uid_luid[regno_first_uid[bl->regno]]
2362 < INSN_LUID (bl->init_insn))
2363 || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2364 && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
2365 biv_splittable = 0;
2366
2367 /* If any of the insns setting the BIV don't do so with a simple
2368 PLUS, we don't know how to split it. */
2369 for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2370 if ((tem = single_set (v->insn)) == 0
2371 || GET_CODE (SET_DEST (tem)) != REG
2372 || REGNO (SET_DEST (tem)) != bl->regno
2373 || GET_CODE (SET_SRC (tem)) != PLUS)
2374 biv_splittable = 0;
2375
2376 /* If final value is non-zero, then must emit an instruction which sets
2377 the value of the biv to the proper value. This is done after
2378 handling all of the givs, since some of them may need to use the
2379 biv's value in their initialization code. */
2380
2381 /* This biv is splittable. If completely unrolling the loop, save
2382 the biv's initial value. Otherwise, save the constant zero. */
2383
2384 if (biv_splittable == 1)
2385 {
2386 if (unroll_type == UNROLL_COMPLETELY)
2387 {
2388 /* If the initial value of the biv is itself (i.e. it is too
2389 complicated for strength_reduce to compute), or is a hard
2390 register, then we must create a new pseudo reg to hold the
2391 initial value of the biv. */
2392
2393 if (GET_CODE (bl->initial_value) == REG
2394 && (REGNO (bl->initial_value) == bl->regno
2395 || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER))
2396 {
2397 rtx tem = gen_reg_rtx (bl->biv->mode);
2398
2399 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2400 loop_start);
2401
2402 if (loop_dump_stream)
2403 fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2404 bl->regno, REGNO (tem));
2405
2406 splittable_regs[bl->regno] = tem;
2407 }
2408 else
2409 splittable_regs[bl->regno] = bl->initial_value;
2410 }
2411 else
2412 splittable_regs[bl->regno] = const0_rtx;
2413
2414 /* Save the number of instructions that modify the biv, so that
2415 we can treat the last one specially. */
2416
2417 splittable_regs_updates[bl->regno] = bl->biv_count;
2418 result += bl->biv_count;
2419
2420 if (loop_dump_stream)
2421 fprintf (loop_dump_stream,
2422 "Biv %d safe to split.\n", bl->regno);
2423 }
2424
2425 /* Check every giv that depends on this biv to see whether it is
2426 splittable also. Even if the biv isn't splittable, givs which
2427 depend on it may be splittable if the biv is live outside the
2428 loop, and the givs aren't. */
2429
2430 result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2431 increment, unroll_number);
2432
2433 /* If final value is non-zero, then must emit an instruction which sets
2434 the value of the biv to the proper value. This is done after
2435 handling all of the givs, since some of them may need to use the
2436 biv's value in their initialization code. */
2437 if (biv_final_value)
2438 {
2439 /* If the loop has multiple exits, emit the insns before the
2440 loop to ensure that it will always be executed no matter
2441 how the loop exits. Otherwise emit the insn after the loop,
2442 since this is slightly more efficient. */
2443 if (! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]])
2444 emit_insn_before (gen_move_insn (bl->biv->src_reg,
2445 biv_final_value),
2446 end_insert_before);
2447 else
2448 {
2449 /* Create a new register to hold the value of the biv, and then
2450 set the biv to its final value before the loop start. The biv
2451 is set to its final value before loop start to ensure that
2452 this insn will always be executed, no matter how the loop
2453 exits. */
2454 rtx tem = gen_reg_rtx (bl->biv->mode);
2455 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2456 loop_start);
2457 emit_insn_before (gen_move_insn (bl->biv->src_reg,
2458 biv_final_value),
2459 loop_start);
2460
2461 if (loop_dump_stream)
2462 fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2463 REGNO (bl->biv->src_reg), REGNO (tem));
2464
2465 /* Set up the mapping from the original biv register to the new
2466 register. */
2467 bl->biv->src_reg = tem;
2468 }
2469 }
2470 }
2471 return result;
2472 }
2473
2474 /* For every giv based on the biv BL, check to determine whether it is
2475 splittable. This is a subroutine to find_splittable_regs ().
2476
2477 Return the number of instructions that set splittable registers. */
2478
2479 static int
2480 find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2481 unroll_number)
2482 struct iv_class *bl;
2483 enum unroll_types unroll_type;
2484 rtx loop_start, loop_end;
2485 rtx increment;
2486 int unroll_number;
2487 {
2488 struct induction *v, *v2;
2489 rtx final_value;
2490 rtx tem;
2491 int result = 0;
2492
2493 /* Scan the list of givs, and set the same_insn field when there are
2494 multiple identical givs in the same insn. */
2495 for (v = bl->giv; v; v = v->next_iv)
2496 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2497 if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2498 && ! v2->same_insn)
2499 v2->same_insn = v;
2500
2501 for (v = bl->giv; v; v = v->next_iv)
2502 {
2503 rtx giv_inc, value;
2504
2505 /* Only split the giv if it has already been reduced, or if the loop is
2506 being completely unrolled. */
2507 if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2508 continue;
2509
2510 /* The giv can be split if the insn that sets the giv is executed once
2511 and only once on every iteration of the loop. */
2512 /* An address giv can always be split. v->insn is just a use not a set,
2513 and hence it does not matter whether it is always executed. All that
2514 matters is that all the biv increments are always executed, and we
2515 won't reach here if they aren't. */
2516 if (v->giv_type != DEST_ADDR
2517 && (! v->always_computable
2518 || back_branch_in_range_p (v->insn, loop_start, loop_end)))
2519 continue;
2520
2521 /* The giv increment value must be a constant. */
2522 giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2523 v->mode);
2524 if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2525 continue;
2526
2527 /* The loop must be unrolled completely, or else have a known number of
2528 iterations and only one exit, or else the giv must be dead outside
2529 the loop, or else the final value of the giv must be known.
2530 Otherwise, it is not safe to split the giv since it may not have the
2531 proper value on loop exit. */
2532
2533 /* The used-outside-the-loop test will fail for DEST_ADDR givs. They are
2534 never used outside the loop anyway, so it is always safe to split a
2535 DEST_ADDR giv. */
2536
2537 final_value = 0;
2538 if (unroll_type != UNROLL_COMPLETELY
2539 && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
2540 || unroll_type == UNROLL_NAIVE)
2541 && v->giv_type != DEST_ADDR
2542 && ((regno_first_uid[REGNO (v->dest_reg)] != INSN_UID (v->insn)
2543 /* Check for the case where the pseudo is set by a shift/add
2544 sequence, in which case the first insn setting the pseudo
2545 is the first insn of the shift/add sequence. */
2546 && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2547 || (regno_first_uid[REGNO (v->dest_reg)]
2548 != INSN_UID (XEXP (tem, 0)))))
2549 /* Line above always fails if INSN was moved by loop opt. */
2550 || (uid_luid[regno_last_uid[REGNO (v->dest_reg)]]
2551 >= INSN_LUID (loop_end)))
2552 && ! (final_value = v->final_value))
2553 continue;
2554
2555 #if 0
2556 /* Currently, non-reduced/final-value givs are never split. */
2557 /* Should emit insns after the loop if possible, as the biv final value
2558 code below does. */
2559
2560 /* If the final value is non-zero, and the giv has not been reduced,
2561 then must emit an instruction to set the final value. */
2562 if (final_value && !v->new_reg)
2563 {
2564 /* Create a new register to hold the value of the giv, and then set
2565 the giv to its final value before the loop start. The giv is set
2566 to its final value before loop start to ensure that this insn
2567 will always be executed, no matter how we exit. */
2568 tem = gen_reg_rtx (v->mode);
2569 emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2570 emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2571 loop_start);
2572
2573 if (loop_dump_stream)
2574 fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2575 REGNO (v->dest_reg), REGNO (tem));
2576
2577 v->src_reg = tem;
2578 }
2579 #endif
2580
2581 /* This giv is splittable. If completely unrolling the loop, save the
2582 giv's initial value. Otherwise, save the constant zero for it. */
2583
2584 if (unroll_type == UNROLL_COMPLETELY)
2585 {
2586 /* It is not safe to use bl->initial_value here, because it may not
2587 be invariant. It is safe to use the initial value stored in
2588 the splittable_regs array if it is set. In rare cases, it won't
2589 be set, so then we do exactly the same thing as
2590 find_splittable_regs does to get a safe value. */
2591 rtx biv_initial_value;
2592
2593 if (splittable_regs[bl->regno])
2594 biv_initial_value = splittable_regs[bl->regno];
2595 else if (GET_CODE (bl->initial_value) != REG
2596 || (REGNO (bl->initial_value) != bl->regno
2597 && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2598 biv_initial_value = bl->initial_value;
2599 else
2600 {
2601 rtx tem = gen_reg_rtx (bl->biv->mode);
2602
2603 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2604 loop_start);
2605 biv_initial_value = tem;
2606 }
2607 value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2608 v->add_val, v->mode);
2609 }
2610 else
2611 value = const0_rtx;
2612
2613 if (v->new_reg)
2614 {
2615 /* If a giv was combined with another giv, then we can only split
2616 this giv if the giv it was combined with was reduced. This
2617 is because the value of v->new_reg is meaningless in this
2618 case. */
2619 if (v->same && ! v->same->new_reg)
2620 {
2621 if (loop_dump_stream)
2622 fprintf (loop_dump_stream,
2623 "giv combined with unreduced giv not split.\n");
2624 continue;
2625 }
2626 /* If the giv is an address destination, it could be something other
2627 than a simple register; these have to be treated differently. */
2628 else if (v->giv_type == DEST_REG)
2629 {
2630 /* If value is not a constant, register, or register plus
2631 constant, then compute its value into a register before
2632 loop start. This prevents invalid rtx sharing, and should
2633 generate better code. We can use bl->initial_value here
2634 instead of splittable_regs[bl->regno] because this code
2635 is going before the loop start. */
2636 if (unroll_type == UNROLL_COMPLETELY
2637 && GET_CODE (value) != CONST_INT
2638 && GET_CODE (value) != REG
2639 && (GET_CODE (value) != PLUS
2640 || GET_CODE (XEXP (value, 0)) != REG
2641 || GET_CODE (XEXP (value, 1)) != CONST_INT))
2642 {
2643 rtx tem = gen_reg_rtx (v->mode);
2644 emit_iv_add_mult (bl->initial_value, v->mult_val,
2645 v->add_val, tem, loop_start);
2646 value = tem;
2647 }
2648
2649 splittable_regs[REGNO (v->new_reg)] = value;
2650 }
2651 else
2652 {
2653 /* Splitting address givs is useful since it will often allow us
2654 to eliminate some increment insns for the base giv as
2655 unnecessary. */
2656
2657 /* If the addr giv is combined with a dest_reg giv, then all
2658 references to that dest reg will be remapped, which is NOT
2659 what we want for split addr regs. We always create a new
2660 register for the split addr giv, just to be safe. */
2661
2662 /* ??? If there are multiple address givs which have been
2663 combined with the same dest_reg giv, then we may only need
2664 one new register for them. Pulling out constants below will
2665 catch some of the common cases of this. Currently, I leave
2666 the work of simplifying multiple address givs to the
2667 following cse pass. */
2668
2669 /* As a special case, if we have multiple identical address givs
2670 within a single instruction, then we do use a single pseudo
2671 reg for both. This is necessary in case one is a match_dup
2672 of the other. */
2673
2674 v->const_adjust = 0;
2675
2676 if (v->same_insn)
2677 {
2678 v->dest_reg = v->same_insn->dest_reg;
2679 if (loop_dump_stream)
2680 fprintf (loop_dump_stream,
2681 "Sharing address givs in insn %d\n",
2682 INSN_UID (v->insn));
2683 }
2684 else if (unroll_type != UNROLL_COMPLETELY)
2685 {
2686 /* If not completely unrolling the loop, then create a new
2687 register to hold the split value of the DEST_ADDR giv.
2688 Emit insn to initialize its value before loop start. */
2689 tem = gen_reg_rtx (v->mode);
2690
2691 /* If the address giv has a constant in its new_reg value,
2692 then this constant can be pulled out and put in value,
2693 instead of being part of the initialization code. */
2694
2695 if (GET_CODE (v->new_reg) == PLUS
2696 && GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
2697 {
2698 v->dest_reg
2699 = plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
2700
2701 /* Only succeed if this will give valid addresses.
2702 Try to validate both the first and the last
2703 address resulting from loop unrolling; if
2704 one fails, then we can't do const elim here. */
2705 if (memory_address_p (v->mem_mode, v->dest_reg)
2706 && memory_address_p (v->mem_mode,
2707 plus_constant (v->dest_reg,
2708 INTVAL (giv_inc)
2709 * (unroll_number - 1))))
2710 {
2711 /* Save the negative of the eliminated const, so
2712 that we can calculate the dest_reg's increment
2713 value later. */
2714 v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
2715
2716 v->new_reg = XEXP (v->new_reg, 0);
2717 if (loop_dump_stream)
2718 fprintf (loop_dump_stream,
2719 "Eliminating constant from giv %d\n",
2720 REGNO (tem));
2721 }
2722 else
2723 v->dest_reg = tem;
2724 }
2725 else
2726 v->dest_reg = tem;
2727
2728 /* If the address hasn't been checked for validity yet, do so
2729 now, and fail completely if either the first or the last
2730 unrolled copy of the address is not a valid address. */
2731 if (v->dest_reg == tem
2732 && (! memory_address_p (v->mem_mode, v->dest_reg)
2733 || ! memory_address_p (v->mem_mode,
2734 plus_constant (v->dest_reg,
2735 INTVAL (giv_inc)
2736 * (unroll_number -1)))))
2737 {
2738 if (loop_dump_stream)
2739 fprintf (loop_dump_stream,
2740 "Invalid address for giv at insn %d\n",
2741 INSN_UID (v->insn));
2742 continue;
2743 }
2744
2745 /* To initialize the new register, just move the value of
2746 new_reg into it. This is not guaranteed to give a valid
2747 instruction on machines with complex addressing modes.
2748 If we can't recognize it, then delete it and emit insns
2749 to calculate the value from scratch. */
2750 emit_insn_before (gen_rtx (SET, VOIDmode, tem,
2751 copy_rtx (v->new_reg)),
2752 loop_start);
2753 if (recog_memoized (PREV_INSN (loop_start)) < 0)
2754 {
2755 rtx sequence, ret;
2756
2757 /* We can't use bl->initial_value to compute the initial
2758 value, because the loop may have been preconditioned.
2759 We must calculate it from NEW_REG. Try using
2760 force_operand instead of emit_iv_add_mult. */
2761 delete_insn (PREV_INSN (loop_start));
2762
2763 start_sequence ();
2764 ret = force_operand (v->new_reg, tem);
2765 if (ret != tem)
2766 emit_move_insn (tem, ret);
2767 sequence = gen_sequence ();
2768 end_sequence ();
2769 emit_insn_before (sequence, loop_start);
2770
2771 if (loop_dump_stream)
2772 fprintf (loop_dump_stream,
2773 "Invalid init insn, rewritten.\n");
2774 }
2775 }
2776 else
2777 {
2778 v->dest_reg = value;
2779
2780 /* Check the resulting address for validity, and fail
2781 if the resulting address would be invalid. */
2782 if (! memory_address_p (v->mem_mode, v->dest_reg)
2783 || ! memory_address_p (v->mem_mode,
2784 plus_constant (v->dest_reg,
2785 INTVAL (giv_inc) *
2786 (unroll_number -1))))
2787 {
2788 if (loop_dump_stream)
2789 fprintf (loop_dump_stream,
2790 "Invalid address for giv at insn %d\n",
2791 INSN_UID (v->insn));
2792 continue;
2793 }
2794 }
2795
2796 /* Store the value of dest_reg into the insn. This sharing
2797 will not be a problem as this insn will always be copied
2798 later. */
2799
2800 *v->location = v->dest_reg;
2801
2802 /* If this address giv is combined with a dest reg giv, then
2803 save the base giv's induction pointer so that we will be
2804 able to handle this address giv properly. The base giv
2805 itself does not have to be splittable. */
2806
2807 if (v->same && v->same->giv_type == DEST_REG)
2808 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
2809
2810 if (GET_CODE (v->new_reg) == REG)
2811 {
2812 /* This giv may not have been combined with any others.
2813 Make sure that its register is marked as splittable here. */
2814
2815 splittable_regs[REGNO (v->new_reg)] = value;
2816
2817 /* Make it appear to depend upon itself, so that the
2818 giv will be properly split in the main loop above. */
2819 if (! v->same)
2820 {
2821 v->same = v;
2822 addr_combined_regs[REGNO (v->new_reg)] = v;
2823 }
2824 }
2825
2826 if (loop_dump_stream)
2827 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
2828 }
2829 }
2830 else
2831 {
2832 #if 0
2833 /* Currently, unreduced giv's can't be split. This is not too much
2834 of a problem since unreduced giv's are not live across loop
2835 iterations anyways. When unrolling a loop completely though,
2836 it makes sense to reduce&split givs when possible, as this will
2837 result in simpler instructions, and will not require that a reg
2838 be live across loop iterations. */
2839
2840 splittable_regs[REGNO (v->dest_reg)] = value;
2841 fprintf (stderr, "Giv %d at insn %d not reduced\n",
2842 REGNO (v->dest_reg), INSN_UID (v->insn));
2843 #else
2844 continue;
2845 #endif
2846 }
2847
2848 /* Givs are only updated once by definition. Mark it so if this is
2849 a splittable register. Don't need to do anything for address givs
2850 where this may not be a register. */
2851
2852 if (GET_CODE (v->new_reg) == REG)
2853 splittable_regs_updates[REGNO (v->new_reg)] = 1;
2854
2855 result++;
2856
2857 if (loop_dump_stream)
2858 {
2859 int regnum;
2860
2861 if (GET_CODE (v->dest_reg) == CONST_INT)
2862 regnum = -1;
2863 else if (GET_CODE (v->dest_reg) != REG)
2864 regnum = REGNO (XEXP (v->dest_reg, 0));
2865 else
2866 regnum = REGNO (v->dest_reg);
2867 fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
2868 regnum, INSN_UID (v->insn));
2869 }
2870 }
2871
2872 return result;
2873 }
2874 \f
2875 /* Try to prove that the register is dead after the loop exits. Trace every
2876 loop exit looking for an insn that will always be executed, which sets
2877 the register to some value, and appears before the first use of the register
2878 is found. If successful, then return 1, otherwise return 0. */
2879
2880 /* ?? Could be made more intelligent in the handling of jumps, so that
2881 it can search past if statements and other similar structures. */
2882
2883 static int
2884 reg_dead_after_loop (reg, loop_start, loop_end)
2885 rtx reg, loop_start, loop_end;
2886 {
2887 rtx insn, label;
2888 enum rtx_code code;
2889 int jump_count = 0;
2890
2891 /* HACK: Must also search the loop fall through exit, create a label_ref
2892 here which points to the loop_end, and append the loop_number_exit_labels
2893 list to it. */
2894 label = gen_rtx (LABEL_REF, VOIDmode, loop_end);
2895 LABEL_NEXTREF (label)
2896 = loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]];
2897
2898 for ( ; label; label = LABEL_NEXTREF (label))
2899 {
2900 /* Succeed if we find an insn which sets the register, or if we reach
2901 the end of the function. Fail if we find an insn that uses the
2902 register, or if we come to a conditional jump. */
2903
2904 insn = NEXT_INSN (XEXP (label, 0));
2905 while (insn)
2906 {
2907 code = GET_CODE (insn);
2908 if (GET_RTX_CLASS (code) == 'i')
2909 {
2910 rtx set;
2911
2912 if (reg_referenced_p (reg, PATTERN (insn)))
2913 return 0;
2914
2915 set = single_set (insn);
2916 if (set && rtx_equal_p (SET_DEST (set), reg))
2917 break;
2918 }
2919
2920 if (code == JUMP_INSN)
2921 {
2922 if (GET_CODE (PATTERN (insn)) == RETURN)
2923 break;
2924 else if (! simplejump_p (insn)
2925 /* Prevent infinite loop following infinite loops. */
2926 || jump_count++ > 20)
2927 return 0;
2928 else
2929 insn = JUMP_LABEL (insn);
2930 }
2931
2932 insn = NEXT_INSN (insn);
2933 }
2934 }
2935
2936 /* Success, the register is dead on all loop exits. */
2937 return 1;
2938 }
2939
2940 /* Try to calculate the final value of the biv, the value it will have at
2941 the end of the loop. If we can do it, return that value. */
2942
2943 rtx
2944 final_biv_value (bl, loop_start, loop_end)
2945 struct iv_class *bl;
2946 rtx loop_start, loop_end;
2947 {
2948 rtx increment, tem;
2949
2950 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
2951
2952 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
2953 return 0;
2954
2955 /* The final value for reversed bivs must be calculated differently than
2956 for ordinary bivs. In this case, there is already an insn after the
2957 loop which sets this biv's final value (if necessary), and there are
2958 no other loop exits, so we can return any value. */
2959 if (bl->reversed)
2960 {
2961 if (loop_dump_stream)
2962 fprintf (loop_dump_stream,
2963 "Final biv value for %d, reversed biv.\n", bl->regno);
2964
2965 return const0_rtx;
2966 }
2967
2968 /* Try to calculate the final value as initial value + (number of iterations
2969 * increment). For this to work, increment must be invariant, the only
2970 exit from the loop must be the fall through at the bottom (otherwise
2971 it may not have its final value when the loop exits), and the initial
2972 value of the biv must be invariant. */
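/* Illustrative instance: a biv that starts at 0 and is incremented by 4
   in a loop known to iterate 10 times gets the final value
   0 + 10 * 4 = 40, computed by the emit_iv_add_mult call below into a
   fresh pseudo placed after the loop end.  */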
2973
2974 if (loop_n_iterations != 0
2975 && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
2976 && invariant_p (bl->initial_value))
2977 {
2978 increment = biv_total_increment (bl, loop_start, loop_end);
2979
2980 if (increment && invariant_p (increment))
2981 {
2982 /* Can calculate the loop exit value, emit insns after loop
2983 end to calculate this value into a temporary register in
2984 case it is needed later. */
2985
2986 tem = gen_reg_rtx (bl->biv->mode);
2987 /* Make sure loop_end is not the last insn. */
2988 if (NEXT_INSN (loop_end) == 0)
2989 emit_note_after (NOTE_INSN_DELETED, loop_end);
2990 emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
2991 bl->initial_value, tem, NEXT_INSN (loop_end));
2992
2993 if (loop_dump_stream)
2994 fprintf (loop_dump_stream,
2995 "Final biv value for %d, calculated.\n", bl->regno);
2996
2997 return tem;
2998 }
2999 }
3000
3001 /* Check to see if the biv is dead at all loop exits. */
3002 if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3003 {
3004 if (loop_dump_stream)
3005 fprintf (loop_dump_stream,
3006 "Final biv value for %d, biv dead after loop exit.\n",
3007 bl->regno);
3008
3009 return const0_rtx;
3010 }
3011
3012 return 0;
3013 }
3014
3015 /* Try to calculate the final value of the giv, the value it will have at
3016 the end of the loop. If we can do it, return that value. */
3017
3018 rtx
3019 final_giv_value (v, loop_start, loop_end)
3020 struct induction *v;
3021 rtx loop_start, loop_end;
3022 {
3023 struct iv_class *bl;
3024 rtx insn;
3025 rtx increment, tem;
3026 rtx insert_before, seq;
3027
3028 bl = reg_biv_class[REGNO (v->src_reg)];
3029
3030 /* The final value for givs which depend on reversed bivs must be calculated
3031 differently than for ordinary givs. In this case, there is already an
3032 insn after the loop which sets this giv's final value (if necessary),
3033 and there are no other loop exits, so we can return any value. */
3034 if (bl->reversed)
3035 {
3036 if (loop_dump_stream)
3037 fprintf (loop_dump_stream,
3038 "Final giv value for %d, depends on reversed biv\n",
3039 REGNO (v->dest_reg));
3040 return const0_rtx;
3041 }
3042
3043 /* Try to calculate the final value as a function of the biv it depends
3044 upon. The only exit from the loop must be the fall through at the bottom
3045 (otherwise it may not have its final value when the loop exits). */
3046
3047 /* ??? Can calculate the final giv value by subtracting off the
3048 extra biv increments times the giv's mult_val. The loop must have
3049 only one exit for this to work, but the number of loop iterations does not need
3050 to be known. */
3051
3052 if (loop_n_iterations != 0
3053 && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]])
3054 {
3055 /* ?? It is tempting to use the biv's value here since these insns will
3056 be put after the loop, and hence the biv will have its final value
3057 then. However, this fails if the biv is subsequently eliminated.
3058 Perhaps determine whether biv's are eliminable before trying to
3059 determine whether giv's are replaceable so that we can use the
3060 biv value here if it is not eliminable. */
3061
3062 increment = biv_total_increment (bl, loop_start, loop_end);
3063
3064 if (increment && invariant_p (increment))
3065 {
3066 /* Can calculate the loop exit value of its biv as
3067 (loop_n_iterations * increment) + initial_value */
3068
3069 /* The loop exit value of the giv is then
3070 (final_biv_value - extra increments) * mult_val + add_val.
3071 The extra increments are any increments to the biv which
3072 occur in the loop after the giv's value is calculated.
3073 We must search from the insn that sets the giv to the end
3074 of the loop to calculate this value. */
3075
3076 insert_before = NEXT_INSN (loop_end);
3077
3078 /* Put the final biv value in tem. */
3079 tem = gen_reg_rtx (bl->biv->mode);
3080 emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3081 bl->initial_value, tem, insert_before);
3082
3083 /* Subtract off extra increments as we find them. */
3084 for (insn = NEXT_INSN (v->insn); insn != loop_end;
3085 insn = NEXT_INSN (insn))
3086 {
3087 struct induction *biv;
3088
3089 for (biv = bl->biv; biv; biv = biv->next_iv)
3090 if (biv->insn == insn)
3091 {
3092 start_sequence ();
3093 tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3094 biv->add_val, NULL_RTX, 0,
3095 OPTAB_LIB_WIDEN);
3096 seq = gen_sequence ();
3097 end_sequence ();
3098 emit_insn_before (seq, insert_before);
3099 }
3100 }
3101
3102 /* Now calculate the giv's final value. */
3103 emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3104 insert_before);
3105
3106 if (loop_dump_stream)
3107 fprintf (loop_dump_stream,
3108 "Final giv value for %d, calc from biv's value.\n",
3109 REGNO (v->dest_reg));
3110
3111 return tem;
3112 }
3113 }
3114
3115 /* Replaceable giv's should never reach here. */
3116 if (v->replaceable)
3117 abort ();
3118
3119 /* Check to see if the giv is dead at all loop exits. */
3120 if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3121 {
3122 if (loop_dump_stream)
3123 fprintf (loop_dump_stream,
3124 "Final giv value for %d, giv dead after loop exit.\n",
3125 REGNO (v->dest_reg));
3126
3127 return const0_rtx;
3128 }
3129
3130 return 0;
3131 }
3132
3133
3134 /* Calculate the number of loop iterations. Returns the exact number of loop
3135 iterations if it can be calculated, otherwise returns zero. */
3136
3137 unsigned HOST_WIDE_INT
3138 loop_iterations (loop_start, loop_end)
3139 rtx loop_start, loop_end;
3140 {
3141 rtx comparison, comparison_value;
3142 rtx iteration_var, initial_value, increment, final_value;
3143 enum rtx_code comparison_code;
3144 HOST_WIDE_INT i;
3145 int increment_dir;
3146 int unsigned_compare, compare_dir, final_larger;
3147 unsigned long tempu;
3148 rtx last_loop_insn;
3149
3150 /* First find the iteration variable. If the last insn is a conditional
3151 branch, and the insn before tests a register value, make that the
3152 iteration variable. */
3153
3154 loop_initial_value = 0;
3155 loop_increment = 0;
3156 loop_final_value = 0;
3157 loop_iteration_var = 0;
3158
3159 /* We used to use prev_nonnote_insn here, but that fails because it might
3160 accidentally get the branch for a contained loop if the branch for this
3161 loop was deleted. We can only trust branches immediately before the
3162 loop_end. */
3163 last_loop_insn = PREV_INSN (loop_end);
3164
3165 comparison = get_condition_for_loop (last_loop_insn);
3166 if (comparison == 0)
3167 {
3168 if (loop_dump_stream)
3169 fprintf (loop_dump_stream,
3170 "Loop unrolling: No final conditional branch found.\n");
3171 return 0;
3172 }
3173
3174 /* ??? Get_condition may switch the positions of the induction variable
3175 and the invariant register when it canonicalizes the comparison. */
3176
3177 comparison_code = GET_CODE (comparison);
3178 iteration_var = XEXP (comparison, 0);
3179 comparison_value = XEXP (comparison, 1);
3180
3181 if (GET_CODE (iteration_var) != REG)
3182 {
3183 if (loop_dump_stream)
3184 fprintf (loop_dump_stream,
3185 "Loop unrolling: Comparison not against register.\n");
3186 return 0;
3187 }
3188
3189 /* loop_iterations is now always called before any new registers are
3190 created, so this should never occur. */
3191
3192 if (REGNO (iteration_var) >= max_reg_before_loop)
3193 abort ();
3194
3195 iteration_info (iteration_var, &initial_value, &increment,
3196 loop_start, loop_end);
3197 if (initial_value == 0)
3198 /* iteration_info already printed a message. */
3199 return 0;
3200
3201 /* If the comparison value is an invariant register, then try to find
3202 its value from the insns before the start of the loop. */
3203
3204 if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3205 {
3206 rtx insn, set;
3207
3208 for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3209 {
3210 if (GET_CODE (insn) == CODE_LABEL)
3211 break;
3212
3213 else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3214 && reg_set_p (comparison_value, insn))
3215 {
3216 /* We found the last insn before the loop that sets the register.
3217 If it sets the entire register, and has a REG_EQUAL note,
3218 then use the value of the REG_EQUAL note. */
3219 if ((set = single_set (insn))
3220 && (SET_DEST (set) == comparison_value))
3221 {
3222 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3223
3224 /* Only use the REG_EQUAL note if it is a constant.
3225 Other things, divide in particular, will cause
3226 problems later if we use them. */
3227 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3228 && CONSTANT_P (XEXP (note, 0)))
3229 comparison_value = XEXP (note, 0);
3230 }
3231 break;
3232 }
3233 }
3234 }
3235
3236 final_value = approx_final_value (comparison_code, comparison_value,
3237 &unsigned_compare, &compare_dir);
3238
3239 /* Save the calculated values describing this loop's bounds, in case
3240 precondition_loop_p will need them later. These values can not be
3241 recalculated inside precondition_loop_p because strength reduction
3242 optimizations may obscure the loop's structure. */
3243
3244 loop_iteration_var = iteration_var;
3245 loop_initial_value = initial_value;
3246 loop_increment = increment;
3247 loop_final_value = final_value;
3248
3249 if (increment == 0)
3250 {
3251 if (loop_dump_stream)
3252 fprintf (loop_dump_stream,
3253 "Loop unrolling: Increment value can't be calculated.\n");
3254 return 0;
3255 }
3256 else if (GET_CODE (increment) != CONST_INT)
3257 {
3258 if (loop_dump_stream)
3259 fprintf (loop_dump_stream,
3260 "Loop unrolling: Increment value not constant.\n");
3261 return 0;
3262 }
3263 else if (GET_CODE (initial_value) != CONST_INT)
3264 {
3265 if (loop_dump_stream)
3266 fprintf (loop_dump_stream,
3267 "Loop unrolling: Initial value not constant.\n");
3268 return 0;
3269 }
3270 else if (final_value == 0)
3271 {
3272 if (loop_dump_stream)
3273 fprintf (loop_dump_stream,
3274 "Loop unrolling: EQ comparison loop.\n");
3275 return 0;
3276 }
3277 else if (GET_CODE (final_value) != CONST_INT)
3278 {
3279 if (loop_dump_stream)
3280 fprintf (loop_dump_stream,
3281 "Loop unrolling: Final value not constant.\n");
3282 return 0;
3283 }
3284
3285 /* ??? Final value and initial value do not have to be constants.
3286 Only their difference has to be constant. When the iteration variable
3287 is an array address, the final value and initial value might both
3288 be addresses with the same base but different constant offsets.
3289 Final value must be invariant for this to work.
3290
3291 To do this, need some way to find the values of registers which are
3292 invariant. */
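/* For instance (illustrative only), a pointer loop such as

	for (p = &a[0]; p < &a[100]; p++)

   might have an initial value of (symbol_ref a) and a final value of
   (plus (symbol_ref a) (const_int 400)) when the elements are 4 bytes
   wide; the difference, 400, is constant even though neither bound is
   a CONST_INT.  */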
3293
3294 /* Final_larger is 1 if the final value is larger, 0 if they are equal, otherwise -1. */
3295 if (unsigned_compare)
3296 final_larger
3297 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3298 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3299 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3300 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3301 else
3302 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3303 - (INTVAL (final_value) < INTVAL (initial_value));
3304
3305 if (INTVAL (increment) > 0)
3306 increment_dir = 1;
3307 else if (INTVAL (increment) == 0)
3308 increment_dir = 0;
3309 else
3310 increment_dir = -1;
3311
3312 /* There are 27 different cases: compare_dir = -1, 0, 1;
3313 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3314 There are 4 normal cases, 4 reverse cases (where the iteration variable
3315 will overflow before the loop exits), 4 infinite loop cases, and 15
3316 immediate exit (0 or 1 iteration depending on loop type) cases.
3317 Only try to optimize the normal cases. */
3318
3319 /* (compare_dir/final_larger/increment_dir)
3320 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3321 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3322 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3323 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
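/* Concrete illustration (not from the original sources, and assuming the
   usual mapping where a `<' loop-back test gives compare_dir == 1): a loop
   that starts at 0, increments by 1 and branches back while `i < 100' has
   compare_dir == 1, final_larger == 1 and increment_dir == 1, the normal
   case (1/1/1) above.  Decrementing by 1 instead would give (1/1/-1), a
   reverse case, and the code below would give up and return zero.  */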
3324
3325 /* ??? If the meaning of reverse loops (where the iteration variable
3326 will overflow before the loop exits) is undefined, then could
3327 eliminate all of these special checks, and just always assume
3328 the loops are normal/immediate/infinite. Note that this means
3329 the sign of increment_dir does not have to be known. Also,
3330 since it does not really hurt if immediate exit loops or infinite loops
3331 are optimized, those cases could be ignored as well, and hence all
3332 loops could be optimized.
3333
3334 According to ANSI Spec, the reverse loop case result is undefined,
3335 because the action on overflow is undefined.
3336
3337 See also the special test for NE loops below. */
3338
3339 if (final_larger == increment_dir && final_larger != 0
3340 && (final_larger == compare_dir || compare_dir == 0))
3341 /* Normal case. */
3342 ;
3343 else
3344 {
3345 if (loop_dump_stream)
3346 fprintf (loop_dump_stream,
3347 "Loop unrolling: Not normal loop.\n");
3348 return 0;
3349 }
3350
3351 /* Calculate the number of iterations; final_value is only an approximation,
3352 so correct for that. Note that tempu and loop_n_iterations are
3353 unsigned, because they can be as large as 2^n - 1. */
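/* Worked example of the computation below: with initial value 0, final
   value 10 and increment 3, tempu == 10 and i == 3, so the result is
   10 / 3 + 1 == 4, matching the four iterations i == 0, 3, 6, 9.  */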
3354
3355 i = INTVAL (increment);
3356 if (i > 0)
3357 tempu = INTVAL (final_value) - INTVAL (initial_value);
3358 else if (i < 0)
3359 {
3360 tempu = INTVAL (initial_value) - INTVAL (final_value);
3361 i = -i;
3362 }
3363 else
3364 abort ();
3365
3366 /* For NE tests, make sure that the iteration variable won't miss the
3367 final value. If tempu mod i is not zero, then the iteration variable
3368 will overflow before the loop exits, and we can not calculate the
3369 number of iterations. */
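/* E.g. (illustrative) a loop ending with `if (i != 10) goto top' that
   counts up from 0 by 3 never makes i equal to 10; here tempu == 10,
   i == 3, 10 % 3 != 0, so we give up and return zero.  */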
3370 if (compare_dir == 0 && (tempu % i) != 0)
3371 return 0;
3372
3373 return tempu / i + ((tempu % i) != 0);
3374 }
3375
3376 /* Replace uses of split bivs with their split pseudo registers. This is
3377 for original instructions which remain after loop unrolling without
3378 copying. */
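/* A caller would typically apply this in place to an insn that was left
   uncopied, e.g. (illustrative sketch only, assuming such a call site):

	PATTERN (insn) = remap_split_bivs (PATTERN (insn));  */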
3379
3380 static rtx
3381 remap_split_bivs (x)
3382 rtx x;
3383 {
3384 register enum rtx_code code;
3385 register int i;
3386 register char *fmt;
3387
3388 if (x == 0)
3389 return x;
3390
3391 code = GET_CODE (x);
3392 switch (code)
3393 {
3394 case SCRATCH:
3395 case PC:
3396 case CC0:
3397 case CONST_INT:
3398 case CONST_DOUBLE:
3399 case CONST:
3400 case SYMBOL_REF:
3401 case LABEL_REF:
3402 return x;
3403
3404 case REG:
3405 #if 0
3406 /* If non-reduced/final-value givs were split, then this would
3407 have to remap those givs also. */
3408 #endif
3409 if (REGNO (x) < max_reg_before_loop
3410 && reg_iv_type[REGNO (x)] == BASIC_INDUCT)
3411 return reg_biv_class[REGNO (x)]->biv->src_reg;
3412 }
3413
3414 fmt = GET_RTX_FORMAT (code);
3415 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3416 {
3417 if (fmt[i] == 'e')
3418 XEXP (x, i) = remap_split_bivs (XEXP (x, i));
3419 if (fmt[i] == 'E')
3420 {
3421 register int j;
3422 for (j = 0; j < XVECLEN (x, i); j++)
3423 XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
3424 }
3425 }
3426 return x;
3427 }