gcc/unroll.c

   1 /* Try to unroll loops, and split induction variables.
   2    Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
   3    Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* Try to unroll a loop, and split induction variables.
  23
  24    Loops for which the number of iterations can be calculated exactly are
  25    handled specially.  If the number of iterations times the insn_count is
  26    less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
  27    Otherwise, we try to unroll the loop a number of times modulo the number
  28    of iterations, so that only one exit test will be needed.  It is unrolled
  29    a number of times approximately equal to MAX_UNROLLED_INSNS divided by
  30    the insn count.
  31
  32    Otherwise, if the number of iterations can be calculated exactly at
  33    run time, and the loop is always entered at the top, then we try to
  34    precondition the loop.  That is, at run time, calculate how many times
  35    the loop will execute, and then execute the loop body a few times so
  36    that the remaining iterations will be some multiple of 4 (or 2 if the
  37    loop is large).  Then fall through to a loop unrolled 4 (or 2) times,
  38    with only one exit test needed at the end of the loop.
  39
  40    Otherwise, if the number of iterations can not be calculated exactly,
  41    not even at run time, then we still unroll the loop a number of times
  42    approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
  43    but there must be an exit test after each copy of the loop body.
  44
  45    For each induction variable, which is dead outside the loop (replaceable)
  46    or for which we can easily calculate the final value, if we can easily
  47    calculate its value at each place where it is set as a function of the
  48    current loop unroll count and the variable's value at loop entry, then
  49    the induction variable is split into `N' different variables, one for
  50    each copy of the loop body.  One variable is live across the backward
  51    branch, and the others are all calculated as a function of this variable.
  52    This helps eliminate data dependencies, and leads to further opportunities
  53    for cse.  */
  54
  55 /* Possible improvements follow:  */
  56
  57 /* ??? Add an extra pass somewhere to determine whether unrolling will
  58    give any benefit.  E.g. after generating all unrolled insns, compute the
  59    cost of all insns and compare against cost of insns in rolled loop.
  60
  61    - On traditional architectures, unrolling a non-constant bound loop
  62      is a win if there is a giv whose only use is in memory addresses, the
  63      memory addresses can be split, and hence giv increments can be
  64      eliminated.
  65    - It is also a win if the loop is executed many times, and preconditioning
  66      can be performed for the loop.
  67    Add code to check for these and similar cases.  */
  68
  69 /* ??? Improve control of which loops get unrolled.  Could use profiling
  70    info to only unroll the most commonly executed loops.  Perhaps have
  71    a user-specifiable option to control the amount of code expansion,
  72    or the percent of loops to consider for unrolling.  Etc.  */
  73
  74 /* ??? Look at the register copies inside the loop to see if they form a
  75    simple permutation.  If so, iterate the permutation until it gets back to
  76    the start state.  This is how many times we should unroll the loop, for
  77    best results, because then all register copies can be eliminated.
  78    For example, the lisp nreverse function should be unrolled 3 times
  79    while (this)
  80      {
  81        next = this->cdr;
  82        this->cdr = prev;
  83        prev = this;
  84        this = next;
  85      }
  86
  87    ??? The number of times to unroll the loop may also be based on data
  88    references in the loop.  For example, if we have a loop that references
  89    x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times.  */
  90
  91 /* ??? Add some simple linear equation solving capability so that we can
  92    determine the number of loop iterations for more complex loops.
  93    For example, consider this loop from gdb
  94    #define SWAP_TARGET_AND_HOST(buffer,len)
  95      {
  96        char tmp;
  97        char *p = (char *) buffer;
  98        char *q = ((char *) buffer) + len - 1;
  99        int iterations = (len + 1) >> 1;
 100        int i;
 101        for (p; p < q; p++, q--)
 102          {
 103            tmp = *q;
 104            *q = *p;
 105            *p = tmp;
 106          }
 107      }
 108    Note that:
 109      start value = p = &buffer + current_iteration
 110      end value   = q = &buffer + len - 1 - current_iteration
 111    Given the loop exit test of "p < q", the loop ends when "q - p" reaches
 112    zero, so set it equal to zero and solve for the number of iterations:
 113      q - p = len - 1 - 2*current_iteration = 0
 114      current_iteration = (len - 1) / 2
 115    Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
 116    iterations of this loop.  */
 117
 118 /* ??? Currently, no labels are marked as loop invariant when doing loop
 119    unrolling.  This is because an insn inside the loop, that loads the address
 120    of a label inside the loop into a register, could be moved outside the loop
 121    by the invariant code motion pass if labels were invariant.  If the loop
 122    is subsequently unrolled, the code will be wrong because each unrolled
 123    body of the loop will use the same address, whereas each actually needs a
 124    different address.  A case where this happens is when a loop containing
 125    a switch statement is unrolled.
 126
 127    It would be better to let labels be considered invariant.  When we
 128    unroll loops here, check to see if any insns using a label local to the
 129    loop were moved before the loop.  If so, then correct the problem, by
 130    moving the insn back into the loop, or perhaps replicate the insn before
 131    the loop, one copy for each time the loop is unrolled.  */
 132
 133 /* The prime factors looked for when trying to unroll a loop by some
 134    number which evenly divides the total number of iterations.  Just
 135    checking for these 4 prime factors will find at least one factor for
 136    75% of all numbers theoretically.  Practically speaking, this will
 137    succeed almost all of the time since iteration counts are generally a
 138    multiple of 2 and/or 5.  unroll_loop recomputes the count fields.  */
 139
 140 #define NUM_FACTORS 4
 141
 142 struct _factor { int factor, count; } factors[NUM_FACTORS]
 143   = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
 144
 145 /* Unrolling strategies: complete, modulo the iteration count, or naive.  */
 146
 147 enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
 148
 149 #include "config.h"
 150 #include "rtl.h"
 151 #include "insn-config.h"
 152 #include "integrate.h"
 153 #include "regs.h"
 154 #include "flags.h"
 155 #include "expr.h"
 156 #include <stdio.h>
 157 #include "loop.h"
 158
 159 /* Approximate limit on the number of insns in an unrolled loop.  This
 160    controls which loops are unrolled, and by how much we unroll them.  */
 161
 162 #ifndef MAX_UNROLLED_INSNS
 163 #define MAX_UNROLLED_INSNS 100
 164 #endif
 165
 166 /* Indexed by register number, if non-zero, then it contains a pointer
 167    to a struct induction for a DEST_REG giv which has been combined with
 168    one or more address givs.  This is needed because whenever such a DEST_REG
 169    giv is modified, we must modify the value of all split address givs
 170    that were combined with this DEST_REG giv.  */
 171
 172 static struct induction **addr_combined_regs;
 173
 174 /* Indexed by register number.  If this is a splittable induction variable,
 175    then this will hold the current value of the register, which depends on the
 176    iteration number.  */
 177
 178 static rtx *splittable_regs;
 179
 180 /* Indexed by register number.  If this is a splittable induction variable,
 181    then this will hold the number of instructions in the loop that modify
 182    the induction variable.  Used to ensure that only the last insn modifying
 183    a split iv will update the original iv of the dest.  */
 184
 185 static int *splittable_regs_updates;
 186
 187 /* Values describing the current loop's iteration variable.  These are set up
 188    by loop_iterations, and used by precondition_loop_p.  */
 189
 190 static rtx loop_iteration_var;
 191 static rtx loop_initial_value;
 192 static rtx loop_increment;
 193 static rtx loop_final_value;
 194
 195 /* Forward declarations of the static functions local to this file.  */
 196
 197 static void init_reg_map PROTO((struct inline_remap *, int));
 198 static int precondition_loop_p PROTO((rtx *, rtx *, rtx *, rtx, rtx));
 199 static rtx calculate_giv_inc PROTO((rtx, rtx, int));
 200 static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
 201 static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
 202 static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
 203                                   enum unroll_types, rtx, rtx, rtx, rtx));
 204 static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
 205 static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
 206 static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
 207 static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
 208                                        rtx, rtx, rtx, int));
 209 static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
 210 static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
 211 static rtx remap_split_bivs PROTO((rtx));
 212
 213 /* Try to unroll one loop and split induction variables in the loop.
 214
 215    The loop is described by the arguments LOOP_END, INSN_COUNT, and
 216    LOOP_START.  END_INSERT_BEFORE indicates where insns should be added
 217    which need to be executed when the loop falls through.  STRENGTH_REDUCE_P
 218    indicates whether information generated in the strength reduction pass
 219    is available.
 220
 221    This function is intended to be called from within `strength_reduce'
 222    in loop.c.  */
 223
 224 void
 225 unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 226              strength_reduce_p)
 227      rtx loop_end;
 228      int insn_count;
 229      rtx loop_start;
 230      rtx end_insert_before;
 231      int strength_reduce_p;
 232 {
 233   int i, j, temp;
 234   int unroll_number = 1;
 235   rtx copy_start, copy_end;
 236   rtx insn, copy, sequence, pattern, tem;
 237   int max_labelno, max_insnno;
 238   rtx insert_before;
 239   struct inline_remap *map;
 240   char *local_label;
 241   char *local_regno;
 242   int maxregnum;
 243   int new_maxregnum;
 244   rtx exit_label = 0;
 245   rtx start_label;
 246   struct iv_class *bl;
 247   int splitting_not_safe = 0;
 248   enum unroll_types unroll_type;
 249   int loop_preconditioned = 0;
 250   rtx safety_label;
 251   /* This points to the last real insn in the loop, which should be either
 252      a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
 253      jumps).  */
 254   rtx last_loop_insn;
 255
 256   /* Don't bother unrolling huge loops.  Since the minimum factor is
 257      two, loops greater than one half of MAX_UNROLLED_INSNS will never
 258      be unrolled.  */
 259   if (insn_count > MAX_UNROLLED_INSNS / 2)
 260     {
 261       if (loop_dump_stream)
 262         fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
 263       return;
 264     }
 265
 266   /* When emitting debugger info, we can't unroll loops with unequal numbers
 267      of block_beg and block_end notes, because that would unbalance the block
 268      structure of the function.  This can happen as a result of the
 269      "if (foo) bar; else break;" optimization in jump.c.  */
 270
 271   if (write_symbols != NO_DEBUG)
 272     {
 273       int block_begins = 0;
 274       int block_ends = 0;
 275
 276       for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
 277         {
 278           if (GET_CODE (insn) == NOTE)
 279             {
 280               if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
 281                 block_begins++;
 282               else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
 283                 block_ends++;
 284             }
 285         }
 286
 287       if (block_begins != block_ends)
 288         {
 289           if (loop_dump_stream)
 290             fprintf (loop_dump_stream,
 291                      "Unrolling failure: Unbalanced block notes.\n");
 292           return;
 293         }
 294     }
 295
 296   /* Determine type of unroll to perform.  Depends on the number of iterations
 297      and the size of the loop.  */
 298
 299   /* If there is no strength reduce info, then set loop_n_iterations to zero.
 300      This can happen if strength_reduce can't find any bivs in the loop.
 301      A value of zero indicates that the number of iterations could not be
 302      calculated.  */
 303
 304   if (! strength_reduce_p)
 305     loop_n_iterations = 0;
 306
 307   if (loop_dump_stream && loop_n_iterations > 0)
 308     fprintf (loop_dump_stream,
 309              "Loop unrolling: %d iterations.\n", loop_n_iterations);
 310
 311   /* Find and save a pointer to the last nonnote insn in the loop.  */
 312
 313   last_loop_insn = prev_nonnote_insn (loop_end);
 314
 315   /* Calculate how many times to unroll the loop.  Indicate whether or
 316      not the loop is being completely unrolled.  */
 317
 318   if (loop_n_iterations == 1)
 319     {
 320       /* If number of iterations is exactly 1, then eliminate the compare and
 321          branch at the end of the loop since they will never be taken.
 322          Then return, since no other action is needed here.  */
 323
 324       /* If the last instruction is not a BARRIER or a JUMP_INSN, then
 325          don't do anything.  */
 326
 327       if (GET_CODE (last_loop_insn) == BARRIER)
 328         {
 329           /* Delete the jump insn.  This will delete the barrier also.  */
 330           delete_insn (PREV_INSN (last_loop_insn));
 331         }
 332       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 333         {
 334 #ifdef HAVE_cc0
 335           /* The immediately preceding insn is a compare which must be
 336              deleted.  */
 337           delete_insn (last_loop_insn);
 338           delete_insn (PREV_INSN (last_loop_insn));
 339 #else
 340           /* The immediately preceding insn may not be the compare, so don't
 341              delete it.  */
 342           delete_insn (last_loop_insn);
 343 #endif
 344         }
 345       return;
 346     }
 347   else if (loop_n_iterations > 0
 348       && loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
 349     {
 350       unroll_number = loop_n_iterations;
 351       unroll_type = UNROLL_COMPLETELY;
 352     }
 353   else if (loop_n_iterations > 0)
 354     {
 355       /* Try to factor the number of iterations.  Don't bother with the
 356          general case, only using 2, 3, 5, and 7 will get 75% of all
 357          numbers theoretically, and almost all in practice.  */
 358
 359       for (i = 0; i < NUM_FACTORS; i++)
 360         factors[i].count = 0;
 361
 362       temp = loop_n_iterations;
 363       for (i = NUM_FACTORS - 1; i >= 0; i--)
 364         while (temp % factors[i].factor == 0)
 365           {
 366             factors[i].count++;
 367             temp = temp / factors[i].factor;
 368           }
 369
 370       /* Start with the larger factors first so that we generally
 371          get lots of unrolling.  */
 372
 373       unroll_number = 1;
 374       temp = insn_count;
 375       for (i = 3; i >= 0; i--)
 376         while (factors[i].count--)
 377           {
 378             if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
 379               {
 380                 unroll_number *= factors[i].factor;
 381                 temp *= factors[i].factor;
 382               }
 383             else
 384               break;
 385           }
 386
 387       /* If we couldn't find any factors, then unroll as in the normal
 388          case.  */
 389       if (unroll_number == 1)
 390         {
 391           if (loop_dump_stream)
 392             fprintf (loop_dump_stream,
 393                      "Loop unrolling: No factors found.\n");
 394         }
 395       else
 396         unroll_type = UNROLL_MODULO;
 397     }
 398
 399
 400   /* Default case, calculate number of times to unroll loop based on its
 401      size.  */
 402   if (unroll_number == 1)
 403     {
 404       if (8 * insn_count < MAX_UNROLLED_INSNS)
 405         unroll_number = 8;
 406       else if (4 * insn_count < MAX_UNROLLED_INSNS)
 407         unroll_number = 4;
 408       else
 409         unroll_number = 2;
 410
 411       unroll_type = UNROLL_NAIVE;
 412     }
 413
 414   /* Now we know how many times to unroll the loop.  */
 415
 416   if (loop_dump_stream)
 417     fprintf (loop_dump_stream,
 418              "Unrolling loop %d times.\n", unroll_number);
 419
 420
 421   if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
 422     {
 423       /* Loops of these types should never start with a jump down to
 424          the exit condition test.  For now, check for this case just to
 425          be sure.  UNROLL_NAIVE loops can be of this form, this case is
 426          handled below.  */
 427       insn = loop_start;
 428       while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
 429         insn = NEXT_INSN (insn);
 430       if (GET_CODE (insn) == JUMP_INSN)
 431         abort ();
 432     }
 433
 434   if (unroll_type == UNROLL_COMPLETELY)
 435     {
 436       /* Completely unrolling the loop:  Delete the compare and branch at
 437          the end (the last two instructions).  This delete must be done at the
 438          very end of loop unrolling, to avoid problems with calls to
 439          back_branch_in_range_p, which is called by find_splittable_regs.
 440          All increments of splittable bivs/givs are changed to load constant
 441          instructions.  */
 442
 443       copy_start = loop_start;
 444
 445       /* Set insert_before to the instruction immediately after the JUMP_INSN
 446          (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
 447          the loop will be correctly handled by copy_loop_body.  */
 448       insert_before = NEXT_INSN (last_loop_insn);
 449
 450       /* Set copy_end to the insn before the jump at the end of the loop.  */
 451       if (GET_CODE (last_loop_insn) == BARRIER)
 452         copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 453       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 454         {
 455 #ifdef HAVE_cc0
 456           /* The instruction immediately before the JUMP_INSN is a compare
 457              instruction which we do not want to copy.  */
 458           copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 459 #else
 460           /* The instruction immediately before the JUMP_INSN may not be the
 461              compare, so we must copy it.  */
 462           copy_end = PREV_INSN (last_loop_insn);
 463 #endif
 464         }
 465       else
 466         {
 467           /* We currently can't unroll a loop if it doesn't end with a
 468              JUMP_INSN.  There would need to be a mechanism that recognizes
 469              this case, and then inserts a jump after each loop body, which
 470              jumps to after the last loop body.  */
 471           if (loop_dump_stream)
 472             fprintf (loop_dump_stream,
 473                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 474           return;
 475         }
 476     }
 477   else if (unroll_type == UNROLL_MODULO)
 478     {
 479       /* Partially unrolling the loop:  The compare and branch at the end
 480          (the last two instructions) must remain.  Don't copy the compare
 481          and branch instructions at the end of the loop.  Insert the unrolled
 482          code immediately before the compare/branch at the end so that the
 483          code will fall through to them as before.  */
 484
 485       copy_start = loop_start;
 486
 487       /* Set insert_before to the jump insn at the end of the loop.
 488          Set copy_end to before the jump insn at the end of the loop.  */
 489       if (GET_CODE (last_loop_insn) == BARRIER)
 490         {
 491           insert_before = PREV_INSN (last_loop_insn);
 492           copy_end = PREV_INSN (insert_before);
 493         }
 494       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 495         {
 496 #ifdef HAVE_cc0
 497           /* The instruction immediately before the JUMP_INSN is a compare
 498              instruction which we do not want to copy or delete.  */
 499           insert_before = PREV_INSN (last_loop_insn);
 500           copy_end = PREV_INSN (insert_before);
 501 #else
 502           /* The instruction immediately before the JUMP_INSN may not be the
 503              compare, so we must copy it.  */
 504           insert_before = last_loop_insn;
 505           copy_end = PREV_INSN (last_loop_insn);
 506 #endif
 507         }
 508       else
 509         {
 510           /* We currently can't unroll a loop if it doesn't end with a
 511              JUMP_INSN.  There would need to be a mechanism that recognizes
 512              this case, and then inserts a jump after each loop body, which
 513              jumps to after the last loop body.  */
 514           if (loop_dump_stream)
 515             fprintf (loop_dump_stream,
 516                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 517           return;
 518         }
 519     }
 520   else
 521     {
 522       /* Normal case: Must copy the compare and branch instructions at the
 523          end of the loop.  */
 524
 525       if (GET_CODE (last_loop_insn) == BARRIER)
 526         {
 527           /* Loop ends with an unconditional jump and a barrier.
 528              Handle this like above, don't copy jump and barrier.
 529              This is not strictly necessary, but doing so prevents generating
 530              unconditional jumps to an immediately following label.
 531
 532              This will be corrected below if the target of this jump is
 533              not the start_label.  */
 534
 535           insert_before = PREV_INSN (last_loop_insn);
 536           copy_end = PREV_INSN (insert_before);
 537         }
 538       else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 539         {
 540           /* Set insert_before to immediately after the JUMP_INSN, so that
 541              NOTEs at the end of the loop will be correctly handled by
 542              copy_loop_body.  */
 543           insert_before = NEXT_INSN (last_loop_insn);
 544           copy_end = last_loop_insn;
 545         }
 546       else
 547         {
 548           /* We currently can't unroll a loop if it doesn't end with a
 549              JUMP_INSN.  There would need to be a mechanism that recognizes
 550              this case, and then inserts a jump after each loop body, which
 551              jumps to after the last loop body.  */
 552           if (loop_dump_stream)
 553             fprintf (loop_dump_stream,
 554                      "Unrolling failure: loop does not end with a JUMP_INSN.\n");
 555           return;
 556         }
 557
 558       /* If copying exit test branches because they can not be eliminated,
 559          then must convert the fall through case of the branch to a jump past
 560          the end of the loop.  Create a label to emit after the loop and save
 561          it for later use.  Do not use the label after the loop, if any, since
 562          it might be used by insns outside the loop, or there might be insns
 563          added before it later by final_[bg]iv_value which must be after
 564          the real exit label.  */
 565       exit_label = gen_label_rtx ();
 566
 567       insn = loop_start;
 568       while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
 569         insn = NEXT_INSN (insn);
 570
 571       if (GET_CODE (insn) == JUMP_INSN)
 572         {
 573           /* The loop starts with a jump down to the exit condition test.
 574              Start copying the loop after the barrier following this
 575              jump insn.  */
 576           copy_start = NEXT_INSN (insn);
 577
 578           /* Splitting induction variables doesn't work when the loop is
 579              entered via a jump to the bottom, because then we end up doing
 580              a comparison against a new register for a split variable, but
 581              we did not execute the set insn for the new register because
 582              it was skipped over.  */
 583           splitting_not_safe = 1;
 584           if (loop_dump_stream)
 585             fprintf (loop_dump_stream,
 586                      "Splitting not safe, because loop not entered at top.\n");
 587         }
 588       else
 589         copy_start = loop_start;
 590     }
 591
 592   /* This should always be the first label in the loop.  */
 593   start_label = NEXT_INSN (copy_start);
 594   /* There may be a line number note and/or a loop continue note here.  */
 595   while (GET_CODE (start_label) == NOTE)
 596     start_label = NEXT_INSN (start_label);
 597   if (GET_CODE (start_label) != CODE_LABEL)
 598     {
 599       /* This can happen as a result of jump threading.  If the first insns in
 600          the loop test the same condition as the loop's backward jump, or the
 601          opposite condition, then the backward jump will be modified to point
 602          to elsewhere, and the loop's start label is deleted.
 603
 604          This case currently can not be handled by the loop unrolling code.  */
 605
 606       if (loop_dump_stream)
 607         fprintf (loop_dump_stream,
 608                  "Unrolling failure: unknown insns between BEG note and loop label.\n");
 609       return;
 610     }
 611   if (LABEL_NAME (start_label))
 612     {
 613       /* The jump optimization pass must have combined the original start label
 614          with a named label for a goto.  We can't unroll this case because
 615          jumps which go to the named label must be handled differently than
 616          jumps to the loop start, and it is impossible to differentiate them
 617          in this case.  */
 618       if (loop_dump_stream)
 619         fprintf (loop_dump_stream,
 620                  "Unrolling failure: loop start label is gone\n");
 621       return;
 622     }
 623
 624   if (unroll_type == UNROLL_NAIVE
 625       && GET_CODE (last_loop_insn) == BARRIER
 626       && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
 627     {
 628       /* In this case, we must copy the jump and barrier, because they will
 629          not be converted to jumps to an immediately following label.  */
 630
 631       insert_before = NEXT_INSN (last_loop_insn);
 632       copy_end = last_loop_insn;
 633     }
 634
 635   /* Allocate a translation table for the labels and insn numbers.
 636      They will be filled in as we copy the insns in the loop.  */
 637
 638   max_labelno = max_label_num ();
 639   max_insnno = get_max_uid ();
 640
 641   map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
 642
 643   map->integrating = 0;
 644
 645   /* Allocate the label map.  */
 646
 647   if (max_labelno > 0)
 648     {
 649       map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx));
 650
 651       local_label = (char *) alloca (max_labelno);
 652       bzero (local_label, max_labelno);
 653     }
 654   else
 655     map->label_map = 0;
 656
 657   /* Search the loop and mark all local labels, i.e. the ones which have to
 658      be distinct labels when copied.  For all labels which might be
 659      non-local, set their label_map entries to point to themselves.
 660      If they happen to be local their label_map entries will be overwritten
 661      before the loop body is copied.  The label_map entries for local labels
 662      will be set to a different value each time the loop body is copied.  */
 663
 664   for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
 665     {
 666       if (GET_CODE (insn) == CODE_LABEL)
 667         local_label[CODE_LABEL_NUMBER (insn)] = 1;
 668       else if (GET_CODE (insn) == JUMP_INSN)
 669         {
 670           if (JUMP_LABEL (insn))
 671             map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))]
 672               = JUMP_LABEL (insn);
 673           else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
 674                    || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
 675             {
 676               rtx pat = PATTERN (insn);
 677               int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
 678               int len = XVECLEN (pat, diff_vec_p);
 679               rtx label;
 680
 681               for (i = 0; i < len; i++)
 682                 {
 683                   label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
 684                   map->label_map[CODE_LABEL_NUMBER (label)] = label;
 685                 }
 686             }
 687         }
 688     }
 689
 690   /* Allocate space for the insn map.  */
 691
 692   map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx));
 693
 694   /* Set this to zero, to indicate that we are doing loop unrolling,
 695      not function inlining.  */
 696   map->inline_target = 0;
 697
 698   /* The register and constant maps depend on the number of registers
 699      present, so the final maps can't be created until after
 700      find_splittable_regs is called.  However, they are needed for
 701      preconditioning, so we create temporary maps when preconditioning
 702      is performed.  */
 703
 704   /* The preconditioning code may allocate two new pseudo registers.  */
 705   maxregnum = max_reg_num ();
 706
 707   /* Allocate and zero out the splittable_regs and addr_combined_regs
 708      arrays.  These must be zeroed here because they will be used if
 709      loop preconditioning is performed, and must be zero for that case.
 710
 711      It is safe to do this here, since the extra registers created by the
 712      preconditioning code and find_splittable_regs will never be used
 713      to access the splittable_regs[] and addr_combined_regs[] arrays.  */
 714
 715   splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx));
 716   bzero ((char *) splittable_regs, maxregnum * sizeof (rtx));
 717   splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int));
 718   bzero ((char *) splittable_regs_updates, maxregnum * sizeof (int));
 719   addr_combined_regs
 720     = (struct induction **) alloca (maxregnum * sizeof (struct induction *));
 721   bzero ((char *) addr_combined_regs, maxregnum * sizeof (struct induction *));
 722   /* We must limit it to max_reg_before_loop, because only these pseudo
 723      registers have valid regno_first_uid info.  Any register created after
 724      that is unlikely to be local to the loop anyways.  */
 725   local_regno = (char *) alloca (max_reg_before_loop);
 726   bzero (local_regno, max_reg_before_loop);
 727
 728   /* Mark all local registers, i.e. the ones which are referenced only
 729      inside the loop.  */
 730   if (INSN_UID (copy_end) < max_uid_for_loop)
 731   {
 732     int copy_start_luid = INSN_LUID (copy_start);
 733     int copy_end_luid = INSN_LUID (copy_end);
 734
 735     /* If a register is used in the jump insn, we must not duplicate it
 736        since it will also be used outside the loop.  */
 737     if (GET_CODE (copy_end) == JUMP_INSN)
 738       copy_end_luid--;
 739     /* If copy_start points to the NOTE that starts the loop, then we must
 740        use the next luid, because invariant pseudo-regs moved out of the loop
 741        have their lifetimes modified to start here, but they are not safe
 742        to duplicate.  */
 743     if (copy_start == loop_start)
 744       copy_start_luid++;
 745
 746     /* If a pseudo's lifetime is entirely contained within this loop, then we
 747        can use a different pseudo in each unrolled copy of the loop.  This
 748        results in better code.  */
 749     for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j)
 750       if (regno_first_uid[j] > 0 && regno_first_uid[j] <= max_uid_for_loop
 751           && uid_luid[regno_first_uid[j]] >= copy_start_luid
 752           && regno_last_uid[j] > 0 && regno_last_uid[j] <= max_uid_for_loop
 753           && uid_luid[regno_last_uid[j]] <= copy_end_luid)
 754         {
 755           /* However, we must also check for loop-carried dependencies.
 756              If the value the pseudo has at the end of iteration X is
 757              used by iteration X+1, then we can not use a different pseudo
 758              for each unrolled copy of the loop.  */
 759           /* A pseudo is safe if regno_first_uid is a set, and this
 760              set dominates all instructions from regno_first_uid to
 761              regno_last_uid.  */
 762           /* ??? This check is simplistic.  We would get better code if
 763              this check was more sophisticated.  */
 764           if (set_dominates_use (j, regno_first_uid[j], regno_last_uid[j],
 765                                  copy_start, copy_end))
 766             local_regno[j] = 1;
 767
 768           if (loop_dump_stream)
 769             {
 770               if (local_regno[j])
 771                 fprintf (loop_dump_stream, "Marked reg %d as local\n", j);
 772               else
 773                 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
 774                          j);
 775             }
 776         }
 777   }
 778
 779   /* If this loop requires exit tests when unrolled, check to see if we
 780      can precondition the loop so as to make the exit tests unnecessary.
 781      Just like variable splitting, this is not safe if the loop is entered
 782      via a jump to the bottom.  Also, can not do this if no strength
 783      reduce info, because precondition_loop_p uses this info.  */
 784
 785   /* Must copy the loop body for preconditioning before the following
 786      find_splittable_regs call since that will emit insns which need to
 787      be after the preconditioned loop copies, but immediately before the
 788      unrolled loop copies.  */
 789
 790   /* Also, it is not safe to split induction variables for the preconditioned
 791      copies of the loop body.  If we split induction variables, then the code
 792      assumes that each induction variable can be represented as a function
 793      of its initial value and the loop iteration number.  This is not true
 794      in this case, because the last preconditioned copy of the loop body
 795      could be any iteration from the first up to the `unroll_number-1'th,
 796      depending on the initial value of the iteration variable.  Therefore
 797      we can not split induction variables here, because we can not calculate
 798      their value.  Hence, this code must occur before find_splittable_regs
 799      is called.  */
 800
 801   if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
 802     {
 803       rtx initial_value, final_value, increment;
 804
 805       if (precondition_loop_p (&initial_value, &final_value, &increment,
 806                                loop_start, loop_end))
 807         {
 808           register rtx diff, temp;
 809           enum machine_mode mode;
 810           rtx *labels;
 811           int abs_inc, neg_inc;
 812
 813           map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
 814
 815           map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx));
 816           map->const_age_map = (unsigned *) alloca (maxregnum
 817                                                     * sizeof (unsigned));
 818           map->const_equiv_map_size = maxregnum;
 819           global_const_equiv_map = map->const_equiv_map;
 820           global_const_equiv_map_size = maxregnum;
 821
 822           init_reg_map (map, maxregnum);
 823
 824           /* Limit loop unrolling to 4, since this will make 7 copies of
 825              the loop body.  */
 826           if (unroll_number > 4)
 827             unroll_number = 4;
 828
 829           /* Save the absolute value of the increment, and also whether or
 830              not it is negative.  */
 831           neg_inc = 0;
 832           abs_inc = INTVAL (increment);
 833           if (abs_inc < 0)
 834             {
 835               abs_inc = - abs_inc;
 836               neg_inc = 1;
 837             }
 838
 839           start_sequence ();
 840
 841           /* Decide what mode to do these calculations in.  Choose the larger
 842              of final_value's mode and initial_value's mode, or a full-word if
 843              both are constants.  */
 844           mode = GET_MODE (final_value);
 845           if (mode == VOIDmode)
 846             {
 847               mode = GET_MODE (initial_value);
 848               if (mode == VOIDmode)
 849                 mode = word_mode;
 850             }
 851           else if (mode != GET_MODE (initial_value)
 852                    && (GET_MODE_SIZE (mode)
 853                        < GET_MODE_SIZE (GET_MODE (initial_value))))
 854             mode = GET_MODE (initial_value);
 855
 856           /* Calculate the difference between the final and initial values.
 857              Final value may be a (plus (reg x) (const_int 1)) rtx.
 858              Let the following cse pass simplify this if initial value is
 859              a constant.
 860
 861              We must copy the final and initial values here to avoid
 862              improperly shared rtl.  */
 863
 864           diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
 865                                copy_rtx (initial_value), NULL_RTX, 0,
 866                                OPTAB_LIB_WIDEN);
 867
 868           /* Now calculate (diff % (unroll * abs (increment))) by using an
 869              and instruction.  */
 870           diff = expand_binop (GET_MODE (diff), and_optab, diff,
 871                                GEN_INT (unroll_number * abs_inc - 1),
 872                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 873
 874           /* Now emit a sequence of branches to jump to the proper precond
 875              loop entry point.  */
 876
 877           labels = (rtx *) alloca (sizeof (rtx) * unroll_number);
 878           for (i = 0; i < unroll_number; i++)
 879             labels[i] = gen_label_rtx ();
 880
 881           /* Check for the case where the initial value is greater than or equal
 882              to the final value.  In that case, we want to execute exactly
 883              one loop iteration.  The code below will fail for this case.  */
 884
 885           emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE,
 886                          NULL_RTX, mode, 0, 0);
 887           if (neg_inc)
 888             emit_jump_insn (gen_ble (labels[1]));
 889           else
 890             emit_jump_insn (gen_bge (labels[1]));
 891           JUMP_LABEL (get_last_insn ()) = labels[1];
 892           LABEL_NUSES (labels[1])++;
 893
 894           /* Assuming the unroll_number is 4, and the increment is 2, then
 895              for a negative increment:  for a positive increment:
 896              diff = 0,1   precond 0     diff = 0,7   precond 0
 897              diff = 2,3   precond 3     diff = 1,2   precond 1
 898              diff = 4,5   precond 2     diff = 3,4   precond 2
 899              diff = 6,7   precond 1     diff = 5,6   precond 3  */
 900
 901           /* We only need to emit (unroll_number - 1) branches here, the
 902              last case just falls through to the following code.  */
 903
 904           /* ??? This would give better code if we emitted a tree of branches
 905              instead of the current linear list of branches.  */
 906
 907           for (i = 0; i < unroll_number - 1; i++)
 908             {
 909               int cmp_const;
 910               enum rtx_code cmp_code;
 911
 912               /* For negative increments, must invert the constant compared
 913                  against, except when comparing against zero.  */
 914               if (i == 0)
 915                 {
 916                   cmp_const = 0;
 917                   cmp_code = EQ;
 918                 }
 919               else if (neg_inc)
 920                 {
 921                   cmp_const = unroll_number - i;
 922                   cmp_code = GE;
 923                 }
 924               else
 925                 {
 926                   cmp_const = i;
 927                   cmp_code = LE;
 928                 }
 929
 930               emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const),
 931                              cmp_code, NULL_RTX, mode, 0, 0);
 932
 933               if (i == 0)
 934                 emit_jump_insn (gen_beq (labels[i]));
 935               else if (neg_inc)
 936                 emit_jump_insn (gen_bge (labels[i]));
 937               else
 938                 emit_jump_insn (gen_ble (labels[i]));
 939               JUMP_LABEL (get_last_insn ()) = labels[i];
 940               LABEL_NUSES (labels[i])++;
 941             }
 942
 943           /* If the increment is greater than one, then we need another branch,
 944              to handle other cases equivalent to 0.  */
 945
 946           /* ??? This should be merged into the code above somehow to help
 947              simplify the code here, and reduce the number of branches emitted.
 948              For the negative increment case, the branch here could easily
 949              be merged with the `0' case branch above.  For the positive
 950              increment case, it is not clear how this can be simplified.  */
 951
 952           if (abs_inc != 1)
 953             {
 954               int cmp_const;
 955               enum rtx_code cmp_code;
 956
 957               if (neg_inc)
 958                 {
 959                   cmp_const = abs_inc - 1;
 960                   cmp_code = LE;
 961                 }
 962               else
 963                 {
 964                   cmp_const = abs_inc * (unroll_number - 1) + 1;
 965                   cmp_code = GE;
 966                 }
 967
 968               emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX,
 969                              mode, 0, 0);
 970
 971               if (neg_inc)
 972                 emit_jump_insn (gen_ble (labels[0]));
 973               else
 974                 emit_jump_insn (gen_bge (labels[0]));
 975               JUMP_LABEL (get_last_insn ()) = labels[0];
 976               LABEL_NUSES (labels[0])++;
 977             }
 978
 979           sequence = gen_sequence ();
 980           end_sequence ();
 981           emit_insn_before (sequence, loop_start);
 982
 983           /* Only the last copy of the loop body here needs the exit
 984              test, so set copy_end to exclude the compare/branch here,
 985              and then reset it inside the loop when get to the last
 986              copy.  */
 987
 988           if (GET_CODE (last_loop_insn) == BARRIER)
 989             copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 990           else if (GET_CODE (last_loop_insn) == JUMP_INSN)
 991             {
 992 #ifdef HAVE_cc0
 993               /* The immediately preceding insn is a compare which we do not
 994                  want to copy.  */
 995               copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
 996 #else
 997               /* The immediately preceding insn may not be a compare, so we
 998                  must copy it.  */
 999               copy_end = PREV_INSN (last_loop_insn);
1000 #endif
1001             }
1002           else
1003             abort ();
1004
1005           for (i = 1; i < unroll_number; i++)
1006             {
1007               emit_label_after (labels[unroll_number - i],
1008                                 PREV_INSN (loop_start));
1009
1010               bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1011               bzero ((char *) map->const_equiv_map, maxregnum * sizeof (rtx));
1012               bzero ((char *) map->const_age_map,
1013                      maxregnum * sizeof (unsigned));
1014               map->const_age = 0;
1015
1016               for (j = 0; j < max_labelno; j++)
1017                 if (local_label[j])
1018                   map->label_map[j] = gen_label_rtx ();
1019
1020               for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1021                 if (local_regno[j])
1022                   map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1023
1024               /* The last copy needs the compare/branch insns at the end,
1025                  so reset copy_end here if the loop ends with a conditional
1026                  branch.  */
1027
1028               if (i == unroll_number - 1)
1029                 {
1030                   if (GET_CODE (last_loop_insn) == BARRIER)
1031                     copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1032                   else
1033                     copy_end = last_loop_insn;
1034                 }
1035
1036               /* None of the copies are the `last_iteration', so just
1037                  pass zero for that parameter.  */
1038               copy_loop_body (copy_start, copy_end, map, exit_label, 0,
1039                               unroll_type, start_label, loop_end,
1040                               loop_start, copy_end);
1041             }
1042           emit_label_after (labels[0], PREV_INSN (loop_start));
1043
1044           if (GET_CODE (last_loop_insn) == BARRIER)
1045             {
1046               insert_before = PREV_INSN (last_loop_insn);
1047               copy_end = PREV_INSN (insert_before);
1048             }
1049           else
1050             {
1051 #ifdef HAVE_cc0
1052               /* The immediately preceding insn is a compare which we do not
1053                  want to copy.  */
1054               insert_before = PREV_INSN (last_loop_insn);
1055               copy_end = PREV_INSN (insert_before);
1056 #else
1057               /* The immediately preceding insn may not be a compare, so we
1058                  must copy it.  */
1059               insert_before = last_loop_insn;
1060               copy_end = PREV_INSN (last_loop_insn);
1061 #endif
1062             }
1063
1064           /* Set unroll type to MODULO now.  */
1065           unroll_type = UNROLL_MODULO;
1066           loop_preconditioned = 1;
1067         }
1068     }
1069
1070   /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1071      the loop unless all loops are being unrolled.  */
1072   if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1073     {
1074       if (loop_dump_stream)
1075         fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1076       return;
1077     }
1078
1079   /* At this point, we are guaranteed to unroll the loop.  */
1080
1081   /* For each biv and giv, determine whether it can be safely split into
1082      a different variable for each unrolled copy of the loop body.
1083      We precalculate and save this info here, since computing it is
1084      expensive.
1085
1086      Do this before deleting any instructions from the loop, so that
1087      back_branch_in_range_p will work correctly.  */
1088
1089   if (splitting_not_safe)
1090     temp = 0;
1091   else
1092     temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1093                                 end_insert_before, unroll_number);
1094
1095   /* find_splittable_regs may have created some new registers, so must
1096      reallocate the reg_map with the new larger size, and must realloc
1097      the constant maps also.  */
1098
1099   maxregnum = max_reg_num ();
1100   map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
1101
1102   init_reg_map (map, maxregnum);
1103
1104   /* Space is needed in some of the map for new registers, so new_maxregnum
1105      is an (over)estimate of how many registers will exist at the end.  */
1106   new_maxregnum = maxregnum + (temp * unroll_number * 2);
1107
1108   /* Must realloc space for the constant maps, because the number of registers
1109      may have changed.  */
1110
1111   map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx));
1112   map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned));
1113
1114   map->const_equiv_map_size = new_maxregnum;
1115   global_const_equiv_map = map->const_equiv_map;
1116   global_const_equiv_map_size = new_maxregnum;
1117
1118   /* Search the list of bivs and givs to find ones which need to be remapped
1119      when split, and set their reg_map entry appropriately.  */
1120
1121   for (bl = loop_iv_list; bl; bl = bl->next)
1122     {
1123       if (REGNO (bl->biv->src_reg) != bl->regno)
1124         map->reg_map[bl->regno] = bl->biv->src_reg;
1125 #if 0
1126       /* Currently, non-reduced/final-value givs are never split.  */
1127       for (v = bl->giv; v; v = v->next_iv)
1128         if (REGNO (v->src_reg) != bl->regno)
1129           map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1130 #endif
1131     }
1132
1133   /* Use our current register alignment and pointer flags.  */
1134   map->regno_pointer_flag = regno_pointer_flag;
1135   map->regno_pointer_align = regno_pointer_align;
1136
1137   /* If the loop is being partially unrolled, and the iteration variables
1138      are being split, and are being renamed for the split, then must fix up
1139      the compare/jump instruction at the end of the loop to refer to the new
1140      registers.  This compare isn't copied, so the registers used in it
1141      will never be replaced if it isn't done here.  */
1142
1143   if (unroll_type == UNROLL_MODULO)
1144     {
1145       insn = NEXT_INSN (copy_end);
1146       if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1147         PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1148     }
1149
1150   /* For unroll_number times, make a copy of each instruction
1151      between copy_start and copy_end, and insert these new instructions
1152      before the end of the loop.  */
1153
1154   for (i = 0; i < unroll_number; i++)
1155     {
1156       bzero ((char *) map->insn_map, max_insnno * sizeof (rtx));
1157       bzero ((char *) map->const_equiv_map, new_maxregnum * sizeof (rtx));
1158       bzero ((char *) map->const_age_map, new_maxregnum * sizeof (unsigned));
1159       map->const_age = 0;
1160
1161       for (j = 0; j < max_labelno; j++)
1162         if (local_label[j])
1163           map->label_map[j] = gen_label_rtx ();
1164
1165       for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1166         if (local_regno[j])
1167           map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1168
1169       /* If loop starts with a branch to the test, then fix it so that
1170          it points to the test of the first unrolled copy of the loop.  */
1171       if (i == 0 && loop_start != copy_start)
1172         {
1173           insn = PREV_INSN (copy_start);
1174           pattern = PATTERN (insn);
1175
1176           tem = map->label_map[CODE_LABEL_NUMBER
1177                                (XEXP (SET_SRC (pattern), 0))];
1178           SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem);
1179
1180           /* Set the jump label so that it can be used by later loop unrolling
1181              passes.  */
1182           JUMP_LABEL (insn) = tem;
1183           LABEL_NUSES (tem)++;
1184         }
1185
1186       copy_loop_body (copy_start, copy_end, map, exit_label,
1187                       i == unroll_number - 1, unroll_type, start_label,
1188                       loop_end, insert_before, insert_before);
1189     }
1190
1191   /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1192      insn to be deleted.  This prevents any runaway delete_insn call from
1193      deleting more insns than it should, as it always stops at a CODE_LABEL.  */
1194
1195   /* Delete the compare and branch at the end of the loop if completely
1196      unrolling the loop.  Deleting the backward branch at the end also
1197      deletes the code label at the start of the loop.  This is done at
1198      the very end to avoid problems with back_branch_in_range_p.  */
1199
1200   if (unroll_type == UNROLL_COMPLETELY)
1201     safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1202   else
1203     safety_label = emit_label_after (gen_label_rtx (), copy_end);
1204
1205   /* Delete all of the original loop instructions.  Don't delete the
1206      LOOP_BEG note, or the first code label in the loop.  */
1207
1208   insn = NEXT_INSN (copy_start);
1209   while (insn != safety_label)
1210     {
1211       if (insn != start_label)
1212         insn = delete_insn (insn);
1213       else
1214         insn = NEXT_INSN (insn);
1215     }
1216
1217   /* Can now delete the 'safety' label emitted to protect us from runaway
1218      delete_insn calls.  */
1219   if (INSN_DELETED_P (safety_label))
1220     abort ();
1221   delete_insn (safety_label);
1222
1223   /* If exit_label exists, emit it after the loop.  Doing the emit here
1224      forces it to have a higher INSN_UID than any insn in the unrolled loop.
1225      This is needed so that mostly_true_jump in reorg.c will treat jumps
1226      to this loop end label correctly, i.e. predict that they are usually
1227      not taken.  */
1228   if (exit_label)
1229     emit_label_after (exit_label, loop_end);
1230 }
1231 \f
1232 /* Return nonzero if the loop can be safely, and profitably, preconditioned
1233    so that the unrolled copies of the loop body don't need exit tests.  On
1234    success the values to precondition with are stored through INITIAL_VALUE,
1235    FINAL_VALUE and INCREMENT; on failure nothing is stored.  LOOP_START is
1236    used only for the liveness check on the iteration variable, and LOOP_END
1237    is not used.  This only works if those three values can be determined and
1238    the increment is a constant power of 2 (or its negation); otherwise a real
1239    modulo would be needed instead of an AND, which is not `profitable'.  */
1240
1241 /* ??? If the loop is known to be executed very many times, or the machine
1242    has a very cheap divide instruction, then preconditioning is a win even
1243    when the increment is not a power of 2.  Use RTX_COST to compute
1244    whether divide is cheap.  */
1245
1246 static int
1247 precondition_loop_p (initial_value, final_value, increment, loop_start,
1248                      loop_end)
1249      rtx *initial_value, *final_value, *increment;
1250      rtx loop_start, loop_end;
1251 {
1252   /* A known iteration count is described as counting from 0 up by 1.  */
1253   if (loop_n_iterations > 0)
1254     {
1255       *initial_value = const0_rtx;
1256       *increment = const1_rtx;
1257       *final_value = GEN_INT (loop_n_iterations);
1258
1259       if (loop_dump_stream)
1260         fprintf (loop_dump_stream,
1261                  "Preconditioning: Success, number of iterations known, %d.\n",
1262                  loop_n_iterations);
1263       return 1;
1264     }
1265   /* Otherwise reject the loop at the first recorded value that is unusable.  */
1266   if (loop_initial_value == 0)
1267     {
1268       if (loop_dump_stream)
1269         fprintf (loop_dump_stream,
1270                  "Preconditioning: Could not find initial value.\n");
1271       return 0;
1272     }
1273   else if (loop_increment == 0)
1274     {
1275       if (loop_dump_stream)
1276         fprintf (loop_dump_stream,
1277                  "Preconditioning: Could not find increment value.\n");
1278       return 0;
1279     }
1280   else if (GET_CODE (loop_increment) != CONST_INT)
1281     {
1282       if (loop_dump_stream)
1283         fprintf (loop_dump_stream,
1284                  "Preconditioning: Increment not a constant.\n");
1285       return 0;
1286     }
1287   else if ((exact_log2 (INTVAL (loop_increment)) < 0)
1288            && (exact_log2 (- INTVAL (loop_increment)) < 0))
1289     {
1290       if (loop_dump_stream)
1291         fprintf (loop_dump_stream,
1292                  "Preconditioning: Increment not a constant power of 2.\n");
1293       return 0;
1294     }
1295
1296   /* Unsigned_compare and compare_dir can be ignored here, since they do
1297      not matter for preconditioning.  */
1298
1299   if (loop_final_value == 0)
1300     {
1301       if (loop_dump_stream)
1302         fprintf (loop_dump_stream,
1303                  "Preconditioning: EQ comparison loop.\n");
1304       return 0;
1305     }
1306
1307   /* Must ensure that final_value is invariant, so call invariant_p, but
1308      first check regno against max_reg_before_loop to make sure that the
1309      register is in the range covered by invariant_p; if it isn't, it is
1310      most likely a biv/giv, which is by definition not invariant.
1311      NOTE(review): the PLUS case assumes operand 0 is a REG -- confirm.  */
1312   if ((GET_CODE (loop_final_value) == REG
1313        && REGNO (loop_final_value) >= max_reg_before_loop)
1314       || (GET_CODE (loop_final_value) == PLUS
1315           && REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
1316       || ! invariant_p (loop_final_value))
1317     {
1318       if (loop_dump_stream)
1319         fprintf (loop_dump_stream,
1320                  "Preconditioning: Final value not invariant.\n");
1321       return 0;
1322     }
1323
1324   /* Fail for floating point values, since the caller of this function
1325      does not have code to deal with them.  */
1326   if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
1327       || GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT)
1328     {
1329       if (loop_dump_stream)
1330         fprintf (loop_dump_stream,
1331                  "Preconditioning: Floating point final or initial value.\n");
1332       return 0;
1333     }
1334
1335   /* Now set initial_value to be the iteration_var, since that may be a
1336      simpler expression, and is guaranteed to be correct if all of the
1337      above tests succeed.
1338
1339      We can not use the initial_value as calculated, because it will be
1340      one too small for loops of the form "while (i-- > 0)".  We can not
1341      emit code before the loop_skip_over insns to fix this problem as this
1342      will then give a number one too large for loops of the form
1343      "while (--i > 0)".
1344
1345      Note that all loops that reach here are entered at the top, because
1346      this function is not called if the loop starts with a jump.  */
1347
1348   /* Fail if loop_iteration_var is not live before loop_start, since we need
1349      to test its value in the preconditioning code.  */
1350
1351   if (uid_luid[regno_first_uid[REGNO (loop_iteration_var)]]
1352       > INSN_LUID (loop_start))
1353     {
1354       if (loop_dump_stream)
1355         fprintf (loop_dump_stream,
1356                  "Preconditioning: Iteration var not live before loop start.\n");
1357       return 0;
1358     }
1359
1360   *initial_value = loop_iteration_var;
1361   *increment = loop_increment;
1362   *final_value = loop_final_value;
1363
1364   /* Success! */
1365   if (loop_dump_stream)
1366     fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1367   return 1;
1368 }
1369
1370
1371 /* Initialize MAP->reg_map, which must have room for MAXREGNUM entries.
1372    Every register numbered above LAST_VIRTUAL_REGISTER is mapped to itself.
1373    The lower entries are cleared, except that VIRTUAL_STACK_VARS_REGNUM and
1374    VIRTUAL_INCOMING_ARGS_REGNUM are mapped to themselves, to avoid
1375    function-inlining specific conversions of these registers.  All other
1376    hard regs can not be mapped because they may be used with different modes.  */
1377
1378 static void
1379 init_reg_map (map, maxregnum)
1380      struct inline_remap *map;
1381      int maxregnum;
1382 {
1383   rtx *slots = map->reg_map;
1384   int regno;
1385
1386   /* Clear the low entries first; REGNO then continues into the identity part.  */
1387   for (regno = 0; regno <= LAST_VIRTUAL_REGISTER; regno++)
1388     slots[regno] = 0;
1389   for (; regno < maxregnum; regno++)
1390     slots[regno] = regno_reg_rtx[regno];
1391
1392   slots[VIRTUAL_STACK_VARS_REGNUM] = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1393   slots[VIRTUAL_INCOMING_ARGS_REGNUM]
1394     = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1395 }
1396 \f
1397 /* Strength-reduction will often emit code for optimized biv/givs which
1398    calculates their value in a temporary register, and then copies the result
1399    to the iv.  This procedure reconstructs the pattern computing the iv from
1400    PATTERN and the insns before SRC_INSN, aborting unless it is an add of a
1401    constant to register REGNO.  For each insn it steps back over, the last
1402    insn emitted is deleted.  Returns the total constant increment of the giv.  */
1403
1404 static rtx
1405 calculate_giv_inc (pattern, src_insn, regno)
1406      rtx pattern, src_insn;
1407      int regno;
1408 {
1409   rtx increment;
1410   rtx increment_total = 0;
1411   int tries = 0;
1412
1413  retry:
1414   /* Verify that we have an increment insn here.  First check for a plus
1415      as the set source.  */
1416   if (GET_CODE (SET_SRC (pattern)) != PLUS)
1417     {
1418       /* SR sometimes computes the new giv value in a temp, then copies it
1419          to the new_reg.  */
1420       src_insn = PREV_INSN (src_insn);
1421       pattern = PATTERN (src_insn);
1422       if (GET_CODE (SET_SRC (pattern)) != PLUS)
1423         abort ();
1424
1425       /* The last insn emitted is not needed, so delete it to avoid confusing
1426          the second cse pass.  This insn sets the giv unnecessarily.  */
1427       delete_insn (get_last_insn ());
1428     }
1429
1430   /* Verify that we have a constant as the second operand of the plus.  */
1431   increment = XEXP (SET_SRC (pattern), 1);
1432   if (GET_CODE (increment) != CONST_INT)
1433     {
1434       /* SR sometimes puts the constant in a register, especially if it is
1435          too big to be an add immed operand.  */
1436       src_insn = PREV_INSN (src_insn);
1437       increment = SET_SRC (PATTERN (src_insn));
1438
1439       /* SR may have used LO_SUM to compute the constant if it is too large
1440          for a load immed operand.  In this case, the constant is in operand
1441          one of the LO_SUM rtx.  */
1442       if (GET_CODE (increment) == LO_SUM)
1443         increment = XEXP (increment, 1);
1444       else if (GET_CODE (increment) == IOR
1445                || GET_CODE (increment) == ASHIFT)
1446         {
1447           /* The rs6000 port loads some constants with IOR.
1448              The alpha port loads some constants with ASHIFT.  */
1449           rtx second_part = XEXP (increment, 1);
1450           enum rtx_code code = GET_CODE (increment);
1451
1452           src_insn = PREV_INSN (src_insn);
1453           increment = SET_SRC (PATTERN (src_insn));
1454           /* Don't need the last insn anymore.  */
1455           delete_insn (get_last_insn ());
1456
1457           if (GET_CODE (second_part) != CONST_INT
1458               || GET_CODE (increment) != CONST_INT)
1459             abort ();
1460
1461           if (code == IOR)
1462             increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1463           else
1464             increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1465         }
1466
1467       if (GET_CODE (increment) != CONST_INT)
1468         abort ();
1469
1470       /* The insn loading the constant into a register is no longer needed,
1471          so delete it.  */
1472       delete_insn (get_last_insn ());
1473     }
1474   /* Sum the increments when a second add insn was reached via the retry.  */
1475   if (increment_total)
1476     increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1477   else
1478     increment_total = increment;
1479
1480   /* Check that the source register is the same as the register we expected
1481      to see as the source.  If not, something is seriously wrong.  */
1482   if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1483       || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1484     {
1485       /* Some machines (e.g. the romp), may emit two add instructions for
1486          certain constants, so lets try looking for another add immediately
1487          before this one if we have only seen one add insn so far.  */
1488
1489       if (tries == 0)
1490         {
1491           tries++;
1492
1493           src_insn = PREV_INSN (src_insn);
1494           pattern = PATTERN (src_insn);
1495
1496           delete_insn (get_last_insn ());
1497
1498           goto retry;
1499         }
1500
1501       abort ();
1502     }
1503
1504   return increment_total;
1505 }
1506
1507 /* Make a fresh copy of the REG_NOTES chain NOTES.  EXPR_LIST entries have
1508    their operand substituted through MAP right away, because the reg_map
1509    entries can change during copying; INSN_LIST entries are not remapped.  */
1510
1511 static rtx
1512 initial_reg_note_copy (notes, map)
1513      rtx notes;
1514      struct inline_remap *map;
1515 {
1516   rtx head = 0, *tail = &head;
1517
1518   for (; notes != 0; notes = XEXP (notes, 1))
1519     {
1520       rtx node = rtx_alloc (GET_CODE (notes));
1521
1522       PUT_MODE (node, GET_MODE (notes));
1523       if (GET_CODE (notes) == INSN_LIST)
1524         /* Not all insn_map entries are valid yet, so keep the insn as is.  */
1525         XEXP (node, 0) = XEXP (notes, 0);
1526       else if (GET_CODE (notes) == EXPR_LIST)
1527         XEXP (node, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1528       else
1529         abort ();
1530       *tail = node;
1531       tail = &XEXP (node, 1);
1532     }
1533
1534   *tail = 0;
1535   return head;
1536 }
1536
1537 /* Redirect each INSN_LIST entry of the copied REG_NOTES to MAP->insn_map.  */
1538
1539 static void
1540 final_reg_note_copy (notes, map)
1541      rtx notes;
1542      struct inline_remap *map;
1543 {
1544   /* NOTES is passed by value, so it can serve as the list cursor.  Only
1545      INSN_LIST entries refer to insns; other entries are left untouched.  */
1546   for (; notes != 0; notes = XEXP (notes, 1))
1547     if (GET_CODE (notes) == INSN_LIST)
1548       XEXP (notes, 0) = map->insn_map[INSN_UID (XEXP (notes, 0))];
1549 }
1550
1551 /* Copy each instruction in the loop, substituting from map as appropriate.
1552    This is very similar to a loop in expand_inline_function.
        The insns copied are those following COPY_START, up to and including
        COPY_END.  MAP supplies the label, register and constant substitutions,
        and its insn_map is filled in with the copy made for each original
        insn.  Unless LAST_ITERATION is nonzero, references to START_LABEL are
        redirected to a fresh label that is emitted after the copied body if
        it was used, and a COPY_END jump back to START_LABEL is rewritten so
        that the loop exit path goes to EXIT_LABEL.  UNROLL_TYPE selects how
        split induction variables are set: to known values when it is
        UNROLL_COMPLETELY, through new pseudo registers otherwise.  Except on
        the last iteration, the notes between COPY_NOTES_FROM and LOOP_END are
        copied as well.  The generated sequence is emitted before
        INSERT_BEFORE.  */
1553
1554 static void
1555 copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1556                 unroll_type, start_label, loop_end, insert_before,
1557                 copy_notes_from)
1558      rtx copy_start, copy_end;
1559      struct inline_remap *map;
1560      rtx exit_label;
1561      int last_iteration;
1562      enum unroll_types unroll_type;
1563      rtx start_label, loop_end, insert_before, copy_notes_from;
1564 {
1565   rtx insn, pattern;
1566   rtx tem, copy;
1567   int dest_reg_was_split, i;
1568   rtx cc0_insn = 0;
1569   rtx final_label = 0;
1570   rtx giv_inc, giv_dest_reg, giv_src_reg;
1571
1572   /* If this isn't the last iteration, then map any references to the
1573      start_label to final_label.  Final label will then be emitted immediately
1574      after the end of this loop body if it was ever used.
1575
1576      If this is the last iteration, then map references to the start_label
1577      to itself.  */
1578   if (! last_iteration)
1579     {
1580       final_label = gen_label_rtx ();
1581       map->label_map[CODE_LABEL_NUMBER (start_label)] = final_label;
1582     }
1583   else
1584     map->label_map[CODE_LABEL_NUMBER (start_label)] = start_label;
1585
1586   start_sequence ();
1587
       /* Walk the insns after COPY_START, up to and including COPY_END.  */
1588   insn = copy_start;
1589   do
1590     {
1591       insn = NEXT_INSN (insn);
1592
1593       map->orig_asm_operands_vector = 0;
1594
1595       switch (GET_CODE (insn))
1596         {
1597         case INSN:
1598           pattern = PATTERN (insn);
1599           copy = 0;
1600           giv_inc = 0;
1601
1602           /* Check to see if this is a giv that has been combined with
1603              some split address givs.  (Combined in the sense that
1604              `combine_givs' in loop.c has put two givs in the same register.)
1605              In this case, we must search all givs based on the same biv to
1606              find the address givs.  Then split the address givs.
1607              Do this before splitting the giv, since that may map the
1608              SET_DEST to a new register.  */
1609
1610           if (GET_CODE (pattern) == SET
1611               && GET_CODE (SET_DEST (pattern)) == REG
1612               && addr_combined_regs[REGNO (SET_DEST (pattern))])
1613             {
1614               struct iv_class *bl;
1615               struct induction *v, *tv;
1616               int regno = REGNO (SET_DEST (pattern));
1617
1618               v = addr_combined_regs[REGNO (SET_DEST (pattern))];
1619               bl = reg_biv_class[REGNO (v->src_reg)];
1620
1621               /* Although the giv_inc amount is not needed here, we must call
1622                  calculate_giv_inc here since it might try to delete the
1623                  last insn emitted.  If we wait until later to call it,
1624                  we might accidentally delete insns generated immediately
1625                  below by emit_unrolled_add.  */
1626
1627               giv_inc = calculate_giv_inc (pattern, insn, regno);
1628
1629               /* Now find all address giv's that were combined with this
1630                  giv 'v'.  */
1631               for (tv = bl->giv; tv; tv = tv->next_iv)
1632                 if (tv->giv_type == DEST_ADDR && tv->same == v)
1633                   {
1634                     int this_giv_inc;
1635
1636                     /* If this DEST_ADDR giv was not split, then ignore it.  */
1637                     if (*tv->location != tv->dest_reg)
1638                       continue;
1639
1640                     /* Scale this_giv_inc if the multiplicative factors of
1641                        the two givs are different.  */
1642                     this_giv_inc = INTVAL (giv_inc);
1643                     if (tv->mult_val != v->mult_val)
1644                       this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1645                                       * INTVAL (tv->mult_val));
1646
1647                     tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1648                     *tv->location = tv->dest_reg;
1649
1650                     if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1651                       {
1652                         /* Must emit an insn to increment the split address
1653                            giv.  Add in the const_adjust field in case there
1654                            was a constant eliminated from the address.  */
1655                         rtx value, dest_reg;
1656
1657                         /* tv->dest_reg will be either a bare register,
1658                            or else a register plus a constant.  */
1659                         if (GET_CODE (tv->dest_reg) == REG)
1660                           dest_reg = tv->dest_reg;
1661                         else
1662                           dest_reg = XEXP (tv->dest_reg, 0);
1663
1664                         /* Check for shared address givs, and avoid
1665                            incrementing the shared pseudo reg more than
1666                            once.  */
1667                         if (! tv->same_insn)
1668                           {
1669                             /* tv->dest_reg may actually be a (PLUS (REG)
1670                                (CONST)) here, so we must call plus_constant
1671                                to add the const_adjust amount before calling
1672                                emit_unrolled_add below.  */
1673                             value = plus_constant (tv->dest_reg,
1674                                                    tv->const_adjust);
1675
1676                             /* The constant could be too large for an add
1677                                immediate, so can't directly emit an insn
1678                                here.  */
1679                             emit_unrolled_add (dest_reg, XEXP (value, 0),
1680                                                XEXP (value, 1));
1681                           }
1682
1683                         /* Reset the giv to be just the register again, in case
1684                            it is used after the set we have just emitted.
1685                            We must subtract the const_adjust factor added in
1686                            above.  */
1687                         tv->dest_reg = plus_constant (dest_reg,
1688                                                       - tv->const_adjust);
1689                         *tv->location = tv->dest_reg;
1690                       }
1691                   }
1692             }
1693
1694           /* If this is a setting of a splittable variable, then determine
1695              how to split the variable, create a new set based on this split,
1696              and set up the reg_map so that later uses of the variable will
1697              use the new split variable.  */
1698
1699           dest_reg_was_split = 0;
1700
1701           if (GET_CODE (pattern) == SET
1702               && GET_CODE (SET_DEST (pattern)) == REG
1703               && splittable_regs[REGNO (SET_DEST (pattern))])
1704             {
1705               int regno = REGNO (SET_DEST (pattern));
1706
1707               dest_reg_was_split = 1;
1708
1709               /* Compute the increment value for the giv, if it wasn't
1710                  already computed above.  */
1711
1712               if (giv_inc == 0)
1713                 giv_inc = calculate_giv_inc (pattern, insn, regno);
1714               giv_dest_reg = SET_DEST (pattern);
1715               giv_src_reg = SET_DEST (pattern);
1716
1717               if (unroll_type == UNROLL_COMPLETELY)
1718                 {
1719                   /* Completely unrolling the loop.  Set the induction
1720                      variable to a known constant value.  */
1721
1722                   /* The value in splittable_regs may be an invariant
1723                      value, so we must use plus_constant here.  */
1724                   splittable_regs[regno]
1725                     = plus_constant (splittable_regs[regno], INTVAL (giv_inc));
1726
1727                   if (GET_CODE (splittable_regs[regno]) == PLUS)
1728                     {
1729                       giv_src_reg = XEXP (splittable_regs[regno], 0);
1730                       giv_inc = XEXP (splittable_regs[regno], 1);
1731                     }
1732                   else
1733                     {
1734                       /* The splittable_regs value must be a REG or a
1735                          CONST_INT, so put the entire value in the giv_src_reg
1736                          variable.  */
1737                       giv_src_reg = splittable_regs[regno];
1738                       giv_inc = const0_rtx;
1739                     }
1740                 }
1741               else
1742                 {
1743                   /* Partially unrolling loop.  Create a new pseudo
1744                      register for the iteration variable, and set it to
1745                      be a constant plus the original register.  Except
1746                      on the last iteration, when the result has to
1747                      go back into the original iteration var register.  */
1748
1749                   /* Handle bivs which must be mapped to a new register
1750                      when split.  This happens for bivs which need their
1751                      final value set before loop entry.  The new register
1752                      for the biv was stored in the biv's first struct
1753                      induction entry by find_splittable_regs.  */
1754
1755                   if (regno < max_reg_before_loop
1756                       && reg_iv_type[regno] == BASIC_INDUCT)
1757                     {
1758                       giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1759                       giv_dest_reg = giv_src_reg;
1760                     }
1761
1762 #if 0
1763                   /* If non-reduced/final-value givs were split, then
1764                      this would have to remap those givs also.  See
1765                      find_splittable_regs.  */
1766 #endif
1767
1768                   splittable_regs[regno]
1769                     = GEN_INT (INTVAL (giv_inc)
1770                                + INTVAL (splittable_regs[regno]));
1771                   giv_inc = splittable_regs[regno];
1772
1773                   /* Now split the induction variable by changing the dest
1774                      of this insn to a new register, and setting its
1775                      reg_map entry to point to this new register.
1776
1777                      If this is the last iteration, and this is the last insn
1778                      that will update the iv, then reuse the original dest,
1779                      to ensure that the iv will have the proper value when
1780                      the loop exits or repeats.
1781
1782                      Using splittable_regs_updates here like this is safe,
1783                      because it can only be greater than one if all
1784                      instructions modifying the iv are always executed in
1785                      order.  */
1786
1787                   if (! last_iteration
1788                       || (splittable_regs_updates[regno]-- != 1))
1789                     {
1790                       tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1791                       giv_dest_reg = tem;
1792                       map->reg_map[regno] = tem;
1793                     }
1794                   else
1795                     map->reg_map[regno] = giv_src_reg;
1796                 }
1797
1798               /* The constant being added could be too large for an add
1799                  immediate, so can't directly emit an insn here.  */
1800               emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1801               copy = get_last_insn ();
1802               pattern = PATTERN (copy);
1803             }
1804           else
1805             {
1806               pattern = copy_rtx_and_substitute (pattern, map);
1807               copy = emit_insn (pattern);
1808             }
1809           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1810
1811 #ifdef HAVE_cc0
1812           /* If this insn is setting CC0, it may need to look at
1813              the insn that uses CC0 to see what type of insn it is.
1814              In that case, the call to recog via validate_change will
1815              fail.  So don't substitute constants here.  Instead,
1816              do it when we emit the following insn.
1817
1818              For example, see the pyr.md file.  That machine has signed and
1819              unsigned compares.  The compare patterns must check the
1820              following branch insn to see what kind of compare to
1821              emit.
1822
1823              If the previous insn set CC0, substitute constants on it as
1824              well.  */
1825           if (sets_cc0_p (PATTERN (copy)) != 0)
1826             cc0_insn = copy;
1827           else
1828             {
1829               if (cc0_insn)
1830                 try_constants (cc0_insn, map);
1831               cc0_insn = 0;
1832               try_constants (copy, map);
1833             }
1834 #else
1835           try_constants (copy, map);
1836 #endif
1837
1838           /* Make split induction variable constants `permanent' since we
1839              know there are no backward branches across iteration variable
1840              settings which would invalidate this.  */
1841           if (dest_reg_was_split)
1842             {
1843               int regno = REGNO (SET_DEST (pattern));
1844
1845               if (regno < map->const_equiv_map_size
1846                   && map->const_age_map[regno] == map->const_age)
1847                 map->const_age_map[regno] = -1;
1848             }
1849           break;
1850
1851         case JUMP_INSN:
1852           pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1853           copy = emit_jump_insn (pattern);
1854           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1855
1856           if (JUMP_LABEL (insn) == start_label && insn == copy_end
1857               && ! last_iteration)
1858             {
1859               /* This is a branch to the beginning of the loop; this is the
1860                  last insn being copied; and this is not the last iteration.
1861                  In this case, we want to change the original fall through
1862                  case to be a branch past the end of the loop, and the
1863                  original jump label case to fall_through.  */
1864
1865               if (invert_exp (pattern, copy))
1866                 {
1867                   if (! redirect_exp (&pattern,
1868                                       map->label_map[CODE_LABEL_NUMBER
1869                                                      (JUMP_LABEL (insn))],
1870                                       exit_label, copy))
1871                     abort ();
1872                 }
1873               else
1874                 {
1875                   rtx jmp;
1876                   rtx lab = gen_label_rtx ();
1877                   /* Can't do it by reversing the jump (probably because we
1878                      couldn't reverse the conditions), so emit a new
1879                      jump_insn after COPY, and redirect the jump around
1880                      that.  */
1881                   jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
1882                   jmp = emit_barrier_after (jmp);
1883                   emit_label_after (lab, jmp);
1884                   LABEL_NUSES (lab) = 0;
1885                   if (! redirect_exp (&pattern,
1886                                       map->label_map[CODE_LABEL_NUMBER
1887                                                      (JUMP_LABEL (insn))],
1888                                       lab, copy))
1889                     abort ();
1890                 }
1891             }
1892
1893 #ifdef HAVE_cc0
1894           if (cc0_insn)
1895             try_constants (cc0_insn, map);
1896           cc0_insn = 0;
1897 #endif
1898           try_constants (copy, map);
1899
1900           /* Set the jump label of COPY correctly to avoid problems with
1901              later passes of unroll_loop, if INSN had jump label set.  */
1902           if (JUMP_LABEL (insn))
1903             {
1904               rtx label = 0;
1905
1906               /* Can't use the label_map for every insn, since this may be
1907                  the backward branch, and hence the label was not mapped.  */
1908               if (GET_CODE (pattern) == SET)
1909                 {
1910                   tem = SET_SRC (pattern);
1911                   if (GET_CODE (tem) == LABEL_REF)
1912                     label = XEXP (tem, 0);
1913                   else if (GET_CODE (tem) == IF_THEN_ELSE)
1914                     {
1915                       if (XEXP (tem, 1) != pc_rtx)
1916                         label = XEXP (XEXP (tem, 1), 0);
1917                       else
1918                         label = XEXP (XEXP (tem, 2), 0);
1919                     }
1920                 }
1921
1922               if (label && GET_CODE (label) == CODE_LABEL)
1923                 JUMP_LABEL (copy) = label;
1924               else
1925                 {
1926                   /* An unrecognizable jump insn, probably the entry jump
1927                      for a switch statement.  This label must have been mapped,
1928                      so just use the label_map to get the new jump label.  */
1929                   JUMP_LABEL (copy)
1930                     = map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))];
1931                 }
1932
1933               /* If this is a non-local jump, then must increase the label
1934                  use count so that the label will not be deleted when the
1935                  original jump is deleted.  */
1936               LABEL_NUSES (JUMP_LABEL (copy))++;
1937             }
1938           else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
1939                    || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
1940             {
1941               rtx pat = PATTERN (copy);
1942               int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
1943               int len = XVECLEN (pat, diff_vec_p);
1944               int i;
1945
                   /* Count one more use for every label in the copied vector.  */
1946               for (i = 0; i < len; i++)
1947                 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
1948             }
1949
1950           /* If this used to be a conditional jump insn but whose branch
1951              direction is now known, we must do something special.  */
1952           if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
1953             {
1954 #ifdef HAVE_cc0
1955               /* The previous insn set cc0 for us.  So delete it.  */
1956               delete_insn (PREV_INSN (copy));
1957 #endif
1958
1959               /* If this is now a no-op, delete it.  */
1960               if (map->last_pc_value == pc_rtx)
1961                 {
1962                   /* Don't let delete_insn delete the label referenced here,
1963                      because we might possibly need it later for some other
1964                      instruction in the loop.  */
1965                   if (JUMP_LABEL (copy))
1966                     LABEL_NUSES (JUMP_LABEL (copy))++;
1967                   delete_insn (copy);
1968                   if (JUMP_LABEL (copy))
1969                     LABEL_NUSES (JUMP_LABEL (copy))--;
1970                   copy = 0;
1971                 }
1972               else
1973                 /* Otherwise, this is unconditional jump so we must put a
1974                    BARRIER after it.  We could do some dead code elimination
1975                    here, but jump.c will do it just as well.  */
1976                 emit_barrier ();
1977             }
1978           break;
1979
1980         case CALL_INSN:
1981           pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1982           copy = emit_call_insn (pattern);
1983           REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1984
1985           /* Because the USAGE information potentially contains objects other
1986              than hard registers, we need to copy it.  */
1987           CALL_INSN_FUNCTION_USAGE (copy) =
1988              copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
1989
1990 #ifdef HAVE_cc0
1991           if (cc0_insn)
1992             try_constants (cc0_insn, map);
1993           cc0_insn = 0;
1994 #endif
1995           try_constants (copy, map);
1996
1997           /* Be lazy and assume CALL_INSNs clobber all hard registers.  */
1998           for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1999             map->const_equiv_map[i] = 0;
2000           break;
2001
2002         case CODE_LABEL:
2003           /* If this is the loop start label, then we don't need to emit a
2004              copy of this label since no one will use it.  */
2005
2006           if (insn != start_label)
2007             {
2008               copy = emit_label (map->label_map[CODE_LABEL_NUMBER (insn)]);
2009               map->const_age++;
2010             }
2011           break;
2012
2013         case BARRIER:
2014           copy = emit_barrier ();
2015           break;
2016
2017         case NOTE:
2018           /* VTOP notes are valid only before the loop exit test.  If placed
2019              anywhere else, loop may generate bad code.  */
2020
2021           if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2022               && (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2023                   || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2024             copy = emit_note (NOTE_SOURCE_FILE (insn),
2025                               NOTE_LINE_NUMBER (insn));
2026           else
2027             copy = 0;
2028           break;
2029
2030         default:
2031           abort ();
2032           break;
2033         }
2034
           /* Record the copy made for INSN, or 0 if none was emitted.
              NOTE(review): when INSN is start_label the CODE_LABEL case leaves
              COPY unassigned, so the value stored here is whatever COPY held
              from the previous insn.  */
2035       map->insn_map[INSN_UID (insn)] = copy;
2036     }
2037   while (insn != copy_end);
2038
2039   /* Now finish copying the REG_NOTES.  */
2040   insn = copy_start;
2041   do
2042     {
2043       insn = NEXT_INSN (insn);
2044       if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
2045            || GET_CODE (insn) == CALL_INSN)
2046           && map->insn_map[INSN_UID (insn)])
2047         final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2048     }
2049   while (insn != copy_end);
2050
2051   /* There may be notes between copy_notes_from and loop_end.  Emit a copy of
2052      each of these notes here, since there may be some important ones, such as
2053      NOTE_INSN_BLOCK_END notes, in this group.  We don't do this on the last
2054      iteration, because the original notes won't be deleted.
2055
2056      We can't use insert_before here, because when from preconditioning,
2057      insert_before points before the loop.  We can't use copy_end, because
2058      there may be insns already inserted after it (which we don't want to
2059      copy) when not from preconditioning code.  */
2060
2061   if (! last_iteration)
2062     {
2063       for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2064         {
2065           if (GET_CODE (insn) == NOTE
2066               && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
2067             emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2068         }
2069     }
2070
2071   if (final_label && LABEL_NUSES (final_label) > 0)
2072     emit_label (final_label);
2073
       /* Close the sequence and place everything before INSERT_BEFORE.  */
2074   tem = gen_sequence ();
2075   end_sequence ();
2076   emit_insn_before (tem, insert_before);
2077 }
2078 \f
2079 /* Emit insns that set DEST_REG to SRC_REG plus INCREMENT.  The add goes
2080    through expand_binop so that a valid insn is emitted even when INCREMENT
2081    won't fit in the immediate field of a PLUS insn.  */
2082
2083 void
2084 emit_unrolled_add (dest_reg, src_reg, increment)
2085      rtx dest_reg, src_reg, increment;
2086 {
2087   enum machine_mode mode = GET_MODE (dest_reg);
2088   rtx sum = expand_binop (mode, add_optab, src_reg, increment, dest_reg,
2089                           0, OPTAB_LIB_WIDEN);
2090
2091   /* If the sum did not end up in DEST_REG itself, copy it there.  */
2092   if (sum != dest_reg)
2093     emit_move_insn (dest_reg, sum);
2094 }
2095 \f
2096 /* Scan the insns that follow INSN, stopping short of the backward branch
2097    that ends the loop at LOOP_END.  Returns 1 if any jump among them targets
2098    an insn lying between LOOP_START and INSN.  Returns 0 otherwise.  */
2099
2100 /* ??? This is a quadratic algorithm.  It could be rewritten to be linear.
2101    In practice, this is not a problem, because this function is seldom called,
2102    and uses a negligible amount of CPU time on average.  */
2103
2104 int
2105 back_branch_in_range_p (insn, loop_start, loop_end)
2106      rtx insn;
2107      rtx loop_start, loop_end;
2108 {
2109   rtx jump, scan, target, last;
2110
2111   /* LAST marks where to stop: just short of the loop's closing branch.  */
2112   last = prev_nonnote_insn (loop_end);
2113   if (GET_CODE (last) == BARRIER)
2114     last = PREV_INSN (last);
2115
2116   /* INSN may have been deleted; if so, move forward to the first insn
2117      after it that has not been.  */
2118   for (; INSN_DELETED_P (insn); insn = NEXT_INSN (insn))
2119     ;
2120
2121   /* When INSN is the last insn in the loop, nothing follows it.  */
2122   if (insn == last)
2123     return 0;
2124
2125   for (jump = NEXT_INSN (insn); jump != last; jump = NEXT_INSN (jump))
2126     {
2127       if (GET_CODE (jump) != JUMP_INSN)
2128         continue;
2129
2130       target = JUMP_LABEL (jump);
2131
2132       /* Look for TARGET among the insns from LOOP_START up to INSN.
2133          INSN_LUID comparisons can't be used here, since INSN may not
2134          have an LUID entry.  */
2135       for (scan = loop_start; scan != insn; scan = NEXT_INSN (scan))
2136         if (scan == target)
2137           return 1;
2138     }
2139
2140   return 0;
2141 }
2142
2143 /* Produce the simplest rtx that can be found for the expression
2144    (PLUS (MULT mult1 mult2) add1) in MODE: each operation is folded by
2145    simplify_binary_operation if possible, and built explicitly otherwise.  */
2146
2147 static rtx
2148 fold_rtx_mult_add (mult1, mult2, add1, mode)
2149      rtx mult1, mult2, add1;
2150      enum machine_mode mode;
2151 {
2152   rtx op0, op1, product, sum;
2153
2154   /* Every operand has to be in MODE already, or else have VOIDmode.
2155      This should never fail; for now, check to make sure.  */
2156   if (GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2157     abort ();
2158   if (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2159     abort ();
2160   if (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode)
2161     abort ();
2162
2163   /* Order the factors so that if at least one of them is a CONST_INT,
2164      the second one is.  */
2165   op0 = mult1;
2166   op1 = mult2;
2167   if (GET_CODE (mult1) == CONST_INT)
2168     {
2169       op0 = mult2;
2170       op1 = mult1;
2171     }
2172
2173   product = simplify_binary_operation (MULT, mode, op0, op1);
2174   if (product == 0)
2175     product = gen_rtx (MULT, mode, op0, op1);
2176
2177   /* Likewise for the sum: a CONST_INT addend goes second.  */
2178   if (GET_CODE (add1) == CONST_INT)
2179     op0 = product, op1 = add1;
2180   else
2181     op0 = add1, op1 = product;
2182
2183   sum = simplify_binary_operation (PLUS, mode, op0, op1);
2184   if (sum == 0)
2185     sum = gen_rtx (PLUS, mode, op0, op1);
2186
2187   return sum;
2188 }
2189
2190 /* Walk the list of induction structs for the biv class BL and try to compute,
2191    as a constant, the total amount the biv changes by in one loop iteration.
2192
2193    Returns that total as an rtx, simplified as much as possible, or 0 when
2194    it cannot be calculated.  */
2195
2196 rtx
2197 biv_total_increment (bl, loop_start, loop_end)
2198      struct iv_class *bl;
2199      rtx loop_start, loop_end;
2200 {
2201   struct induction *biv;
2202   rtx total = const0_rtx;
2203
2204   /* Every insn that sets the biv contributes, and each must run exactly
2205      once per trip through the loop.  That is taken to hold when the insn
2206      is always executed and no backward branch after it jumps to before
2207      it.  Its mult_val must also be one, so that it really is an
2208      increment.  If any insn fails these tests, give up.  */
2209
2210   for (biv = bl->biv; biv != 0; biv = biv->next_iv)
2211     {
2212       if (! biv->always_computable || biv->mult_val != const1_rtx)
2213         return 0;
2214
2215       if (back_branch_in_range_p (biv->insn, loop_start, loop_end))
2216         return 0;
2217
2218       total = fold_rtx_mult_add (total, const1_rtx, biv->add_val, biv->mode);
2219     }
2220
2221   return total;
2222 }
2223
2224 /* Determine the initial value of the iteration variable, and the amount
2225    that it is incremented each loop.  Use the tables constructed by
2226    the strength reduction pass to calculate these values.
2227
        ITERATION_VAR must be a REG, since REGNO is applied to it.  The answers
        are stored through INITIAL_VALUE and INCREMENT.  LOOP_START and LOOP_END
        are only passed on to biv_total_increment.

2228    Initial_value and/or increment are set to zero if their values could not
2229    be calculated.  Each rejection below writes a one-line reason to
        loop_dump_stream when that stream is non-zero.  As the code stands, only
        a basic induction variable yields non-zero results, and even then
        *INCREMENT is zero if biv_total_increment returns 0.  */
2230
2231 static void
2232 iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2233      rtx iteration_var, *initial_value, *increment;
2234      rtx loop_start, loop_end;
2235 {
2236   struct iv_class *bl;
2237   struct induction *v, *b;
       /* B is never used.  V is used only by the disabled giv code in the
          #else branch below.  */
2238
2239   /* Clear the result values, in case no answer can be found.  */
2240   *initial_value = 0;
2241   *increment = 0;
2242
2243   /* The iteration variable can be either a giv or a biv.  Check to see
2244      which it is, and compute the variable's initial value, and increment
2245      value if possible.  */
2246
2247   /* If this is a new register, can't handle it since we don't have any
2248      reg_iv_type entry for it.  */
2249   if (REGNO (iteration_var) >= max_reg_before_loop)
2250     {
2251       if (loop_dump_stream)
2252         fprintf (loop_dump_stream,
2253                  "Loop unrolling: No reg_iv_type entry for iteration var.\n");
2254       return;
2255     }
2256   /* Reject iteration variables larger than the host long size, since they
2257      could result in a number of iterations greater than the range of our
2258      `unsigned long' variable loop_n_iterations.  */
2259   else if (GET_MODE_BITSIZE (GET_MODE (iteration_var)) > HOST_BITS_PER_LONG)
2260     {
2261       if (loop_dump_stream)
2262         fprintf (loop_dump_stream,
2263                  "Loop unrolling: Iteration var rejected because mode larger than host long.\n");
2264       return;
2265     }
       /* Only iteration variables whose mode class is MODE_INT are accepted.  */
2266   else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2267     {
2268       if (loop_dump_stream)
2269         fprintf (loop_dump_stream,
2270                  "Loop unrolling: Iteration var not an integer.\n");
2271       return;
2272     }
2273   else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
2274     {
2275       /* Grab initial value, only useful if it is a constant.  */
2276       bl = reg_biv_class[REGNO (iteration_var)];
2277       *initial_value = bl->initial_value;
2278
           /* This is zero when the total increment could not be computed.  */
2279       *increment = biv_total_increment (bl, loop_start, loop_end);
2280     }
2281   else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
2282     {
           /* The `#if 1' keeps the giv code in the #else branch disabled; giv
              iterators are simply rejected.  */
2283 #if 1
2284       /* ??? The code below does not work because the incorrect number of
2285          iterations is calculated when the biv is incremented after the giv
2286          is set (which is the usual case).  This can probably be accounted
2287          for by biasing the initial_value by subtracting the amount of the
2288          increment that occurs between the giv set and the giv test.  However,
2289          a giv as an iterator is very rare, so it does not seem worthwhile
2290          to handle this.  */
2291       /* ??? An example failure is: i = 6; do {;} while (i++ < 9).  */
2292       if (loop_dump_stream)
2293         fprintf (loop_dump_stream,
2294                  "Loop unrolling: Giv iterators are not handled.\n");
2295       return;
2296 #else
2297       /* Initial value is mult_val times the biv's initial value plus
2298          add_val.  Only useful if it is a constant.  */
2299       v = reg_iv_info[REGNO (iteration_var)];
2300       bl = reg_biv_class[REGNO (v->src_reg)];
2301       *initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
2302                                           v->add_val, v->mode);
2303
2304       /* Increment value is mult_val times the increment value of the biv.  */
2305
2306       *increment = biv_total_increment (bl, loop_start, loop_end);
2307       if (*increment)
2308         *increment = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx,
2309                                         v->mode);
2310 #endif
2311     }
2312   else
2313     {
           /* Neither BASIC_INDUCT nor GENERAL_INDUCT: leave both results zero.  */
2314       if (loop_dump_stream)
2315         fprintf (loop_dump_stream,
2316                  "Loop unrolling: Not basic or general induction var.\n");
2317       return;
2318     }
2319 }
2320
2321 /* Calculate the approximate final value of the iteration variable
2322    which has a loop exit test with code COMPARISON_CODE and comparison value
2323    of COMPARISON_VALUE.  Also returns an indication of whether the comparison
2324    was signed or unsigned, and the direction of the comparison.  This info is
2325    needed to calculate the number of loop iterations.

        *UNSIGNED_P is set to 1 for LEU, GEU, LTU and GTU, and to 0 otherwise.
        *COMPARE_DIR is set to 1 for LE, LEU, LT and LTU, to -1 for GE, GEU, GT
        and GTU, and to 0 for EQ and NE.

        The value returned is COMPARISON_VALUE plus one for LE and LEU, minus
        one for GE and GEU (both built with plus_constant), COMPARISON_VALUE
        itself for LT, LTU, GT, GTU and NE, and 0 for EQ.  Any other comparison
        code calls abort.  */
2326
2327 static rtx
2328 approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
2329      enum rtx_code comparison_code;
2330      rtx comparison_value;
2331      int *unsigned_p;
2332      int *compare_dir;
2333 {
2334   /* Calculate the final value of the induction variable.
2335      The exact final value depends on the branch operator, and increment sign.
2336      This is only an approximate value.  It will be wrong if the iteration
2337      variable is not incremented by one each time through the loop, and
2338      (approx final value - start value) % increment != 0.  */
2339
2340   *unsigned_p = 0;
2341   switch (comparison_code)
2342     {
2343     case LEU:
2344       *unsigned_p = 1;
           /* Fall through.  */
2345     case LE:
2346       *compare_dir = 1;
2347       return plus_constant (comparison_value, 1);
2348     case GEU:
2349       *unsigned_p = 1;
           /* Fall through.  */
2350     case GE:
2351       *compare_dir = -1;
2352       return plus_constant (comparison_value, -1);
2353     case EQ:
2354       /* Can not calculate a final value for this case.  */
2355       *compare_dir = 0;
2356       return 0;
2357     case LTU:
2358       *unsigned_p = 1;
           /* Fall through.  */
2359     case LT:
2360       *compare_dir = 1;
2361       return comparison_value;
           /* Not reached; the return above always leaves the function.  */
2362       break;
2363     case GTU:
2364       *unsigned_p = 1;
           /* Fall through.  */
2365     case GT:
2366       *compare_dir = -1;
2367       return comparison_value;
2368     case NE:
2369       *compare_dir = 0;
2370       return comparison_value;
2371     default:
           /* Callers must pass one of the comparison codes handled above.  */
2372       abort ();
2373     }
2374 }
2375
2376 /* For each biv and giv, determine whether it can be safely split into
2377    a different variable for each unrolled copy of the loop body.  If it
2378    is safe to split, then indicate that by saving some useful info
2379    in the splittable_regs array.
2380
2381    If the loop is being completely unrolled, then splittable_regs will hold
2382    the current value of the induction variable while the loop is unrolled.
2383    It must be set to the initial value of the induction variable here.
2384    Otherwise, splittable_regs will hold the difference between the current
2385    value of the induction variable and the value the induction variable had
2386    at the top of the loop.  It must be set to the value 0 here.
2387
        The bivs examined are those on loop_iv_list; the givs of each biv are
        handled by find_splittable_givs, to which UNROLL_NUMBER is passed on
        unchanged.  UNROLL_TYPE selects between the two cases above; when it
        is UNROLL_COMPLETELY the live-outside-the-loop test on the biv is
        skipped.  LOOP_START is used to look up the loop's exit count and as
        an insertion point; LOOP_END is used in the liveness test.  New insns
        are emitted before LOOP_START or before END_INSERT_BEFORE.

        Besides splittable_regs, this function sets splittable_regs_updates
        for every splittable biv, and may replace bl->biv->src_reg with a new
        pseudo when a biv's final value has to be set before the loop.

2388    Returns the total number of instructions that set registers that are
2389    splittable.  */
2390
2391 /* ?? If the loop is only unrolled twice, then most of the restrictions to
2392    constant values are unnecessary, since we can easily calculate increment
2393    values in this case even if nothing is constant.  The increment value
2394    should not involve a multiply however.  */
2395
2396 /* ?? Even if the biv/giv increment values aren't constant, it may still
2397    be beneficial to split the variable if the loop is only unrolled a few
2398    times, since multiplies by small integers (1,2,3,4) are very cheap.  */
2399
2400 static int
2401 find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2402                      unroll_number)
2403      enum unroll_types unroll_type;
2404      rtx loop_start, loop_end;
2405      rtx end_insert_before;
2406      int unroll_number;
2407 {
2408   struct iv_class *bl;
2409   struct induction *v;
2410   rtx increment, tem;
2411   rtx biv_final_value;
2412   int biv_splittable;
2413   int result = 0;
2414
2415   for (bl = loop_iv_list; bl; bl = bl->next)
2416     {
2417       /* Biv_total_increment must return a constant value,
2418          otherwise we can not calculate the split values.  Such a biv is
             skipped entirely, together with all of its givs.  */
2419
2420       increment = biv_total_increment (bl, loop_start, loop_end);
2421       if (! increment || GET_CODE (increment) != CONST_INT)
2422         continue;
2423
2424       /* The loop must be unrolled completely, or else have a known number
2425          of iterations and only one exit, or else the biv must be dead
2426          outside the loop, or else the final value must be known.  Otherwise,
2427          it is unsafe to split the biv since it may not have the proper
2428          value on loop exit.  */
2429
2430       /* loop_number_exit_count is non-zero if the loop has an exit other than
2431          a fall through at the end.  */
2432
           /* Because of the `&&' chain, final_biv_value is called only when
              all the preceding tests hold; otherwise biv_final_value stays
              zero and the biv remains marked as splittable.  */
2433       biv_splittable = 1;
2434       biv_final_value = 0;
2435       if (unroll_type != UNROLL_COMPLETELY
2436           && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2437               || unroll_type == UNROLL_NAIVE)
2438           && (uid_luid[regno_last_uid[bl->regno]] >= INSN_LUID (loop_end)
2439               || ! bl->init_insn
2440               || INSN_UID (bl->init_insn) >= max_uid_for_loop
2441               || (uid_luid[regno_first_uid[bl->regno]]
2442                   < INSN_LUID (bl->init_insn))
2443               || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2444           && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
2445         biv_splittable = 0;
2446
2447       /* If any of the insns setting the BIV don't do so with a simple
2448          PLUS, we don't know how to split it.  The scan stops as soon as
             biv_splittable has been cleared.  */
2449       for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2450         if ((tem = single_set (v->insn)) == 0
2451             || GET_CODE (SET_DEST (tem)) != REG
2452             || REGNO (SET_DEST (tem)) != bl->regno
2453             || GET_CODE (SET_SRC (tem)) != PLUS)
2454           biv_splittable = 0;
2455
2456       /* The insn that gives the biv its final value, needed when
2457          biv_final_value is non-zero, is not emitted here.  It is emitted
2458          further down, after all of the givs have been handled, since some of
2459          them may need to use the biv's value in their initialization code.  */
2460
2461       /* This biv is splittable.  If completely unrolling the loop, save
2462          the biv's initial value.  Otherwise, save the constant zero.  */
2463
2464       if (biv_splittable == 1)
2465         {
2466           if (unroll_type == UNROLL_COMPLETELY)
2467             {
2468               /* If the initial value of the biv is itself (i.e. it is too
2469                  complicated for strength_reduce to compute), or is a hard
2470                  register, or it isn't invariant, then we must create a new
2471                  pseudo reg to hold the initial value of the biv.  */
2472
2473               if (GET_CODE (bl->initial_value) == REG
2474                   && (REGNO (bl->initial_value) == bl->regno
2475                       || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2476                       || ! invariant_p (bl->initial_value)))
2477                 {
                       /* This TEM shadows the function-level TEM.  */
2478                   rtx tem = gen_reg_rtx (bl->biv->mode);
2479
                       /* Copy bl->biv->src_reg into the new pseudo just before
                          LOOP_START, and use the pseudo as the initial value.  */
2480                   emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2481                                     loop_start);
2482
2483                   if (loop_dump_stream)
2484                     fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2485                              bl->regno, REGNO (tem));
2486
2487                   splittable_regs[bl->regno] = tem;
2488                 }
2489               else
2490                 splittable_regs[bl->regno] = bl->initial_value;
2491             }
2492           else
2493             splittable_regs[bl->regno] = const0_rtx;
2494
2495           /* Save the number of instructions that modify the biv, so that
2496              we can treat the last one specially.  */
2497
2498           splittable_regs_updates[bl->regno] = bl->biv_count;
2499           result += bl->biv_count;
2500
2501           if (loop_dump_stream)
2502             fprintf (loop_dump_stream,
2503                      "Biv %d safe to split.\n", bl->regno);
2504         }
2505
2506       /* Check every giv that depends on this biv to see whether it is
2507          splittable also.  Even if the biv isn't splittable, givs which
2508          depend on it may be splittable if the biv is live outside the
2509          loop, and the givs aren't.  */
2510
2511       result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2512                                      increment, unroll_number);
2513
2514       /* If final value is non-zero, then must emit an instruction which sets
2515          the value of the biv to the proper value.  This is done after
2516          handling all of the givs, since some of them may need to use the
2517          biv's value in their initialization code.  */
2518       if (biv_final_value)
2519         {
2520           /* If the loop has multiple exits, emit the insns before the
2521              loop to ensure that it will always be executed no matter
2522              how the loop exits.  Otherwise emit the insn after the loop,
2523              since this is slightly more efficient.  The test below handles
                 the second case first: a zero exit count means the insn goes
                 before END_INSERT_BEFORE.  */
2524           if (! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
2525             emit_insn_before (gen_move_insn (bl->biv->src_reg,
2526                                              biv_final_value),
2527                               end_insert_before);
2528           else
2529             {
2530               /* Create a new register to hold the value of the biv, and then
2531                  set the biv to its final value before the loop start.  The biv
2532                  is set to its final value before loop start to ensure that
2533                  this insn will always be executed, no matter how the loop
2534                  exits.  This TEM also shadows the function-level TEM.  */
2535               rtx tem = gen_reg_rtx (bl->biv->mode);
                   /* The order matters: save the current biv value in TEM
                      first, then overwrite the biv with its final value.  */
2536               emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2537                                 loop_start);
2538               emit_insn_before (gen_move_insn (bl->biv->src_reg,
2539                                                biv_final_value),
2540                                 loop_start);
2541
2542               if (loop_dump_stream)
2543                 fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2544                          REGNO (bl->biv->src_reg), REGNO (tem));
2545
2546               /* Set up the mapping from the original biv register to the new
2547                  register.  */
2548               bl->biv->src_reg = tem;
2549             }
2550         }
2551     }
2552   return result;
2553 }
2554
2555 /* Return 1 if the first and last unrolled copy of the address giv V is valid
2556    for the instruction that is using it.  Do not make any changes to that
2557    instruction.

        The first address tried is V->dest_reg.  The last is V->dest_reg plus
        INTVAL (GIV_INC) * (UNROLL_NUMBER - 1), built with plus_constant.  Since
        INTVAL is applied to GIV_INC, it presumably has to be a CONST_INT; the
        callers visible in this file check that before calling.

        Each address is tried by passing it to validate_change for
        *V->location in V->insn.  The original contents of *V->location are
        put back before returning.  Returns 0 if either trial fails.  */
2558
2559 static int
2560 verify_addresses (v, giv_inc, unroll_number)
2561      struct induction *v;
2562      rtx giv_inc;
2563      int unroll_number;
2564 {
2565   int ret = 1;
       /* Remember the address currently in the insn so it can be restored.  */
2566   rtx orig_addr = *v->location;
2567   rtx last_addr = plus_constant (v->dest_reg,
2568                                  INTVAL (giv_inc) * (unroll_number - 1));
2569
2570   /* First check to see if either address would fail.  The last address is
          not tried when the first one has already failed.  */
2571   if (! validate_change (v->insn, v->location, v->dest_reg, 0)
2572       || ! validate_change (v->insn, v->location, last_addr, 0))
2573     ret = 0;
2574
2575   /* Now put things back the way they were before.  This will always
2576      succeed.  */
2577   validate_change (v->insn, v->location, orig_addr, 0);
2578
2579   return ret;
2580 }
2581
2582 /* For every giv based on the biv BL, check to determine whether it is
2583    splittable.  This is a subroutine to find_splittable_regs ().
2584
2585    Return the number of instructions that set splittable registers.  */
2586
2587 static int
2588 find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2589                       unroll_number)
2590      struct iv_class *bl;
2591      enum unroll_types unroll_type;
2592      rtx loop_start, loop_end;
2593      rtx increment;
2594      int unroll_number;
2595 {
2596   struct induction *v, *v2;
2597   rtx final_value;
2598   rtx tem;
2599   int result = 0;
2600
2601   /* Scan the list of givs, and set the same_insn field when there are
2602      multiple identical givs in the same insn.  */
2603   for (v = bl->giv; v; v = v->next_iv)
2604     for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2605       if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2606           && ! v2->same_insn)
2607         v2->same_insn = v;
2608
2609   for (v = bl->giv; v; v = v->next_iv)
2610     {
2611       rtx giv_inc, value;
2612
2613       /* Only split the giv if it has already been reduced, or if the loop is
2614          being completely unrolled.  */
2615       if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2616         continue;
2617
2618       /* The giv can be split if the insn that sets the giv is executed once
2619          and only once on every iteration of the loop.  */
2620       /* An address giv can always be split.  v->insn is just a use not a set,
2621          and hence it does not matter whether it is always executed.  All that
2622          matters is that all the biv increments are always executed, and we
2623          won't reach here if they aren't.  */
2624       if (v->giv_type != DEST_ADDR
2625           && (! v->always_computable
2626               || back_branch_in_range_p (v->insn, loop_start, loop_end)))
2627         continue;
2628
2629       /* The giv increment value must be a constant.  */
2630       giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2631                                    v->mode);
2632       if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2633         continue;
2634
2635       /* The loop must be unrolled completely, or else have a known number of
2636          iterations and only one exit, or else the giv must be dead outside
2637          the loop, or else the final value of the giv must be known.
2638          Otherwise, it is not safe to split the giv since it may not have the
2639          proper value on loop exit.  */
2640
2641       /* The used outside loop test will fail for DEST_ADDR givs.  They are
2642          never used outside the loop anyways, so it is always safe to split a
2643          DEST_ADDR giv.  */
2644
2645       final_value = 0;
2646       if (unroll_type != UNROLL_COMPLETELY
2647           && (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2648               || unroll_type == UNROLL_NAIVE)
2649           && v->giv_type != DEST_ADDR
2650           && ((regno_first_uid[REGNO (v->dest_reg)] != INSN_UID (v->insn)
2651                /* Check for the case where the pseudo is set by a shift/add
2652                   sequence, in which case the first insn setting the pseudo
2653                   is the first insn of the shift/add sequence.  */
2654                && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2655                    || (regno_first_uid[REGNO (v->dest_reg)]
2656                        != INSN_UID (XEXP (tem, 0)))))
2657               /* Line above always fails if INSN was moved by loop opt.  */
2658               || (uid_luid[regno_last_uid[REGNO (v->dest_reg)]]
2659                   >= INSN_LUID (loop_end)))
2660           && ! (final_value = v->final_value))
2661         continue;
2662
2663 #if 0
2664       /* Currently, non-reduced/final-value givs are never split.  */
2665       /* Should emit insns after the loop if possible, as the biv final value
2666          code below does.  */
2667
2668       /* If the final value is non-zero, and the giv has not been reduced,
2669          then must emit an instruction to set the final value.  */
2670       if (final_value && !v->new_reg)
2671         {
2672           /* Create a new register to hold the value of the giv, and then set
2673              the giv to its final value before the loop start.  The giv is set
2674              to its final value before loop start to ensure that this insn
2675              will always be executed, no matter how we exit.  */
2676           tem = gen_reg_rtx (v->mode);
2677           emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2678           emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2679                             loop_start);
2680
2681           if (loop_dump_stream)
2682             fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2683                      REGNO (v->dest_reg), REGNO (tem));
2684
2685           v->src_reg = tem;
2686         }
2687 #endif
2688
2689       /* This giv is splittable.  If completely unrolling the loop, save the
2690          giv's initial value.  Otherwise, save the constant zero for it.  */
2691
2692       if (unroll_type == UNROLL_COMPLETELY)
2693         {
2694           /* It is not safe to use bl->initial_value here, because it may not
2695              be invariant.  It is safe to use the initial value stored in
2696              the splittable_regs array if it is set.  In rare cases, it won't
2697              be set, so then we do exactly the same thing as
2698              find_splittable_regs does to get a safe value.  */
2699           rtx biv_initial_value;
2700
2701           if (splittable_regs[bl->regno])
2702             biv_initial_value = splittable_regs[bl->regno];
2703           else if (GET_CODE (bl->initial_value) != REG
2704                    || (REGNO (bl->initial_value) != bl->regno
2705                        && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2706             biv_initial_value = bl->initial_value;
2707           else
2708             {
2709               rtx tem = gen_reg_rtx (bl->biv->mode);
2710
2711               emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2712                                 loop_start);
2713               biv_initial_value = tem;
2714             }
2715           value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2716                                      v->add_val, v->mode);
2717         }
2718       else
2719         value = const0_rtx;
2720
2721       if (v->new_reg)
2722         {
2723           /* If a giv was combined with another giv, then we can only split
2724              this giv if the giv it was combined with was reduced.  This
2725              is because the value of v->new_reg is meaningless in this
2726              case.  */
2727           if (v->same && ! v->same->new_reg)
2728             {
2729               if (loop_dump_stream)
2730                 fprintf (loop_dump_stream,
2731                          "giv combined with unreduced giv not split.\n");
2732               continue;
2733             }
2734           /* If the giv is an address destination, it could be something other
2735              than a simple register, these have to be treated differently.  */
2736           else if (v->giv_type == DEST_REG)
2737             {
2738               /* If value is not a constant, register, or register plus
2739                  constant, then compute its value into a register before
2740                  loop start.  This prevents invalid rtx sharing, and should
2741                  generate better code.  We can use bl->initial_value here
2742                  instead of splittable_regs[bl->regno] because this code
2743                  is going before the loop start.  */
2744               if (unroll_type == UNROLL_COMPLETELY
2745                   && GET_CODE (value) != CONST_INT
2746                   && GET_CODE (value) != REG
2747                   && (GET_CODE (value) != PLUS
2748                       || GET_CODE (XEXP (value, 0)) != REG
2749                       || GET_CODE (XEXP (value, 1)) != CONST_INT))
2750                 {
2751                   rtx tem = gen_reg_rtx (v->mode);
2752                   emit_iv_add_mult (bl->initial_value, v->mult_val,
2753                                     v->add_val, tem, loop_start);
2754                   value = tem;
2755                 }
2756
2757               splittable_regs[REGNO (v->new_reg)] = value;
2758             }
2759           else
2760             {
2761               /* Splitting address givs is useful since it will often allow us
2762                  to eliminate some increment insns for the base giv as
2763                  unnecessary.  */
2764
2765               /* If the addr giv is combined with a dest_reg giv, then all
2766                  references to that dest reg will be remapped, which is NOT
2767                  what we want for split addr regs. We always create a new
2768                  register for the split addr giv, just to be safe.  */
2769
2770               /* ??? If there are multiple address givs which have been
2771                  combined with the same dest_reg giv, then we may only need
2772                  one new register for them.  Pulling out constants below will
2773                  catch some of the common cases of this.  Currently, I leave
2774                  the work of simplifying multiple address givs to the
2775                  following cse pass.  */
2776
2777               /* As a special case, if we have multiple identical address givs
2778                  within a single instruction, then we do use a single pseudo
2779                  reg for both.  This is necessary in case one is a match_dup
2780                  of the other.  */
2781
2782               v->const_adjust = 0;
2783
2784               if (v->same_insn)
2785                 {
2786                   v->dest_reg = v->same_insn->dest_reg;
2787                   if (loop_dump_stream)
2788                     fprintf (loop_dump_stream,
2789                              "Sharing address givs in insn %d\n",
2790                              INSN_UID (v->insn));
2791                 }
2792               else if (unroll_type != UNROLL_COMPLETELY)
2793                 {
2794                   /* If not completely unrolling the loop, then create a new
2795                      register to hold the split value of the DEST_ADDR giv.
2796                      Emit insn to initialize its value before loop start.  */
2797                   tem = gen_reg_rtx (v->mode);
2798
2799                   /* If the address giv has a constant in its new_reg value,
2800                      then this constant can be pulled out and put in value,
2801                      instead of being part of the initialization code.  */
2802
2803                   if (GET_CODE (v->new_reg) == PLUS
2804                       && GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
2805                     {
2806                       v->dest_reg
2807                         = plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
2808
2809                       /* Only succeed if this will give valid addresses.
2810                          Try to validate both the first and the last
2811                          address resulting from loop unrolling, if
2812                          one fails, then can't do const elim here.  */
2813                       if (verify_addresses (v, giv_inc, unroll_number))
2814                         {
2815                           /* Save the negative of the eliminated const, so
2816                              that we can calculate the dest_reg's increment
2817                              value later.  */
2818                           v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
2819
2820                           v->new_reg = XEXP (v->new_reg, 0);
2821                           if (loop_dump_stream)
2822                             fprintf (loop_dump_stream,
2823                                      "Eliminating constant from giv %d\n",
2824                                      REGNO (tem));
2825                         }
2826                       else
2827                         v->dest_reg = tem;
2828                     }
2829                   else
2830                     v->dest_reg = tem;
2831
2832                   /* If the address hasn't been checked for validity yet, do so
2833                      now, and fail completely if either the first or the last
2834                      unrolled copy of the address is not a valid address
2835                      for the instruction that uses it.  */
2836                   if (v->dest_reg == tem
2837                       && ! verify_addresses (v, giv_inc, unroll_number))
2838                     {
2839                       if (loop_dump_stream)
2840                         fprintf (loop_dump_stream,
2841                                  "Invalid address for giv at insn %d\n",
2842                                  INSN_UID (v->insn));
2843                       continue;
2844                     }
2845
2846                   /* To initialize the new register, just move the value of
2847                      new_reg into it.  This is not guaranteed to give a valid
2848                      instruction on machines with complex addressing modes.
2849                      If we can't recognize it, then delete it and emit insns
2850                      to calculate the value from scratch.  */
2851                   emit_insn_before (gen_rtx (SET, VOIDmode, tem,
2852                                              copy_rtx (v->new_reg)),
2853                                     loop_start);
2854                   if (recog_memoized (PREV_INSN (loop_start)) < 0)
2855                     {
2856                       rtx sequence, ret;
2857
2858                       /* We can't use bl->initial_value to compute the initial
2859                          value, because the loop may have been preconditioned.
2860                          We must calculate it from NEW_REG.  Try using
2861                          force_operand instead of emit_iv_add_mult.  */
2862                       delete_insn (PREV_INSN (loop_start));
2863
2864                       start_sequence ();
2865                       ret = force_operand (v->new_reg, tem);
2866                       if (ret != tem)
2867                         emit_move_insn (tem, ret);
2868                       sequence = gen_sequence ();
2869                       end_sequence ();
2870                       emit_insn_before (sequence, loop_start);
2871
2872                       if (loop_dump_stream)
2873                         fprintf (loop_dump_stream,
2874                                  "Invalid init insn, rewritten.\n");
2875                     }
2876                 }
2877               else
2878                 {
2879                   v->dest_reg = value;
2880
2881                   /* Check the resulting address for validity, and fail
2882                      if the resulting address would be invalid.  */
2883                   if (! verify_addresses (v, giv_inc, unroll_number))
2884                     {
2885                       if (loop_dump_stream)
2886                         fprintf (loop_dump_stream,
2887                                  "Invalid address for giv at insn %d\n",
2888                                  INSN_UID (v->insn));
2889                       continue;
2890                     }
2891                 }
2892
2893               /* Store the value of dest_reg into the insn.  This sharing
2894                  will not be a problem as this insn will always be copied
2895                  later.  */
2896
2897               *v->location = v->dest_reg;
2898
2899               /* If this address giv is combined with a dest reg giv, then
2900                  save the base giv's induction pointer so that we will be
2901                  able to handle this address giv properly.  The base giv
2902                  itself does not have to be splittable.  */
2903
2904               if (v->same && v->same->giv_type == DEST_REG)
2905                 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
2906
2907               if (GET_CODE (v->new_reg) == REG)
2908                 {
2909                   /* This giv maybe hasn't been combined with any others.
2910                      Make sure that it's giv is marked as splittable here.  */
2911
2912                   splittable_regs[REGNO (v->new_reg)] = value;
2913
2914                   /* Make it appear to depend upon itself, so that the
2915                      giv will be properly split in the main loop above.  */
2916                   if (! v->same)
2917                     {
2918                       v->same = v;
2919                       addr_combined_regs[REGNO (v->new_reg)] = v;
2920                     }
2921                 }
2922
2923               if (loop_dump_stream)
2924                 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
2925             }
2926         }
2927       else
2928         {
2929 #if 0
2930           /* Currently, unreduced giv's can't be split.  This is not too much
2931              of a problem since unreduced giv's are not live across loop
2932              iterations anyways.  When unrolling a loop completely though,
2933              it makes sense to reduce&split givs when possible, as this will
2934              result in simpler instructions, and will not require that a reg
2935              be live across loop iterations.  */
2936
2937           splittable_regs[REGNO (v->dest_reg)] = value;
2938           fprintf (stderr, "Giv %d at insn %d not reduced\n",
2939                    REGNO (v->dest_reg), INSN_UID (v->insn));
2940 #else
2941           continue;
2942 #endif
2943         }
2944
2945       /* Givs are only updated once by definition.  Mark it so if this is
2946          a splittable register.  Don't need to do anything for address givs
2947          where this may not be a register.  */
2948
2949       if (GET_CODE (v->new_reg) == REG)
2950         splittable_regs_updates[REGNO (v->new_reg)] = 1;
2951
2952       result++;
2953
2954       if (loop_dump_stream)
2955         {
2956           int regnum;
2957
2958           if (GET_CODE (v->dest_reg) == CONST_INT)
2959             regnum = -1;
2960           else if (GET_CODE (v->dest_reg) != REG)
2961             regnum = REGNO (XEXP (v->dest_reg, 0));
2962           else
2963             regnum = REGNO (v->dest_reg);
2964           fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
2965                    regnum, INSN_UID (v->insn));
2966         }
2967     }
2968
2969   return result;
2970 }
2971 \f
2972 /* Try to prove that the register is dead after the loop exits.  Trace every
2973    loop exit looking for an insn that will always be executed, which sets
2974    the register to some value, and appears before the first use of the register
2975    is found.  If successful, then return 1, otherwise return 0.  */
2976
2977 /* ?? Could be made more intelligent in the handling of jumps, so that
2978    it can search past if statements and other similar structures.  */
2979
2980 static int
2981 reg_dead_after_loop (reg, loop_start, loop_end)
2982      rtx reg, loop_start, loop_end;
2983 {
2984   rtx insn, label;
2985   enum rtx_code code;
2986   int jump_count = 0;
2987   int label_count = 0;
2988   int this_loop_num = uid_loop_num[INSN_UID (loop_start)];
2989
2990   /* In addition to checking all exits of this loop, we must also check
2991      all exits of inner nested loops that would exit this loop.  We don't
2992      have any way to identify those, so we just give up if there are any
2993      such inner loop exits.  */
2994
2995   for (label = loop_number_exit_labels[this_loop_num]; label;
2996        label = LABEL_NEXTREF (label))
2997     label_count++;
2998
2999   if (label_count != loop_number_exit_count[this_loop_num])
3000     return 0;
3001
3002   /* HACK: Must also search the loop fall through exit, create a label_ref
3003      here which points to the loop_end, and append the loop_number_exit_labels
3004      list to it.  */
3005   label = gen_rtx (LABEL_REF, VOIDmode, loop_end);
3006   LABEL_NEXTREF (label) = loop_number_exit_labels[this_loop_num];
3007
3008   for ( ; label; label = LABEL_NEXTREF (label))
3009     {
3010       /* Succeed if find an insn which sets the biv or if reach end of
3011          function.  Fail if find an insn that uses the biv, or if come to
3012          a conditional jump.  */
3013
3014       insn = NEXT_INSN (XEXP (label, 0));
3015       while (insn)
3016         {
3017           code = GET_CODE (insn);
3018           if (GET_RTX_CLASS (code) == 'i')
3019             {
3020               rtx set;
3021
3022               if (reg_referenced_p (reg, PATTERN (insn)))
3023                 return 0;
3024
3025               set = single_set (insn);
3026               if (set && rtx_equal_p (SET_DEST (set), reg))
3027                 break;
3028             }
3029
3030           if (code == JUMP_INSN)
3031             {
3032               if (GET_CODE (PATTERN (insn)) == RETURN)
3033                 break;
3034               else if (! simplejump_p (insn)
3035                        /* Prevent infinite loop following infinite loops.  */
3036                        || jump_count++ > 20)
3037                 return 0;
3038               else
3039                 insn = JUMP_LABEL (insn);
3040             }
3041
3042           insn = NEXT_INSN (insn);
3043         }
3044     }
3045
3046   /* Success, the register is dead on all loop exits.  */
3047   return 1;
3048 }
3049
3050 /* Try to calculate the final value of the biv, the value it will have at
3051    the end of the loop.  If we can do it, return that value.  */
3052
3053 rtx
3054 final_biv_value (bl, loop_start, loop_end)
3055      struct iv_class *bl;
3056      rtx loop_start, loop_end;
3057 {
3058   rtx increment, tem;
3059
3060   /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */
3061
3062   if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3063     return 0;
3064
3065   /* The final value for reversed bivs must be calculated differently than
3066       for ordinary bivs.  In this case, there is already an insn after the
3067      loop which sets this biv's final value (if necessary), and there are
3068      no other loop exits, so we can return any value.  */
3069   if (bl->reversed)
3070     {
3071       if (loop_dump_stream)
3072         fprintf (loop_dump_stream,
3073                  "Final biv value for %d, reversed biv.\n", bl->regno);
3074
3075       return const0_rtx;
3076     }
3077
3078   /* Try to calculate the final value as initial value + (number of iterations
3079      * increment).  For this to work, increment must be invariant, the only
3080      exit from the loop must be the fall through at the bottom (otherwise
3081      it may not have its final value when the loop exits), and the initial
3082      value of the biv must be invariant.  */
3083
3084   if (loop_n_iterations != 0
3085       && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
3086       && invariant_p (bl->initial_value))
3087     {
3088       increment = biv_total_increment (bl, loop_start, loop_end);
3089
3090       if (increment && invariant_p (increment))
3091         {
3092           /* Can calculate the loop exit value, emit insns after loop
3093              end to calculate this value into a temporary register in
3094              case it is needed later.  */
3095
3096           tem = gen_reg_rtx (bl->biv->mode);
3097           /* Make sure loop_end is not the last insn.  */
3098           if (NEXT_INSN (loop_end) == 0)
3099             emit_note_after (NOTE_INSN_DELETED, loop_end);
3100           emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3101                             bl->initial_value, tem, NEXT_INSN (loop_end));
3102
3103           if (loop_dump_stream)
3104             fprintf (loop_dump_stream,
3105                      "Final biv value for %d, calculated.\n", bl->regno);
3106
3107           return tem;
3108         }
3109     }
3110
3111   /* Check to see if the biv is dead at all loop exits.  */
3112   if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3113     {
3114       if (loop_dump_stream)
3115         fprintf (loop_dump_stream,
3116                  "Final biv value for %d, biv dead after loop exit.\n",
3117                  bl->regno);
3118
3119       return const0_rtx;
3120     }
3121
3122   return 0;
3123 }
3124
3125 /* Try to calculate the final value of the giv, the value it will have at
3126    the end of the loop.  If we can do it, return that value.  */
3127
3128 rtx
3129 final_giv_value (v, loop_start, loop_end)
3130      struct induction *v;
3131      rtx loop_start, loop_end;
3132 {
3133   struct iv_class *bl;
3134   rtx insn;
3135   rtx increment, tem;
3136   rtx insert_before, seq;
3137
3138   bl = reg_biv_class[REGNO (v->src_reg)];
3139
3140   /* The final value for givs which depend on reversed bivs must be calculated
3141      differently than for ordinary givs.  In this case, there is already an
3142      insn after the loop which sets this giv's final value (if necessary),
3143      and there are no other loop exits, so we can return any value.  */
3144   if (bl->reversed)
3145     {
3146       if (loop_dump_stream)
3147         fprintf (loop_dump_stream,
3148                  "Final giv value for %d, depends on reversed biv\n",
3149                  REGNO (v->dest_reg));
3150       return const0_rtx;
3151     }
3152
3153   /* Try to calculate the final value as a function of the biv it depends
3154      upon.  The only exit from the loop must be the fall through at the bottom
3155      (otherwise it may not have its final value when the loop exits).  */
3156
3157   /* ??? Can calculate the final giv value by subtracting off the
3158      extra biv increments times the giv's mult_val.  The loop must have
3159      only one exit for this to work, but the loop iterations does not need
3160      to be known.  */
3161
3162   if (loop_n_iterations != 0
3163       && ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
3164     {
3165       /* ?? It is tempting to use the biv's value here since these insns will
3166          be put after the loop, and hence the biv will have its final value
3167          then.  However, this fails if the biv is subsequently eliminated.
3168          Perhaps determine whether biv's are eliminable before trying to
3169          determine whether giv's are replaceable so that we can use the
3170          biv value here if it is not eliminable.  */
3171
3172       increment = biv_total_increment (bl, loop_start, loop_end);
3173
3174       if (increment && invariant_p (increment))
3175         {
3176           /* Can calculate the loop exit value of its biv as
3177              (loop_n_iterations * increment) + initial_value */
3178
3179           /* The loop exit value of the giv is then
3180              (final_biv_value - extra increments) * mult_val + add_val.
3181              The extra increments are any increments to the biv which
3182              occur in the loop after the giv's value is calculated.
3183              We must search from the insn that sets the giv to the end
3184              of the loop to calculate this value.  */
3185
3186           insert_before = NEXT_INSN (loop_end);
3187
3188           /* Put the final biv value in tem.  */
3189           tem = gen_reg_rtx (bl->biv->mode);
3190           emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3191                             bl->initial_value, tem, insert_before);
3192
3193           /* Subtract off extra increments as we find them.  */
3194           for (insn = NEXT_INSN (v->insn); insn != loop_end;
3195                insn = NEXT_INSN (insn))
3196             {
3197               struct induction *biv;
3198
3199               for (biv = bl->biv; biv; biv = biv->next_iv)
3200                 if (biv->insn == insn)
3201                   {
3202                     start_sequence ();
3203                     tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3204                                         biv->add_val, NULL_RTX, 0,
3205                                         OPTAB_LIB_WIDEN);
3206                     seq = gen_sequence ();
3207                     end_sequence ();
3208                     emit_insn_before (seq, insert_before);
3209                   }
3210             }
3211
3212           /* Now calculate the giv's final value.  */
3213           emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3214                             insert_before);
3215
3216           if (loop_dump_stream)
3217             fprintf (loop_dump_stream,
3218                      "Final giv value for %d, calc from biv's value.\n",
3219                      REGNO (v->dest_reg));
3220
3221           return tem;
3222         }
3223     }
3224
3225   /* Replaceable giv's should never reach here.  */
3226   if (v->replaceable)
3227     abort ();
3228
3229   /* Check to see if the biv is dead at all loop exits.  */
3230   if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3231     {
3232       if (loop_dump_stream)
3233         fprintf (loop_dump_stream,
3234                  "Final giv value for %d, giv dead after loop exit.\n",
3235                  REGNO (v->dest_reg));
3236
3237       return const0_rtx;
3238     }
3239
3240   return 0;
3241 }
3242
3243
3244 /* Calculate the number of loop iterations.  Returns the exact number of loop
3245    iterations if it can be calculated, otherwise returns zero.  */
3246
3247 unsigned HOST_WIDE_INT
3248 loop_iterations (loop_start, loop_end)
3249      rtx loop_start, loop_end;
3250 {
3251   rtx comparison, comparison_value;
3252   rtx iteration_var, initial_value, increment, final_value;
3253   enum rtx_code comparison_code;
3254   HOST_WIDE_INT i;
3255   int increment_dir;
3256   int unsigned_compare, compare_dir, final_larger;
3257   unsigned long tempu;
3258   rtx last_loop_insn;
3259
3260   /* First find the iteration variable.  If the last insn is a conditional
3261      branch, and the insn before tests a register value, make that the
3262      iteration variable.  */
3263
3264   loop_initial_value = 0;
3265   loop_increment = 0;
3266   loop_final_value = 0;
3267   loop_iteration_var = 0;
3268
3269   /* We used to use pren_nonnote_insn here, but that fails because it might
3270      accidentally get the branch for a contained loop if the branch for this
3271      loop was deleted.  We can only trust branches immediately before the
3272      loop_end.  */
3273   last_loop_insn = PREV_INSN (loop_end);
3274
3275   comparison = get_condition_for_loop (last_loop_insn);
3276   if (comparison == 0)
3277     {
3278       if (loop_dump_stream)
3279         fprintf (loop_dump_stream,
3280                  "Loop unrolling: No final conditional branch found.\n");
3281       return 0;
3282     }
3283
3284   /* ??? Get_condition may switch position of induction variable and
3285      invariant register when it canonicalizes the comparison.  */
3286
3287   comparison_code = GET_CODE (comparison);
3288   iteration_var = XEXP (comparison, 0);
3289   comparison_value = XEXP (comparison, 1);
3290
3291   if (GET_CODE (iteration_var) != REG)
3292     {
3293       if (loop_dump_stream)
3294         fprintf (loop_dump_stream,
3295                  "Loop unrolling: Comparison not against register.\n");
3296       return 0;
3297     }
3298
3299   /* Loop iterations is always called before any new registers are created
3300      now, so this should never occur.  */
3301
3302   if (REGNO (iteration_var) >= max_reg_before_loop)
3303     abort ();
3304
3305   iteration_info (iteration_var, &initial_value, &increment,
3306                   loop_start, loop_end);
3307   if (initial_value == 0)
3308     /* iteration_info already printed a message.  */
3309     return 0;
3310
3311   /* If the comparison value is an invariant register, then try to find
3312      its value from the insns before the start of the loop.  */
3313
3314   if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3315     {
3316       rtx insn, set;
3317
3318       for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3319         {
3320           if (GET_CODE (insn) == CODE_LABEL)
3321             break;
3322
3323           else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3324                    && reg_set_p (comparison_value, insn))
3325             {
3326               /* We found the last insn before the loop that sets the register.
3327                  If it sets the entire register, and has a REG_EQUAL note,
3328                  then use the value of the REG_EQUAL note.  */
3329               if ((set = single_set (insn))
3330                   && (SET_DEST (set) == comparison_value))
3331                 {
3332                   rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3333
3334                   /* Only use the REG_EQUAL note if it is a constant.
3335                      Other things, divide in particular, will cause
3336                      problems later if we use them.  */
3337                   if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3338                       && CONSTANT_P (XEXP (note, 0)))
3339                     comparison_value = XEXP (note, 0);
3340                 }
3341               break;
3342             }
3343         }
3344     }
3345
3346   final_value = approx_final_value (comparison_code, comparison_value,
3347                                     &unsigned_compare, &compare_dir);
3348
3349   /* Save the calculated values describing this loop's bounds, in case
3350      precondition_loop_p will need them later.  These values can not be
3351      recalculated inside precondition_loop_p because strength reduction
3352      optimizations may obscure the loop's structure.  */
3353
3354   loop_iteration_var = iteration_var;
3355   loop_initial_value = initial_value;
3356   loop_increment = increment;
3357   loop_final_value = final_value;
3358
3359   if (increment == 0)
3360     {
3361       if (loop_dump_stream)
3362         fprintf (loop_dump_stream,
3363                  "Loop unrolling: Increment value can't be calculated.\n");
3364       return 0;
3365     }
3366   else if (GET_CODE (increment) != CONST_INT)
3367     {
3368       if (loop_dump_stream)
3369         fprintf (loop_dump_stream,
3370                  "Loop unrolling: Increment value not constant.\n");
3371       return 0;
3372     }
3373   else if (GET_CODE (initial_value) != CONST_INT)
3374     {
3375       if (loop_dump_stream)
3376         fprintf (loop_dump_stream,
3377                  "Loop unrolling: Initial value not constant.\n");
3378       return 0;
3379     }
3380   else if (final_value == 0)
3381     {
3382       if (loop_dump_stream)
3383         fprintf (loop_dump_stream,
3384                  "Loop unrolling: EQ comparison loop.\n");
3385       return 0;
3386     }
3387   else if (GET_CODE (final_value) != CONST_INT)
3388     {
3389       if (loop_dump_stream)
3390         fprintf (loop_dump_stream,
3391                  "Loop unrolling: Final value not constant.\n");
3392       return 0;
3393     }
3394
3395   /* ?? Final value and initial value do not have to be constants.
3396      Only their difference has to be constant.  When the iteration variable
3397      is an array address, the final value and initial value might both
3398      be addresses with the same base but different constant offsets.
3399      Final value must be invariant for this to work.
3400
3401      To do this, need some way to find the values of registers which are
3402      invariant.  */
3403
3404   /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1.  */
3405   if (unsigned_compare)
3406     final_larger
3407       = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3408          > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3409         - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3410            < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3411   else
3412     final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3413       - (INTVAL (final_value) < INTVAL (initial_value));
3414
3415   if (INTVAL (increment) > 0)
3416     increment_dir = 1;
3417   else if (INTVAL (increment) == 0)
3418     increment_dir = 0;
3419   else
3420     increment_dir = -1;
3421
3422   /* There are 27 different cases: compare_dir = -1, 0, 1;
3423      final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3424      There are 4 normal cases, 4 reverse cases (where the iteration variable
3425      will overflow before the loop exits), 4 infinite loop cases, and 15
3426      immediate exit (0 or 1 iteration depending on loop type) cases.
3427      Only try to optimize the normal cases.  */
3428
3429   /* (compare_dir/final_larger/increment_dir)
3430      Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3431      Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3432      Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3433      Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3434
3435   /* ?? If the meaning of reverse loops (where the iteration variable
3436      will overflow before the loop exits) is undefined, then could
3437      eliminate all of these special checks, and just always assume
3438      the loops are normal/immediate/infinite.  Note that this means
3439      the sign of increment_dir does not have to be known.  Also,
3440      since it does not really hurt if immediate exit loops or infinite loops
3441      are optimized, then that case could be ignored also, and hence all
3442      loops can be optimized.
3443
3444      According to ANSI Spec, the reverse loop case result is undefined,
3445      because the action on overflow is undefined.
3446
3447      See also the special test for NE loops below.  */
3448
3449   if (final_larger == increment_dir && final_larger != 0
3450       && (final_larger == compare_dir || compare_dir == 0))
3451     /* Normal case.  */
3452     ;
3453   else
3454     {
3455       if (loop_dump_stream)
3456         fprintf (loop_dump_stream,
3457                  "Loop unrolling: Not normal loop.\n");
3458       return 0;
3459     }
3460
3461   /* Calculate the number of iterations, final_value is only an approximation,
3462      so correct for that.  Note that tempu and loop_n_iterations are
3463      unsigned, because they can be as large as 2^n - 1.  */
3464
3465   i = INTVAL (increment);
3466   if (i > 0)
3467     tempu = INTVAL (final_value) - INTVAL (initial_value);
3468   else if (i < 0)
3469     {
3470       tempu = INTVAL (initial_value) - INTVAL (final_value);
3471       i = -i;
3472     }
3473   else
3474     abort ();
3475
3476   /* For NE tests, make sure that the iteration variable won't miss the
3477      final value.  If tempu mod i is not zero, then the iteration variable
3478      will overflow before the loop exits, and we can not calculate the
3479      number of iterations.  */
3480   if (compare_dir == 0 && (tempu % i) != 0)
3481     return 0;
3482
3483   return tempu / i + ((tempu % i) != 0);
3484 }
3485
3486 /* Replace uses of split bivs with their split pseudo register.  This is
3487    for original instructions which remain after loop unrolling without
3488    copying.  */
3489
3490 static rtx
3491 remap_split_bivs (x)
3492      rtx x;
3493 {
3494   register enum rtx_code code;
3495   register int i;
3496   register char *fmt;
3497
3498   if (x == 0)
3499     return x;
3500
3501   code = GET_CODE (x);
3502   switch (code)
3503     {
3504     case SCRATCH:
3505     case PC:
3506     case CC0:
3507     case CONST_INT:
3508     case CONST_DOUBLE:
3509     case CONST:
3510     case SYMBOL_REF:
3511     case LABEL_REF:
3512       return x;
3513
3514     case REG:
3515 #if 0
3516       /* If non-reduced/final-value givs were split, then this would also
3517          have to remap those givs also.  */
3518 #endif
3519       if (REGNO (x) < max_reg_before_loop
3520           && reg_iv_type[REGNO (x)] == BASIC_INDUCT)
3521         return reg_biv_class[REGNO (x)]->biv->src_reg;
3522     }
3523
3524   fmt = GET_RTX_FORMAT (code);
3525   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3526     {
3527       if (fmt[i] == 'e')
3528         XEXP (x, i) = remap_split_bivs (XEXP (x, i));
3529       if (fmt[i] == 'E')
3530         {
3531           register int j;
3532           for (j = 0; j < XVECLEN (x, i); j++)
3533             XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
3534         }
3535     }
3536   return x;
3537 }
3538
3539 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
3540    FIST_UID is always executed if LAST_UID is), then return 1.  Otherwise
3541    return 0.  COPY_START is where we can start looking for the insns
3542    FIRST_UID and LAST_UID.  COPY_END is where we stop looking for these
3543    insns.
3544
3545    If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
3546    must dominate LAST_UID.
3547
3548    If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3549    may not dominate LAST_UID.
3550
3551    If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
3552    must dominate LAST_UID.  */
3553
3554 int
3555 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
3556      int regno;
3557      int first_uid;
3558      int last_uid;
3559      rtx copy_start;
3560      rtx copy_end;
3561 {
3562   int passed_jump = 0;
3563   rtx p = NEXT_INSN (copy_start);
3564
3565   while (INSN_UID (p) != first_uid)
3566     {
3567       if (GET_CODE (p) == JUMP_INSN)
3568         passed_jump= 1;
3569       /* Could not find FIRST_UID.  */
3570       if (p == copy_end)
3571         return 0;
3572       p = NEXT_INSN (p);
3573     }
3574
3575   /* Verify that FIRST_UID is an insn that entirely sets REGNO.  */
3576   if (GET_RTX_CLASS (GET_CODE (p)) != 'i'
3577       || ! dead_or_set_regno_p (p, regno))
3578     return 0;
3579
3580   /* FIRST_UID is always executed.  */
3581   if (passed_jump == 0)
3582     return 1;
3583
3584   while (INSN_UID (p) != last_uid)
3585     {
3586       /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
3587          can not be sure that FIRST_UID dominates LAST_UID.  */
3588       if (GET_CODE (p) == CODE_LABEL)
3589         return 0;
3590       p = NEXT_INSN (p);
3591     }
3592
3593   /* FIRST_UID is always executed if LAST_UID is executed.  */
3594   return 1;
3595 }