Initial revision

author Richard Kenner <kenner@gcc.gnu.org>

Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)

committer Richard Kenner <kenner@gcc.gnu.org>

Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)
author Richard Kenner <kenner@gcc.gnu.org>
Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)
committer Richard Kenner <kenner@gcc.gnu.org>
Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)
diff --git a/gcc/unroll.c b/gcc/unroll.c

new file mode 100644 (file)

index 0000000..249099a
--- /dev/null
+++ b/gcc/unroll.c
@@ -0,0 +1,3156 @@
+/* Try to unroll loops, and split induction variables.
+   Copyright (C) 1992 Free Software Foundation, Inc.
+   Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* Try to unroll a loop, and split induction variables.
+
+   Loops for which the number of iterations can be calculated exactly are
+   handled specially.  If the number of iterations times the insn_count is
+   less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
+   Otherwise, we try to unroll the loop a number of times modulo the number
+   of iterations, so that only one exit test will be needed.  It is unrolled
+   a number of times approximately equal to MAX_UNROLLED_INSNS divided by
+   the insn count.
+
+   Otherwise, if the number of iterations can be calculated exactly at
+   run time, and the loop is always entered at the top, then we try to
+   precondition the loop.  That is, at run time, calculate how many times
+   the loop will execute, and then execute the loop body a few times so
+   that the remaining iterations will be some multiple of 4 (or 2 if the
+   loop is large).  Then fall through to a loop unrolled 4 (or 2) times,
+   with only one exit test needed at the end of the loop.
+
+   Otherwise, if the number of iterations can not be calculated exactly,
+   not even at run time, then we still unroll the loop a number of times
+   approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
+   but there must be an exit test after each copy of the loop body.
+
+   For each induction variable, which is dead outside the loop (replaceable)
+   or for which we can easily calculate the final value, if we can easily
+   calculate its value at each place where it is set as a function of the
+   current loop unroll count and the variable's value at loop entry, then
+   the induction variable is split into `N' different variables, one for
+   each copy of the loop body.  One variable is live across the backward
+   branch, and the others are all calculated as a function of this variable.
+   This helps eliminate data dependencies, and leads to further opportunities
+   for cse.  */
+
+/* Possible improvements follow:  */
+
+/* ??? Add an extra pass somewhere to determine whether unrolling will
+   give any benefit.  E.g. after generating all unrolled insns, compute the
+   cost of all insns and compare against cost of insns in rolled loop.
+
+   - On traditional architectures, unrolling a non-constant bound loop
+     is a win if there is a giv whose only use is in memory addresses, the
+     memory addresses can be split, and hence giv incremenets can be
+     eliminated.
+   - It is also a win if the loop is executed many times, and preconditioning
+     can be performed for the loop.
+   Add code to check for these and similar cases.  */
+
+/* ??? Improve control of which loops get unrolled.  Could use profiling
+   info to only unroll the most commonly executed loops.  Perhaps have
+   a user specifyable option to control the amount of code expansion,
+   or the percent of loops to consider for unrolling.  Etc.  */
+
+/* ??? Look at the register copies inside the loop to see if they form a
+   simple permutation.  If so, iterate the permutation until it gets back to
+   the start state.  This is how many times we should unroll the loop, for
+   best results, because then all register copies can be eliminated.
+   For example, the lisp nreverse function should be unrolled 3 times
+   while (this)
+     {
+       next = this->cdr;
+       this->cdr = prev;
+       prev = this;
+       this = next;
+     }
+
+   ??? The number of times to unroll the loop may also be based on data
+   references in the loop.  For example, if we have a loop that references
+   x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times.  */
+
+/* ??? Add some simple linear equation solving capability so that we can
+   determine the number of loop iterations for more complex loops.
+   For example, consider this loop from gdb
+   #define SWAP_TARGET_AND_HOST(buffer,len)
+     {
+       char tmp;
+       char *p = (char *) buffer;
+       char *q = ((char *) buffer) + len - 1;
+       int iterations = (len + 1) >> 1;
+       int i;
+       for (p; p < q; p++, q--;)
+         {
+           tmp = *q;
+           *q = *p;
+           *p = tmp;
+         }
+     }
+   Note that:
+     start value = p = &buffer + current_iteration
+     end value   = q = &buffer + len - 1 - current_iteration
+   Given the loop exit test of "p < q", then there must be "q - p" iterations,
+   set equal to zero and solve for number of iterations:
+     q - p = len - 1 - 2*current_iteration = 0
+     current_iteration = (len - 1) / 2
+   Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
+   iterations of this loop.  */
+
+/* ??? Currently, no labels are marked as loop invariant when doing loop
+   unrolling.  This is because an insn inside the loop, that loads the address
+   of a label inside the loop into a register, could be moved outside the loop
+   by the invariant code motion pass if labels were invariant.  If the loop
+   is subsequently unrolled, the code will be wrong because each unrolled
+   body of the loop will use the same address, whereas each actually needs a
+   different address.  A case where this happens is when a loop containing
+   a switch statement is unrolled.
+
+   It would be better to let labels be considered invariant.  When we
+   unroll loops here, check to see if any insns using a label local to the
+   loop were moved before the loop.  If so, then correct the problem, by
+   moving the insn back into the loop, or perhaps replicate the insn before
+   the loop, one copy for each time the loop is unrolled.  */
+
+/* The prime factors looked for when trying to unroll a loop by some
+   number which is modulo the total number of iterations.  Just checking
+   for these 4 prime factors will find at least one factor for 75% of
+   all numbers theoretically.  Practically speaking, this will succeed
+   almost all of the time since loops are generally a multiple of 2
+   and/or 5.  */
+
+#define NUM_FACTORS 4
+
+struct _factor { int factor, count; } factors[NUM_FACTORS]
+  = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
+      
+/* Describes the different types of loop unrolling performed.  */
+
+enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
+
+#include "config.h"
+#include "rtl.h"
+#include "insn-config.h"
+#include "integrate.h"
+#include "regs.h"
+#include "flags.h"
+#include "expr.h"
+#include <stdio.h>
+#include "loop.h"
+
+/* This controls which loops are unrolled, and by how much we unroll
+   them.  */
+
+#ifndef MAX_UNROLLED_INSNS
+#define MAX_UNROLLED_INSNS 100
+#endif
+
+/* Indexed by register number, if non-zero, then it contains a pointer
+   to a struct induction for a DEST_REG giv which has been combined with
+   one of more address givs.  This is needed because whenever such a DEST_REG
+   giv is modified, we must modify the value of all split address givs
+   that were combined with this DEST_REG giv.  */
+
+static struct induction **addr_combined_regs;
+
+/* Indexed by register number, if this is a splittable induction variable,
+   then this will hold the current value of the register, which depends on the
+   iteration number.  */
+
+static rtx *splittable_regs;
+
+/* Indexed by register number, if this is a splittable induction variable,
+   then this will hold the number of instructions in the loop that modify
+   the induction variable.  Used to ensure that only the last insn modifying
+   a split iv will update the original iv of the dest.  */
+
+static int *splittable_regs_updates;
+
+/* Values describing the current loop's iteration variable.  These are set up
+   by loop_iterations, and used by precondition_loop_p.  */
+
+static rtx loop_iteration_var;
+static rtx loop_initial_value;
+static rtx loop_increment;
+static rtx loop_final_value;
+
+/* Forward declarations.  */
+
+static void init_reg_map ();
+static int precondition_loop_p ();
+static void copy_loop_body ();
+static void iteration_info ();
+static rtx approx_final_value ();
+static int find_splittable_regs ();
+static int find_splittable_givs ();
+static rtx fold_rtx_mult_add ();
+
+/* Try to unroll one loop and split induction variables in the loop.
+
+   The loop is described by the arguments LOOP_END, INSN_COUNT, and
+   LOOP_START.  END_INSERT_BEDFORE indicates where insns should be added
+   which need to be executed when the loop falls through.  STRENGTH_REDUCTION_P
+   indicates whether information generated in the strength reduction pass
+   is available.
+
+   This function is intended to be called from within `strength_reduce'
+   in loop.c.  */
+
+void
+unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
+            strength_reduce_p)
+     rtx loop_end;
+     int insn_count;
+     rtx loop_start;
+     rtx end_insert_before;
+     int strength_reduce_p;
+{
+  int i, j, temp;
+  int unroll_number = 1;
+  rtx copy_start, copy_end;
+  rtx insn, copy, sequence, pattern, tem;
+  int max_labelno, max_insnno;
+  rtx insert_before;
+  struct inline_remap *map;
+  char *local_label;
+  int maxregnum;
+  int new_maxregnum;
+  rtx exit_label = 0;
+  rtx start_label;
+  struct iv_class *bl;
+  struct induction *v;
+  int splitting_not_safe = 0;
+  enum unroll_types unroll_type;
+  int loop_preconditioned = 0;
+  rtx safety_label;
+  /* This points to the last real insn in the loop, which should be either
+     a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
+     jumps).  */
+  rtx last_loop_insn;
+
+  /* Don't bother unrolling huge loops.  Since the minimum factor is
+     two, loops greater than one half of MAX_UNROLLED_INSNS will never
+     be unrolled.  */
+  if (insn_count > MAX_UNROLLED_INSNS / 2)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
+      return;
+    }
+
+  /* When emitting debugger info, we can't unroll loops with unequal numbers
+     of block_beg and block_end notes, because that would unbalance the block
+     structure of the function.  This can happen as a result of the
+     "if (foo) bar; else break;" optimization in jump.c.  */
+
+  if (write_symbols != NO_DEBUG)
+    {
+      int block_begins = 0;
+      int block_ends = 0;
+
+      for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
+       {
+         if (GET_CODE (insn) == NOTE)
+           {
+             if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
+               block_begins++;
+             else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
+               block_ends++;
+           }
+       }
+
+      if (block_begins != block_ends)
+       {
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Unrolling failure: Unbalanced block notes.\n");
+         return;
+       }
+    }
+
+  /* Determine type of unroll to perform.  Depends on the number of iterations
+     and the size of the loop.  */
+
+  /* If there is no strength reduce info, then set loop_n_iterations to zero.
+     This can happen if strength_reduce can't find any bivs in the loop.
+     A value of zero indicates that the number of iterations could not be
+     calculated.  */
+
+  if (! strength_reduce_p)
+    loop_n_iterations = 0;
+
+  if (loop_dump_stream && loop_n_iterations > 0)
+    fprintf (loop_dump_stream,
+            "Loop unrolling: %d iterations.\n", loop_n_iterations);
+
+  /* Find and save a pointer to the last nonnote insn in the loop.  */
+
+  last_loop_insn = prev_nonnote_insn (loop_end);
+
+  /* Calculate how many times to unroll the loop.  Indicate whether or
+     not the loop is being completely unrolled.  */
+
+  if (loop_n_iterations == 1)
+    {
+      /* If number of iterations is exactly 1, then eliminate the compare and
+        branch at the end of the loop since they will never be taken.
+        Then return, since no other action is needed here.  */
+
+      /* If the last instruction is not a BARRIER or a JUMP_INSN, then
+        don't do anything.  */
+
+      if (GET_CODE (last_loop_insn) == BARRIER)
+       {
+         /* Delete the jump insn.  This will delete the barrier also.  */
+         delete_insn (PREV_INSN (last_loop_insn));
+       }
+      else if (GET_CODE (last_loop_insn) == JUMP_INSN)
+       {
+#ifdef HAVE_cc0
+         /* The immediately preceeding insn is a compare which must be
+            deleted.  */
+         delete_insn (last_loop_insn);
+         delete_insn (PREV_INSN (last_loop_insn));
+#else
+         /* The immediately preceeding insn may not be the compare, so don't
+            delete it.  */
+         delete_insn (last_loop_insn);
+#endif
+       }
+      return;
+    }
+  else if (loop_n_iterations > 0
+      && loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
+    {
+      unroll_number = loop_n_iterations;
+      unroll_type = UNROLL_COMPLETELY;
+    }
+  else if (loop_n_iterations > 0)
+    {
+      /* Try to factor the number of iterations.  Don't bother with the
+        general case, only using 2, 3, 5, and 7 will get 75% of all
+        numbers theoretically, and almost all in practice.  */
+
+      for (i = 0; i < NUM_FACTORS; i++)
+       factors[i].count = 0;
+
+      temp = loop_n_iterations;
+      for (i = NUM_FACTORS - 1; i >= 0; i--)
+       while (temp % factors[i].factor == 0)
+         {
+           factors[i].count++;
+           temp = temp / factors[i].factor;
+         }
+
+      /* Start with the larger factors first so that we generally
+        get lots of unrolling.  */
+
+      unroll_number = 1;
+      temp = insn_count;
+      for (i = 3; i >= 0; i--)
+       while (factors[i].count--)
+         {
+           if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
+             {
+               unroll_number *= factors[i].factor;
+               temp *= factors[i].factor;
+             }
+           else
+             break;
+         }
+
+      /* If we couldn't find any factors, then unroll as in the normal
+        case.  */
+      if (unroll_number == 1)
+       {
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Loop unrolling: No factors found.\n");
+       }
+      else
+       unroll_type = UNROLL_MODULO;
+    }
+
+
+  /* Default case, calculate number of times to unroll loop based on its
+     size.  */
+  if (unroll_number == 1)
+    {
+      if (8 * insn_count < MAX_UNROLLED_INSNS)
+       unroll_number = 8;
+      else if (4 * insn_count < MAX_UNROLLED_INSNS)
+       unroll_number = 4;
+      else
+       unroll_number = 2;
+
+      unroll_type = UNROLL_NAIVE;
+    }
+
+  /* Now we know how many times to unroll the loop.  */
+
+  if (loop_dump_stream)
+    fprintf (loop_dump_stream,
+            "Unrolling loop %d times.\n", unroll_number);
+
+
+  if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
+    {
+      /* Loops of these types should never start with a jump down to
+        the exit condition test.  For now, check for this case just to
+        be sure.  UNROLL_NAIVE loops can be of this form, this case is
+        handled below.  */
+      insn = loop_start;
+      while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
+       insn = NEXT_INSN (insn);
+      if (GET_CODE (insn) == JUMP_INSN)
+       abort ();
+    }
+
+  if (unroll_type == UNROLL_COMPLETELY)
+    {
+      /* Completely unrolling the loop:  Delete the compare and branch at
+        the end (the last two instructions).   This delete must done at the
+        very end of loop unrolling, to avoid problems with calls to
+        back_branch_in_range_p, which is called by find_splittable_regs.
+        All increments of splittable bivs/givs are changed to load constant
+        instructions.  */
+
+      copy_start = loop_start;
+
+      /* Set insert_before to the instruction immediately after the JUMP_INSN
+        (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
+        the loop will be correctly handled by copy_loop_body.  */
+      insert_before = NEXT_INSN (last_loop_insn);
+
+      /* Set copy_end to the insn before the jump at the end of the loop.  */
+      if (GET_CODE (last_loop_insn) == BARRIER)
+       copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
+      else if (GET_CODE (last_loop_insn) == JUMP_INSN)
+       {
+#ifdef HAVE_cc0
+         /* The instruction immediately before the JUMP_INSN is a compare
+            instruction which we do not want to copy.  */
+         copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
+#else
+         /* The instruction immediately before the JUMP_INSN may not be the
+            compare, so we must copy it.  */
+         copy_end = PREV_INSN (last_loop_insn);
+#endif
+       }
+      else
+       {
+         /* We currently can't unroll a loop if it doesn't end with a
+            JUMP_INSN.  There would need to be a mechanism that recognizes
+            this case, and then inserts a jump after each loop body, which
+            jumps to after the last loop body.  */
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Unrolling failure: loop does not end with a JUMP_INSN.\n");
+         return;
+       }
+    }
+  else if (unroll_type == UNROLL_MODULO)
+    {
+      /* Partially unrolling the loop:  The compare and branch at the end
+        (the last two instructions) must remain.  Don't copy the compare
+        and branch instructions at the end of the loop.  Insert the unrolled
+        code immediately before the compare/branch at the end so that the
+        code will fall through to them as before.  */
+
+      copy_start = loop_start;
+
+      /* Set insert_before to the jump insn at the end of the loop.
+        Set copy_end to before the jump insn at the end of the loop.  */
+      if (GET_CODE (last_loop_insn) == BARRIER)
+       {
+         insert_before = PREV_INSN (last_loop_insn);
+         copy_end = PREV_INSN (insert_before);
+       }
+      else if (GET_CODE (last_loop_insn) == JUMP_INSN)
+       {
+#ifdef HAVE_cc0
+         /* The instruction immediately before the JUMP_INSN is a compare
+            instruction which we do not want to copy or delete.  */
+         insert_before = PREV_INSN (last_loop_insn);
+         copy_end = PREV_INSN (insert_before);
+#else
+         /* The instruction immediately before the JUMP_INSN may not be the
+            compare, so we must copy it.  */
+         insert_before = last_loop_insn;
+         copy_end = PREV_INSN (last_loop_insn);
+#endif
+       }
+      else
+       {
+         /* We currently can't unroll a loop if it doesn't end with a
+            JUMP_INSN.  There would need to be a mechanism that recognizes
+            this case, and then inserts a jump after each loop body, which
+            jumps to after the last loop body.  */
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Unrolling failure: loop does not end with a JUMP_INSN.\n");
+         return;
+       }
+    }
+  else
+    {
+      /* Normal case: Must copy the compare and branch instructions at the
+        end of the loop.  */
+
+      if (GET_CODE (last_loop_insn) == BARRIER)
+       {
+         /* Loop ends with an unconditional jump and a barrier.
+            Handle this like above, don't copy jump and barrier.
+            This is not strictly necessary, but doing so prevents generating
+            unconditional jumps to an immediately following label.
+
+            This will be corrected below if the target of this jump is
+            not the start_label.  */
+
+         insert_before = PREV_INSN (last_loop_insn);
+         copy_end = PREV_INSN (insert_before);
+       }
+      else if (GET_CODE (last_loop_insn) == JUMP_INSN)
+       {
+         /* Set insert_before to immediately after the JUMP_INSN, so that
+            NOTEs at the end of the loop will be correctly handled by
+            copy_loop_body.  */
+         insert_before = NEXT_INSN (last_loop_insn);
+         copy_end = last_loop_insn;
+       }
+      else
+       {
+         /* We currently can't unroll a loop if it doesn't end with a
+            JUMP_INSN.  There would need to be a mechanism that recognizes
+            this case, and then inserts a jump after each loop body, which
+            jumps to after the last loop body.  */
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Unrolling failure: loop does not end with a JUMP_INSN.\n");
+         return;
+       }
+
+      /* If copying exit test branches because they can not be eliminated,
+        then must convert the fall through case of the branch to a jump past
+        the end of the loop.  Create a label to emit after the loop and save
+        it for later use.  Do not use the label after the loop, if any, since
+        it might be used by insns outside the loop, or there might be insns
+        added before it later by final_[bg]iv_value which must be after
+        the real exit label.  */
+      exit_label = gen_label_rtx ();
+
+      insn = loop_start;
+      while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
+       insn = NEXT_INSN (insn);
+
+      if (GET_CODE (insn) == JUMP_INSN)
+       {
+         /* The loop starts with a jump down to the exit condition test.
+            Start copying the loop after the barrier following this
+            jump insn.  */
+         copy_start = NEXT_INSN (insn);
+
+         /* Splitting induction variables doesn't work when the loop is
+            entered via a jump to the bottom, because then we end up doing
+            a comparison against a new register for a split variable, but
+            we did not execute the set insn for the new register because
+            it was skipped over.  */
+         splitting_not_safe = 1;
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Splitting not safe, because loop not entered at top.\n");
+       }
+      else
+       copy_start = loop_start;
+    }
+
+  /* This should always be the first label in the loop.  */
+  start_label = NEXT_INSN (copy_start);
+  /* There may be a line number note and/or a loop continue note here.  */
+  while (GET_CODE (start_label) == NOTE)
+    start_label = NEXT_INSN (start_label);
+  if (GET_CODE (start_label) != CODE_LABEL)
+    {
+      /* This can happen as a result of jump threading.  If the first insns in
+        the loop test the same condition as the loop's backward jump, or the
+        opposite condition, then the backward jump will be modified to point
+        to elsewhere, and the loop's start label is deleted.
+
+        This case currently can not be handled by the loop unrolling code.  */
+
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Unrolling failure: unknown insns between BEG note and loop label.\n");
+      return;
+    }
+
+  if (unroll_type == UNROLL_NAIVE
+      && GET_CODE (last_loop_insn) == BARRIER
+      && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
+    {
+      /* In this case, we must copy the jump and barrier, because they will
+        not be converted to jumps to an immediately following label.  */
+
+      insert_before = NEXT_INSN (last_loop_insn);
+      copy_end = last_loop_insn;
+    }
+
+  /* Allocate a translation table for the labels and insn numbers.
+     They will be filled in as we copy the insns in the loop.  */
+
+  max_labelno = max_label_num ();
+  max_insnno = get_max_uid ();
+
+  map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
+
+  /* Allocate the label map.  */
+
+  if (max_labelno > 0)
+    {
+      map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx));
+
+      local_label = (char *) alloca (max_labelno);
+      bzero (local_label, max_labelno);
+    }
+  else
+    map->label_map = 0;
+
+  /* Search the loop and mark all local labels, i.e. the ones which have to
+     be distinct labels when copied.  For all labels which might be
+     non-local, set their label_map entries to point to themselves.
+     If they happen to be local their label_map entries will be overwritten
+     before the loop body is copied.  The label_map entries for local labels
+     will be set to a different value each time the loop body is copied.  */
+
+  for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
+    {
+      if (GET_CODE (insn) == CODE_LABEL)
+       local_label[CODE_LABEL_NUMBER (insn)] = 1;
+      else if (GET_CODE (insn) == JUMP_INSN)
+       {
+         if (JUMP_LABEL (insn))
+           map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))]
+             = JUMP_LABEL (insn);
+         else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
+                  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+           {
+             rtx pat = PATTERN (insn);
+             int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
+             int len = XVECLEN (pat, diff_vec_p);
+             rtx label;
+
+             for (i = 0; i < len; i++)
+               {
+                 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
+                 map->label_map[CODE_LABEL_NUMBER (label)] = label;
+               }
+           }
+       }
+    }
+
+  /* Allocate space for the insn map.  */
+
+  map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx));
+
+  /* Set this to zero, to indicate that we are doing loop unrolling,
+     not function inlining.  */
+  map->inline_target = 0;
+
+  /* The register and constant maps depend on the number of registers
+     present, so the final maps can't be created until after
+     find_splittable_regs is called.  However, they are needed for
+     preconditioning, so we create temporary maps when preconditioning
+     is performed.  */
+
+  /* The preconditioning code may allocate two new pseudo registers.  */
+  maxregnum = max_reg_num ();
+
+  /* Allocate and zero out the splittable_regs and addr_combined_regs
+     arrays.  These must be zeroed here because they will be used if
+     loop preconditioning is performed, and must be zero for that case.
+
+     It is safe to do this here, since the extra registers created by the
+     preconditioning code and find_splittable_regs will never be used
+     to accees the splittable_regs[] and addr_combined_regs[] arrays.  */
+
+  splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx));
+  bzero (splittable_regs, maxregnum * sizeof (rtx));
+  splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int));
+  bzero (splittable_regs_updates, maxregnum * sizeof (int));
+  addr_combined_regs
+    = (struct induction **) alloca (maxregnum * sizeof (struct induction *));
+  bzero (addr_combined_regs, maxregnum * sizeof (struct induction *));
+
+  /* If this loop requires exit tests when unrolled, check to see if we
+     can precondition the loop so as to make the exit tests unnecessary.
+     Just like variable splitting, this is not safe if the loop is entered
+     via a jump to the bottom.  Also, can not do this if no strength
+     reduce info, because precondition_loop_p uses this info.  */
+
+  /* Must copy the loop body for preconditioning before the following
+     find_splittable_regs call since that will emit insns which need to
+     be after the preconditioned loop copies, but immediately before the
+     unrolled loop copies.  */
+
+  /* Also, it is not safe to split induction variables for the preconditioned
+     copies of the loop body.  If we split induction variables, then the code
+     assumes that each induction variable can be represented as a function
+     of its initial value and the loop iteration number.  This is not true
+     in this case, because the last preconditioned copy of the loop body
+     could be any iteration from the first up to the `unroll_number-1'th,
+     depending on the initial value of the iteration variable.  Therefore
+     we can not split induction variables here, because we can not calculate
+     their value.  Hence, this code must occur before find_splittable_regs
+     is called.  */
+
+  if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
+    {
+      rtx initial_value, final_value, increment;
+
+      if (precondition_loop_p (&initial_value, &final_value, &increment,
+                              loop_start, loop_end))
+       {
+         register rtx diff, temp;
+         enum machine_mode mode;
+         rtx *labels;
+         int abs_inc, neg_inc;
+
+         map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
+
+         map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx));
+         map->const_age_map = (unsigned *) alloca (maxregnum
+                                                   * sizeof (unsigned));
+         map->const_equiv_map_size = maxregnum;
+         global_const_equiv_map = map->const_equiv_map;
+
+         init_reg_map (map, maxregnum);
+
+         /* Limit loop unrolling to 4, since this will make 7 copies of
+            the loop body.  */
+         if (unroll_number > 4)
+           unroll_number = 4;
+
+         /* Save the absolute value of the increment, and also whether or
+            not it is negative.  */
+         neg_inc = 0;
+         abs_inc = INTVAL (increment);
+         if (abs_inc < 0)
+           {
+             abs_inc = - abs_inc;
+             neg_inc = 1;
+           }
+
+         start_sequence ();
+
+         /* Decide what mode to do these calculations in.  Choose the larger
+            of final_value's mode and initial_value's mode, or a full-word if
+            both are constants.  */
+         mode = GET_MODE (final_value);
+         if (mode == VOIDmode)
+           {
+             mode = GET_MODE (initial_value);
+             if (mode == VOIDmode)
+               mode = word_mode;
+           }
+         else if (mode != GET_MODE (initial_value)
+                  && (GET_MODE_SIZE (mode)
+                      < GET_MODE_SIZE (GET_MODE (initial_value))))
+           mode = GET_MODE (initial_value);
+
+         /* Calculate the difference between the final and initial values.
+            Final value may be a (plus (reg x) (const_int 1)) rtx.
+            Let the following cse pass simplify this if initial value is
+            a constant. 
+
+            We must copy the final and initial values here to avoid
+            improperly shared rtl.  */
+
+         diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
+                              copy_rtx (initial_value), 0, 0,
+                              OPTAB_LIB_WIDEN);
+
+         /* Now calculate (diff % (unroll * abs (increment))) by using an
+            and instruction.  */
+         diff = expand_binop (GET_MODE (diff), and_optab, diff,
+                              gen_rtx (CONST_INT, VOIDmode,
+                                       unroll_number * abs_inc - 1),
+                              0, 0, OPTAB_LIB_WIDEN);
+
+         /* Now emit a sequence of branches to jump to the proper precond
+            loop entry point.  */
+
+         labels = (rtx *) alloca (sizeof (rtx) * unroll_number);
+         for (i = 0; i < unroll_number; i++)
+           labels[i] = gen_label_rtx ();
+
+         /* Assuming the unroll_number is 4, and the increment is 2, then
+            for a negative increment:  for a positive increment:
+            diff = 0,1   precond 0     diff = 0,7   precond 0
+            diff = 2,3   precond 3     diff = 1,2   precond 1
+            diff = 4,5   precond 2     diff = 3,4   precond 2
+            diff = 6,7   precond 1     diff = 5,6   precond 3  */
+
+         /* We only need to emit (unroll_number - 1) branches here, the
+            last case just falls through to the following code.  */
+
+         /* ??? This would give better code if we emitted a tree of branches
+            instead of the current linear list of branches.  */
+
+         for (i = 0; i < unroll_number - 1; i++)
+           {
+             int cmp_const;
+
+             /* For negative increments, must invert the constant compared
+                against, except when comparing against zero.  */
+             if (i == 0)
+               cmp_const = 0;
+             else if (neg_inc)
+               cmp_const = unroll_number - i;
+             else
+               cmp_const = i;
+
+             emit_cmp_insn (diff, gen_rtx (CONST_INT, VOIDmode,
+                                           abs_inc * cmp_const),
+                            EQ, 0, mode, 0, 0);
+
+             if (i == 0)
+               emit_jump_insn (gen_beq (labels[i]));
+             else if (neg_inc)
+               emit_jump_insn (gen_bge (labels[i]));
+             else
+               emit_jump_insn (gen_ble (labels[i]));
+             JUMP_LABEL (get_last_insn ()) = labels[i];
+             LABEL_NUSES (labels[i])++;
+           }
+
+         /* If the increment is greater than one, then we need another branch,
+            to handle other cases equivalent to 0.  */
+
+         /* ??? This should be merged into the code above somehow to help
+            simplify the code here, and reduce the number of branches emitted.
+            For the negative increment case, the branch here could easily
+            be merged with the `0' case branch above.  For the positive
+            increment case, it is not clear how this can be simplified.  */
+            
+         if (abs_inc != 1)
+           {
+             int cmp_const;
+
+             if (neg_inc)
+               cmp_const = abs_inc - 1;
+             else
+               cmp_const = abs_inc * (unroll_number - 1) + 1;
+
+             emit_cmp_insn (diff, gen_rtx (CONST_INT, VOIDmode, cmp_const),
+                            EQ, 0, mode, 0, 0);
+
+             if (neg_inc)
+               emit_jump_insn (gen_ble (labels[0]));
+             else
+               emit_jump_insn (gen_bge (labels[0]));
+             JUMP_LABEL (get_last_insn ()) = labels[0];
+             LABEL_NUSES (labels[0])++;
+           }
+
+         sequence = gen_sequence ();
+         end_sequence ();
+         emit_insn_before (sequence, loop_start);
+         
+         /* Only the last copy of the loop body here needs the exit
+            test, so set copy_end to exclude the compare/branch here,
+            and then reset it inside the loop when get to the last
+            copy.  */
+
+         if (GET_CODE (last_loop_insn) == BARRIER)
+           copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
+         else if (GET_CODE (last_loop_insn) == JUMP_INSN)
+           {
+#ifdef HAVE_cc0
+             /* The immediately preceeding insn is a compare which we do not
+                want to copy.  */
+             copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
+#else
+             /* The immediately preceeding insn may not be a compare, so we
+                must copy it.  */
+             copy_end = PREV_INSN (last_loop_insn);
+#endif
+           }
+         else
+           abort ();
+
+         for (i = 1; i < unroll_number; i++)
+           {
+             emit_label_after (labels[unroll_number - i],
+                               PREV_INSN (loop_start));
+
+             bzero (map->insn_map, max_insnno * sizeof (rtx));
+             bzero (map->const_equiv_map, maxregnum * sizeof (rtx));
+             bzero (map->const_age_map, maxregnum * sizeof (unsigned));
+             map->const_age = 0;
+
+             for (j = 0; j < max_labelno; j++)
+               if (local_label[j])
+                 map->label_map[j] = gen_label_rtx ();
+
+             /* The last copy needs the compare/branch insns at the end,
+                so reset copy_end here if the loop ends with a conditional
+                branch.  */
+
+             if (i == unroll_number - 1)
+               {
+                 if (GET_CODE (last_loop_insn) == BARRIER)
+                   copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
+                 else
+                   copy_end = last_loop_insn;
+               }
+
+             /* None of the copies are the `last_iteration', so just
+                pass zero for that parameter.  */
+             copy_loop_body (copy_start, copy_end, map, exit_label, 0,
+                             unroll_type, start_label, loop_end,
+                             loop_start, copy_end);
+           }
+         emit_label_after (labels[0], PREV_INSN (loop_start));
+
+         if (GET_CODE (last_loop_insn) == BARRIER)
+           {
+             insert_before = PREV_INSN (last_loop_insn);
+             copy_end = PREV_INSN (insert_before);
+           }
+         else
+           {
+#ifdef HAVE_cc0
+             /* The immediately preceeding insn is a compare which we do not
+                want to copy.  */
+             insert_before = PREV_INSN (last_loop_insn);
+             copy_end = PREV_INSN (insert_before);
+#else
+             /* The immediately preceeding insn may not be a compare, so we
+                must copy it.  */
+             insert_before = last_loop_insn;
+             copy_end = PREV_INSN (last_loop_insn);
+#endif
+           }
+
+         /* Set unroll type to MODULO now.  */
+         unroll_type = UNROLL_MODULO;
+         loop_preconditioned = 1;
+       }
+    }
+
+  /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
+     the loop unless all loops are being unrolled.  */
+  if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
+      return;
+    }
+
+  /* At this point, we are guaranteed to unroll the loop.  */
+
+  /* For each biv and giv, determine whether it can be safely split into
+     a different variable for each unrolled copy of the loop body.
+     We precalculate and save this info here, since computing it is
+     expensive.
+
+     Do this before deleting any instructions from the loop, so that
+     back_branch_in_range_p will work correctly.  */
+
+  if (splitting_not_safe)
+    temp = 0;
+  else
+    temp = find_splittable_regs (unroll_type, loop_start, loop_end,
+                               end_insert_before, unroll_number);
+
+  /* find_splittable_regs may have created some new registers, so must
+     reallocate the reg_map with the new larger size, and must realloc
+     the constant maps also.  */
+
+  maxregnum = max_reg_num ();
+  map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx));
+
+  init_reg_map (map, maxregnum);
+
+  /* Space is needed in some of the map for new registers, so new_maxregnum
+     is an (over)estimate of how many registers will exist at the end.  */
+  new_maxregnum = maxregnum + (temp * unroll_number * 2);
+
+  /* Must realloc space for the constant maps, because the number of registers
+     may have changed.  */
+
+  map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx));
+  map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned));
+
+  global_const_equiv_map = map->const_equiv_map;
+
+  /* Search the list of bivs and givs to find ones which need to be remapped
+     when split, and set their reg_map entry appropriately.  */
+
+  for (bl = loop_iv_list; bl; bl = bl->next)
+    {
+      if (REGNO (bl->biv->src_reg) != bl->regno)
+       map->reg_map[bl->regno] = bl->biv->src_reg;
+#if 0
+      /* Currently, non-reduced/final-value givs are never split.  */
+      for (v = bl->giv; v; v = v->next_iv)
+       if (REGNO (v->src_reg) != bl->regno)
+         map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
+#endif
+    }
+
+  /* If the loop is being partially unrolled, and the iteration variables
+     are being split, and are being renamed for the split, then must fix up
+     the compare instruction at the end of the loop to refer to the new
+     registers.  This compare isn't copied, so the registers used in it
+     will never be replaced if it isn't done here.  */
+
+  if (unroll_type == UNROLL_MODULO)
+    {
+      insn = NEXT_INSN (copy_end);
+      if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET)
+       {
+#if 0
+         /* If non-reduced/final-value givs were split, then this would also
+            have to remap those givs.  */
+#endif
+
+         tem = SET_SRC (PATTERN (insn));
+         /* The set source is a register.  */
+         if (GET_CODE (tem) == REG)
+           {
+             if (REGNO (tem) < max_reg_before_loop
+                 && reg_iv_type[REGNO (tem)] == BASIC_INDUCT)
+               SET_SRC (PATTERN (insn))
+                 = reg_biv_class[REGNO (tem)]->biv->src_reg;
+           }
+         else
+           {
+             /* The set source is a compare of some sort.  */
+             tem = XEXP (SET_SRC (PATTERN (insn)), 0);
+             if (GET_CODE (tem) == REG
+                 && REGNO (tem) < max_reg_before_loop
+                 && reg_iv_type[REGNO (tem)] == BASIC_INDUCT)
+               XEXP (SET_SRC (PATTERN (insn)), 0)
+                 = reg_biv_class[REGNO (tem)]->biv->src_reg;
+             
+             tem = XEXP (SET_SRC (PATTERN (insn)), 1);
+             if (GET_CODE (tem) == REG
+                 && REGNO (tem) < max_reg_before_loop
+                 && reg_iv_type[REGNO (tem)] == BASIC_INDUCT)
+               XEXP (SET_SRC (PATTERN (insn)), 1)
+                 = reg_biv_class[REGNO (tem)]->biv->src_reg;
+           }
+       }
+    }
+
+  /* For unroll_number - 1 times, make a copy of each instruction
+     between copy_start and copy_end, and insert these new instructions
+     before the end of the loop.  */
+
+  for (i = 0; i < unroll_number; i++)
+    {
+      bzero (map->insn_map, max_insnno * sizeof (rtx));
+      bzero (map->const_equiv_map, new_maxregnum * sizeof (rtx));
+      bzero (map->const_age_map, new_maxregnum * sizeof (unsigned));
+      map->const_age = 0;
+
+      for (j = 0; j < max_labelno; j++)
+       if (local_label[j])
+         map->label_map[j] = gen_label_rtx ();
+
+      /* If loop starts with a branch to the test, then fix it so that
+        it points to the test of the first unrolled copy of the loop.  */
+      if (i == 0 && loop_start != copy_start)
+       {
+         insn = PREV_INSN (copy_start);
+         pattern = PATTERN (insn);
+         
+         tem = map->label_map[CODE_LABEL_NUMBER
+                              (XEXP (SET_SRC (pattern), 0))];
+         SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem);
+
+         /* Set the jump label so that it can be used by later loop unrolling
+            passes.  */
+         JUMP_LABEL (insn) = tem;
+         LABEL_NUSES (tem)++;
+       }
+
+      copy_loop_body (copy_start, copy_end, map, exit_label,
+                     i == unroll_number - 1, unroll_type, start_label,
+                     loop_end, insert_before, insert_before);
+    }
+
+  /* Before deleting any insns, emit a CODE_LABEL immediately after the last
+     insn to be deleted.  This prevents any runaway delete_insn call from
+     more insns that it should, as it always stops at a CODE_LABEL.  */
+
+  /* Delete the compare and branch at the end of the loop if completely
+     unrolling the loop.  Deleting the backward branch at the end also
+     deletes the code label at the start of the loop.  This is done at
+     the very end to avoid problems with back_branch_in_range_p.  */
+
+  if (unroll_type == UNROLL_COMPLETELY)
+    safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
+  else
+    safety_label = emit_label_after (gen_label_rtx (), copy_end);
+
+  /* Delete all of the original loop instructions.  Don't delete the 
+     LOOP_BEG note, or the first code label in the loop.  */
+
+  insn = NEXT_INSN (copy_start);
+  while (insn != safety_label)
+    {
+      if (insn != start_label)
+       insn = delete_insn (insn);
+      else
+       insn = NEXT_INSN (insn);
+    }
+
+  /* Can now delete the 'safety' label emitted to protect us from runaway
+     delete_insn calls.  */
+  if (INSN_DELETED_P (safety_label))
+    abort ();
+  delete_insn (safety_label);
+
+  /* If exit_label exists, emit it after the loop.  Doing the emit here
+     forces it to have a higher INSN_UID than any insn in the unrolled loop.
+     This is needed so that mostly_true_jump in reorg.c will treat jumps
+     to this loop end label correctly, i.e. predict that they are usually
+     not taken.  */
+  if (exit_label)
+    emit_label_after (exit_label, loop_end);
+
+  /* If debugging, we must replicate the tree nodes corresponsing to the blocks
+     inside the loop, so that the original one to one mapping will remain.  */
+
+  if (write_symbols != NO_DEBUG)
+    {
+      int copies = unroll_number;
+
+      if (loop_preconditioned)
+       copies += unroll_number - 1;
+
+      unroll_block_trees (uid_loop_num[INSN_UID (loop_start)], copies);
+    }
+}
+\f
+/* Return true if the loop can be safely, and profitably, preconditioned
+   so that the unrolled copies of the loop body don't need exit tests.
+
+   This only works if final_value, initial_value and increment can be
+   determined, and if increment is a constant power of 2.
+   If increment is not a power of 2, then the preconditioning modulo
+   operation would require a real modulo instead of a boolean AND, and this
+   is not considered `profitable'.  */
+
+/* ??? If the loop is known to be executed very many times, or the machine
+   has a very cheap divide instruction, then preconditioning is a win even
+   when the increment is not a power of 2.  Use RTX_COST to compute
+   whether divide is cheap.  */
+
+static int
+precondition_loop_p (initial_value, final_value, increment, loop_start,
+                    loop_end)
+     rtx *initial_value, *final_value, *increment;
+     rtx loop_start, loop_end;
+{
+  int unsigned_compare, compare_dir;
+
+  if (loop_n_iterations > 0)
+    {
+      *initial_value = const0_rtx;
+      *increment = const1_rtx;
+      *final_value = gen_rtx (CONST_INT, VOIDmode, loop_n_iterations);
+
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Success, number of iterations known, %d.\n",
+                loop_n_iterations);
+      return 1;
+    }
+
+  if (loop_initial_value == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Could not find initial value.\n");
+      return 0;
+    }
+  else if (loop_increment == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Could not find increment value.\n");
+      return 0;
+    }
+  else if (GET_CODE (loop_increment) != CONST_INT)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Increment not a constant.\n");
+      return 0;
+    }
+  else if ((exact_log2 (INTVAL (loop_increment)) < 0)
+          && (exact_log2 (- INTVAL (loop_increment)) < 0))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Increment not a constant power of 2.\n");
+      return 0;
+    }
+
+  /* Unsigned_compare and compare_dir can be ignored here, since they do
+     not matter for preconditioning.  */
+
+  if (loop_final_value == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: EQ comparison loop.\n");
+      return 0;
+    }
+
+  /* Must ensure that final_value is invariant, so call invariant_p to
+     check.  Before doing so, must check regno against max_reg_before_loop
+     to make sure that the register is in the range convered by invariant_p.
+     If it isn't, then it is most likely a biv/giv which by definition are
+     not invariant.  */
+  if ((GET_CODE (loop_final_value) == REG
+       && REGNO (loop_final_value) >= max_reg_before_loop)
+      || (GET_CODE (loop_final_value) == PLUS
+         && REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
+      || ! invariant_p (loop_final_value))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Final value not invariant.\n");
+      return 0;
+    }
+
+  /* Fail for floating point values, since the caller of this function
+     does not have code to deal with them.  */
+  if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
+      || GET_MODE_CLASS (GET_MODE (loop_initial_value) == MODE_FLOAT))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Floating point final or initial value.\n");
+      return 0;
+    }
+
+  /* Now set initial_value to be the iteration_var, since that may be a
+     simpler expression, and is guaranteed to be correct if all of the
+     above tests succeed.
+
+     We can not use the initial_value as calculated, because it will be
+     one too small for loops of the form "while (i-- > 0)".  We can not
+     emit code before the loop_skip_over insns to fix this problem as this
+     will then give a number one too large for loops of the form
+     "while (--i > 0)".
+
+     Note that all loops that reach here are entered at the top, because
+     this function is not called if the loop starts with a jump.  */
+
+  /* Fail if loop_iteration_var is not live before loop_start, since we need
+     to test its value in the preconditioning code.  */
+
+  if (uid_luid[regno_first_uid[REGNO (loop_iteration_var)]]
+      > INSN_LUID (loop_start))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Preconditioning: Iteration var not live before loop start.\n");
+      return 0;
+    }
+
+  *initial_value = loop_iteration_var;
+  *increment = loop_increment;
+  *final_value = loop_final_value;
+
+  /* Success! */
+  if (loop_dump_stream)
+    fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
+  return 1;
+}
+
+
+/* All pseudo-registers must be mapped to themselves.  Two hard registers
+   must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
+   REGNUM, to avoid function-inlining specific conversions of these
+   registers.  All other hard regs can not be mapped because they may be
+   used with different
+   modes.  */
+
+static void
+init_reg_map (map, maxregnum)
+     struct inline_remap *map;
+     int maxregnum;
+{
+  int i;
+
+  for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
+    map->reg_map[i] = regno_reg_rtx[i];
+  /* Just clear the rest of the entries.  */
+  for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
+    map->reg_map[i] = 0;
+
+  map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
+    = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
+  map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
+    = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
+}
+\f
+/* Strength-reduction will often emit code for optimized biv/givs which
+   calculates their value in a temporary register, and then copies the result
+   to the iv.  This procedure reconstructs the pattern computing the iv;
+   verifying that all operands are of the proper form.
+
+   The return value is the amount that the giv is incremented by.  */
+
+static rtx
+calculate_giv_inc (pattern, src_insn, regno)
+     rtx pattern, src_insn;
+     int regno;
+{
+  rtx increment;
+
+  /* Verify that we have an increment insn here.  First check for a plus
+     as the set source.  */
+  if (GET_CODE (SET_SRC (pattern)) != PLUS)
+    {
+      /* SR sometimes computes the new giv value in a temp, then copies it
+        to the new_reg.  */
+      src_insn = PREV_INSN (src_insn);
+      pattern = PATTERN (src_insn);
+      if (GET_CODE (SET_SRC (pattern)) != PLUS)
+       abort ();
+                 
+      /* The last insn emitted is not needed, so delete it to avoid confusing
+        the second cse pass.  This insn sets the giv unnecessarily.  */
+      delete_insn (get_last_insn ());
+    }
+
+  /* Verify that we have a constant as the second operand of the plus.  */
+  increment = XEXP (SET_SRC (pattern), 1);
+  if (GET_CODE (increment) != CONST_INT)
+    {
+      /* SR sometimes puts the constant in a register, especially if it is
+        too big to be an add immed operand.  */
+      increment = SET_SRC (PATTERN (PREV_INSN (src_insn)));
+
+      /* SR may have used LO_SUM to compute the constant if it is too large
+        for a load immed operand.  In this case, the constant is in operand
+        one of the LO_SUM rtx.  */
+      if (GET_CODE (increment) == LO_SUM)
+       increment = XEXP (increment, 1);
+
+      if (GET_CODE (increment) != CONST_INT)
+       abort ();
+                 
+      /* The insn loading the constant into a register is not longer needed,
+        so delete it.  */
+      delete_insn (get_last_insn ());
+    }
+
+  /* Check that the source register is the same as the dest register.  */
+  if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
+      || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
+    abort ();
+
+  return increment;
+}
+
+
+/* Copy each instruction in the loop, substituting from map as appropriate.
+   This is very similar to a loop in expand_inline_function.  */
+  
+static void
+copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
+               unroll_type, start_label, loop_end, insert_before,
+               copy_notes_from)
+     rtx copy_start, copy_end;
+     struct inline_remap *map;
+     int last_iteration;
+     enum unroll_types unroll_type;
+     rtx start_label, loop_end, insert_before, copy_notes_from;
+{
+  rtx insn, pattern;
+  rtx tem, copy;
+  int dest_reg_was_split, i;
+  rtx cc0_insn = 0;
+  rtx final_label = 0;
+  rtx giv_inc, giv_dest_reg, giv_src_reg;
+
+  /* If this isn't the last iteration, then map any references to the
+     start_label to final_label.  Final label will then be emitted immediately
+     after the end of this loop body if it was ever used.
+
+     If this is the last iteration, then map references to the start_label
+     to itself.  */
+  if (! last_iteration)
+    {
+      final_label = gen_label_rtx ();
+      map->label_map[CODE_LABEL_NUMBER (start_label)] = final_label;
+    }
+  else
+    map->label_map[CODE_LABEL_NUMBER (start_label)] = start_label;
+
+  start_sequence ();
+  
+  insn = copy_start;
+  do
+    {
+      insn = NEXT_INSN (insn);
+      
+      map->orig_asm_operands_vector = 0;
+      
+      switch (GET_CODE (insn))
+       {
+       case INSN:
+         pattern = PATTERN (insn);
+         copy = 0;
+         giv_inc = 0;
+         
+         /* Check to see if this is a giv that has been combined with
+            some split address givs.  (Combined in the sense that 
+            `combine_givs' in loop.c has put two givs in the same register.)
+            In this case, we must search all givs based on the same biv to
+            find the address givs.  Then split the address givs.
+            Do this before splitting the giv, since that may map the
+            SET_DEST to a new register.  */
+         
+         if (GET_CODE (pattern) == SET
+             && GET_CODE (SET_DEST (pattern)) == REG
+             && addr_combined_regs[REGNO (SET_DEST (pattern))])
+           {
+             struct iv_class *bl;
+             struct induction *v, *tv;
+             int regno = REGNO (SET_DEST (pattern));
+             
+             v = addr_combined_regs[REGNO (SET_DEST (pattern))];
+             bl = reg_biv_class[REGNO (v->src_reg)];
+             
+             /* Although the giv_inc amount is not needed here, we must call
+                calculate_giv_inc here since it might try to delete the
+                last insn emitted.  If we wait until later to call it,
+                we might accidentally delete insns generated immediately
+                below by emit_unrolled_add.  */
+
+             giv_inc = calculate_giv_inc (pattern, insn, regno);
+
+             /* Now find all address giv's that were combined with this
+                giv 'v'.  */
+             for (tv = bl->giv; tv; tv = tv->next_iv)
+               if (tv->giv_type == DEST_ADDR && tv->same == v)
+                 {
+                   tv->dest_reg = plus_constant (tv->dest_reg,
+                                                 INTVAL (giv_inc));
+                   *tv->location = tv->dest_reg;
+                   
+                   if (last_iteration && unroll_type != UNROLL_COMPLETELY)
+                     {
+                       /* Must emit an insn to increment the split address
+                          giv.  Add in the const_adjust field in case there
+                          was a constant eliminated from the address.  */
+                       rtx value, dest_reg;
+                       
+                       /* tv->dest_reg will be either a bare register,
+                          or else a register plus a constant.  */
+                       if (GET_CODE (tv->dest_reg) == REG)
+                         dest_reg = tv->dest_reg;
+                       else
+                         dest_reg = XEXP (tv->dest_reg, 0);
+                       
+                       /* tv->dest_reg may actually be a (PLUS (REG) (CONST))
+                          here, so we must call plus_constant to add
+                          the const_adjust amount before calling
+                          emit_unrolled_add below.  */
+                       value = plus_constant (tv->dest_reg, tv->const_adjust);
+
+                       /* The constant could be too large for an add
+                          immediate, so can't directly emit an insn here.  */
+                       emit_unrolled_add (dest_reg, XEXP (value, 0),
+                                          XEXP (value, 1));
+                       
+                       /* Reset the giv to be just the register again, in case
+                          it is used after the set we have just emitted.  */
+                       tv->dest_reg = dest_reg;
+                       *tv->location = tv->dest_reg;
+                     }
+                 }
+           }
+         
+         /* If this is a setting of a splittable variable, then determine
+            how to split the variable, create a new set based on this split,
+            and set up the reg_map so that later uses of the variable will
+            use the new split variable.  */
+         
+         dest_reg_was_split = 0;
+         
+         if (GET_CODE (pattern) == SET
+             && GET_CODE (SET_DEST (pattern)) == REG
+             && splittable_regs[REGNO (SET_DEST (pattern))])
+           {
+             int regno = REGNO (SET_DEST (pattern));
+             
+             dest_reg_was_split = 1;
+             
+             /* Compute the increment value for the giv, if it wasn't
+                already computed above.  */
+
+             if (giv_inc == 0)
+               giv_inc = calculate_giv_inc (pattern, insn, regno);
+             giv_dest_reg = SET_DEST (pattern);
+             giv_src_reg = SET_DEST (pattern);
+
+             if (unroll_type == UNROLL_COMPLETELY)
+               {
+                 /* Completely unrolling the loop.  Set the induction
+                    variable to a known constant value.  */
+                 
+                 /* The value in splittable_regs may be an invariant
+                    value, so we must use plus_constant here.  */
+                 splittable_regs[regno]
+                   = plus_constant (splittable_regs[regno], INTVAL (giv_inc));
+
+                 if (GET_CODE (splittable_regs[regno]) == PLUS)
+                   {
+                     giv_src_reg = XEXP (splittable_regs[regno], 0);
+                     giv_inc = XEXP (splittable_regs[regno], 1);
+                   }
+                 else
+                   {
+                     /* The splittable_regs value must be a REG or a
+                        CONST_INT, so put the entire value in the giv_src_reg
+                        variable.  */
+                     giv_src_reg = splittable_regs[regno];
+                     giv_inc = const0_rtx;
+                   }
+               }
+             else
+               {
+                 /* Partially unrolling loop.  Create a new pseudo
+                    register for the iteration variable, and set it to
+                    be a constant plus the original register.  Except
+                    on the last iteration, when the result has to
+                    go back into the original iteration var register.  */
+                 
+                 /* Handle bivs which must be mapped to a new register
+                    when split.  This happens for bivs which need their
+                    final value set before loop entry.  The new register
+                    for the biv was stored in the biv's first struct
+                    induction entry by find_splittable_regs.  */
+
+                 if (regno < max_reg_before_loop
+                     && reg_iv_type[regno] == BASIC_INDUCT)
+                   {
+                     giv_src_reg = reg_biv_class[regno]->biv->src_reg;
+                     giv_dest_reg = giv_src_reg;
+                   }
+                 
+#if 0
+                 /* If non-reduced/final-value givs were split, then
+                    this would have to remap those givs also.  See
+                    find_splittable_regs.  */
+#endif
+                 
+                 splittable_regs[regno]
+                   = gen_rtx (CONST_INT, VOIDmode,
+                              INTVAL (giv_inc)
+                              + INTVAL (splittable_regs[regno]));
+                 giv_inc = splittable_regs[regno];
+                 
+                 /* Now split the induction variable by changing the dest
+                    of this insn to a new register, and setting its
+                    reg_map entry to point to this new register.
+
+                    If this is the last iteration, and this is the last insn
+                    that will update the iv, then reuse the original dest,
+                    to ensure that the iv will have the proper value when
+                    the loop exits or repeats.
+
+                    Using splittable_regs_updates here like this is safe,
+                    because it can only be greater than one if all
+                    instructions modifying the iv are always executed in
+                    order.  */
+
+                 if (! last_iteration
+                     || (splittable_regs_updates[regno]-- != 1))
+                   {
+                     tem = gen_reg_rtx (GET_MODE (giv_src_reg));
+                     giv_dest_reg = tem;
+                     map->reg_map[regno] = tem;
+                   }
+                 else
+                   map->reg_map[regno] = giv_src_reg;
+               }
+
+             /* The constant being added could be too large for an add
+                immediate, so can't directly emit an insn here.  */
+             emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
+             copy = get_last_insn ();
+             pattern = PATTERN (copy);
+           }
+         else
+           {
+             pattern = copy_rtx_and_substitute (pattern, map);
+             copy = emit_insn (pattern);
+           }
+         /* REG_NOTES will be copied later.  */
+         
+#ifdef HAVE_cc0
+         /* If this insn is setting CC0, it may need to look at
+            the insn that uses CC0 to see what type of insn it is.
+            In that case, the call to recog via validate_change will
+            fail.  So don't substitute constants here.  Instead,
+            do it when we emit the following insn.
+
+            For example, see the pyr.md file.  That machine has signed and
+            unsigned compares.  The compare patterns must check the
+            following branch insn to see which what kind of compare to
+            emit.
+
+            If the previous insn set CC0, substitute constants on it as
+            well.  */
+         if (sets_cc0_p (copy) != 0)
+           cc0_insn = copy;
+         else
+           {
+             if (cc0_insn)
+               try_constants (cc0_insn, map);
+             cc0_insn = 0;
+             try_constants (copy, map);
+           }
+#else
+         try_constants (copy, map);
+#endif
+
+         /* Make split induction variable constants `permanent' since we
+            know there are no backward branches across iteration variable
+            settings which would invalidate this.  */
+         if (dest_reg_was_split)
+           {
+             int regno = REGNO (SET_DEST (pattern));
+
+             if (map->const_age_map[regno] == map->const_age)
+               map->const_age_map[regno] = -1;
+           }
+         break;
+         
+       case JUMP_INSN:
+         if (JUMP_LABEL (insn) == start_label && insn == copy_end
+             && ! last_iteration)
+           {
+             /* This is a branch to the beginning of the loop; this is the
+                last insn being copied; and this is not the last iteration.
+                In this case, we want to change the original fall through
+                case to be a branch past the end of the loop, and the
+                original jump label case to fall_through.  */
+
+             int fall_through;
+
+             /* Never map the label in this case.  */
+             pattern = copy_rtx (PATTERN (insn));
+             
+             /* Assume a conditional branch, since the code above
+                does not let unconditional branches be copied.  */
+             if (! condjump_p (insn))
+               abort ();
+             fall_through
+               = (XEXP (SET_SRC (PATTERN (insn)), 2) == pc_rtx) + 1;
+
+             /* Set the fall through case to the exit label.  Must
+                create a new label_ref since they can't be shared.  */
+             XEXP (SET_SRC (pattern), fall_through)
+               = gen_rtx (LABEL_REF, VOIDmode, exit_label);
+                     
+             /* Set the original branch case to fall through.  */
+             XEXP (SET_SRC (pattern), 3 - fall_through)
+               = pc_rtx;
+           }
+         else
+           pattern = copy_rtx_and_substitute (PATTERN (insn), map);
+         
+         copy = emit_jump_insn (pattern);
+         
+#ifdef HAVE_cc0
+         if (cc0_insn)
+           try_constants (cc0_insn, map);
+         cc0_insn = 0;
+#endif
+         try_constants (copy, map);
+
+         /* Set the jump label of COPY correctly to avoid problems with
+            later passes of unroll_loop, if INSN had jump label set.  */
+         if (JUMP_LABEL (insn))
+           {
+             /* Can't use the label_map for every insn, since this may be
+                the backward branch, and hence the label was not mapped.  */
+             if (GET_CODE (pattern) == SET)
+               {
+                 tem = SET_SRC (pattern);
+                 if (GET_CODE (tem) == LABEL_REF)
+                   JUMP_LABEL (copy) = XEXP (tem, 0);
+                 else if (GET_CODE (tem) == IF_THEN_ELSE)
+                   {
+                     if (XEXP (tem, 1) != pc_rtx)
+                       JUMP_LABEL (copy) = XEXP (XEXP (tem, 1), 0);
+                     else
+                       JUMP_LABEL (copy) = XEXP (XEXP (tem, 2), 0);
+                   }
+                 else
+                   abort ();
+               }
+             else
+               {
+                 /* An unrecognizable jump insn, probably the entry jump
+                    for a switch statement.  This label must have been mapped,
+                    so just use the label_map to get the new jump label.  */
+                 JUMP_LABEL (copy) = map->label_map[CODE_LABEL_NUMBER
+                                                    (JUMP_LABEL (insn))];
+               }
+         
+             /* If this is a non-local jump, then must increase the label
+                use count so that the label will not be deleted when the
+                original jump is deleted.  */
+             LABEL_NUSES (JUMP_LABEL (copy))++;
+           }
+         else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
+                  || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
+           {
+             rtx pat = PATTERN (copy);
+             int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
+             int len = XVECLEN (pat, diff_vec_p);
+             int i;
+
+             for (i = 0; i < len; i++)
+               LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
+           }
+
+         /* If this used to be a conditional jump insn but whose branch
+            direction is now known, we must do something special.  */
+         if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
+           {
+#ifdef HAVE_cc0
+             /* The previous insn set cc0 for us.  So delete it.  */
+             delete_insn (PREV_INSN (copy));
+#endif
+
+             /* If this is now a no-op, delete it.  */
+             if (map->last_pc_value == pc_rtx)
+               {
+                 delete_insn (copy);
+                 copy = 0;
+               }
+             else
+               /* Otherwise, this is unconditional jump so we must put a
+                  BARRIER after it.  We could do some dead code elimination
+                  here, but jump.c will do it just as well.  */
+               emit_barrier ();
+           }
+         break;
+         
+       case CALL_INSN:
+         pattern = copy_rtx_and_substitute (PATTERN (insn), map);
+         copy = emit_call_insn (pattern);
+
+#ifdef HAVE_cc0
+         if (cc0_insn)
+           try_constants (cc0_insn, map);
+         cc0_insn = 0;
+#endif
+         try_constants (copy, map);
+
+         /* Be lazy and assume CALL_INSNs clobber all hard registers.  */
+         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+           map->const_equiv_map[i] = 0;
+         break;
+         
+       case CODE_LABEL:
+         /* If this is the loop start label, then we don't need to emit a
+            copy of this label since no one will use it.  */
+
+         if (insn != start_label)
+           {
+             copy = emit_label (map->label_map[CODE_LABEL_NUMBER (insn)]);
+             map->const_age++;
+           }
+         break;
+         
+       case BARRIER:
+         copy = emit_barrier ();
+         break;
+         
+       case NOTE:
+         if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
+           copy = emit_note (NOTE_SOURCE_FILE (insn),
+                             NOTE_LINE_NUMBER (insn));
+         else
+           copy = 0;
+         break;
+         
+       default:
+         abort ();
+         break;
+       }
+      
+      map->insn_map[INSN_UID (insn)] = copy;
+    }
+  while (insn != copy_end);
+  
+  /* Now copy the REG_NOTES.  */
+  insn = copy_start;
+  do
+    {
+      insn = NEXT_INSN (insn);
+      if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
+          || GET_CODE (insn) == CALL_INSN)
+         && map->insn_map[INSN_UID (insn)])
+       REG_NOTES (map->insn_map[INSN_UID (insn)])
+         = copy_rtx_and_substitute (REG_NOTES (insn), map);
+    }
+  while (insn != copy_end);
+
+  /* There may be notes between copy_notes_from and loop_end.  Emit a copy of
+     each of these notes here, since there may be some important ones, such as
+     NOTE_INSN_BLOCK_END notes, in this group.  We don't do this on the last
+     iteration, because the original notes won't be deleted.
+
+     We can't use insert_before here, because when from preconditioning,
+     insert_before points before the loop.  We can't use copy_end, because
+     there may be insns already inserted after it (which we don't want to
+     copy) when not from preconditioning code.  */
+
+  if (! last_iteration)
+    {
+      for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
+       {
+         if (GET_CODE (insn) == NOTE
+             && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
+           emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
+       }
+    }
+
+  if (final_label && LABEL_NUSES (final_label) > 0)
+    emit_label (final_label);
+
+  tem = gen_sequence ();
+  end_sequence ();
+  emit_insn_before (tem, insert_before);
+}
+\f
+/* Emit an insn, using the expand_binop to ensure that a valid insn is
+   emitted.  This will correctly handle the case where the increment value
+   won't fit in the immediate field of a PLUS insns.  */
+
+void
+emit_unrolled_add (dest_reg, src_reg, increment)
+     rtx dest_reg, src_reg, increment;
+{
+  rtx result;
+
+  result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
+                        dest_reg, 0, OPTAB_LIB_WIDEN);
+
+  if (dest_reg != result)
+    emit_move_insn (dest_reg, result);
+}
+\f
+/* Searches the insns between INSN and LOOP_END.  Returns 1 if there
+   is a backward branch in that range that branches to somewhere between
+   LOOP_START and INSN.  Returns 0 otherwise.  */
+
+/* ??? This is quadratic algorithm.  Could be rewriten to be linear.
+   In practice, this is not a problem, because this function is seldom called,
+   and uses a negligible amount of CPU time on average.  */
+
+static int
+back_branch_in_range_p (insn, loop_start, loop_end)
+     rtx insn;
+     rtx loop_start, loop_end;
+{
+  rtx p, q, target_insn;
+
+  /* Stop before we get to the backward branch at the end of the loop.  */
+  loop_end = prev_nonnote_insn (loop_end);
+  if (GET_CODE (loop_end) == BARRIER)
+    loop_end = PREV_INSN (loop_end);
+
+  /* Check in case insn has been deleted, search forward for first non
+     deleted insn following it.  */
+  while (INSN_DELETED_P (insn))
+    insn = NEXT_INSN (insn);
+
+  /* Check for the case where insn is the last insn in the loop.  */
+  if (insn == loop_end)
+    return 0;
+
+  for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
+    {
+      if (GET_CODE (p) == JUMP_INSN)
+       {
+         target_insn = JUMP_LABEL (p);
+         
+         /* Search from loop_start to insn, to see if one of them is
+            the target_insn.  We can't use INSN_LUID comparisons here,
+            since insn may not have an LUID entry.  */
+         for (q = loop_start; q != insn; q = NEXT_INSN (q))
+           if (q == target_insn)
+             return 1;
+       }
+    }
+
+  return 0;
+}
+
+/* Try to generate the simplest rtx for the expression
+   (PLUS (MULT mult1 mult2) add1).  This is used to calculate the initial
+   value of giv's.  */
+
+static rtx
+fold_rtx_mult_add (mult1, mult2, add1, mode)
+     rtx mult1, mult2, add1;
+     enum machine_mode mode;
+{
+  rtx temp, mult_res;
+  rtx result;
+
+  /* The modes must all be the same.  This should always be true.  For now,
+     check to make sure.  */
+  if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
+      || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
+      || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
+    abort ();
+
+  /* Ensure that if at least one of mult1/mult2 are constant, then mult2
+     will be a constant.  */
+  if (GET_CODE (mult1) == CONST_INT)
+    {
+      temp = mult2;
+      mult2 = mult1;
+      mult1 = temp;
+    }
+
+  mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
+  if (! mult_res)
+    mult_res = gen_rtx (MULT, mode, mult1, mult2);
+
+  /* Again, put the constant second.  */
+  if (GET_CODE (add1) == CONST_INT)
+    {
+      temp = add1;
+      add1 = mult_res;
+      mult_res = temp;
+    }
+
+  result = simplify_binary_operation (PLUS, mode, add1, mult_res);
+  if (! result)
+    result = gen_rtx (PLUS, mode, add1, mult_res);
+
+  return result;
+}
+
+/* Searches the list of induction struct's for the biv BL, to try to calculate
+   the total increment value for one iteration of the loop as a constant.
+
+   Returns the increment value as an rtx, simplified as much as possible,
+   if it can be calculated.  Otherwise, returns 0.  */
+
+rtx 
+biv_total_increment (bl, loop_start, loop_end)
+     struct iv_class *bl;
+     rtx loop_start, loop_end;
+{
+  struct induction *v;
+  rtx result;
+
+  /* For increment, must check every instruction that sets it.  Each
+     instruction must be executed only once each time through the loop.
+     To verify this, we check that the the insn is always executed, and that
+     there are no backward branches after the insn that branch to before it.
+     Also, the insn must have a mult_val of one (to make sure it really is
+     an increment).  */
+
+  result = const0_rtx;
+  for (v = bl->biv; v; v = v->next_iv)
+    {
+      if (v->always_computable && v->mult_val == const1_rtx
+         && ! back_branch_in_range_p (v->insn, loop_start, loop_end))
+       result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
+      else
+       return 0;
+    }
+
+  return result;
+}
+
+/* Determine the initial value of the iteration variable, and the amount
+   that it is incremented each loop.  Use the tables constructed by
+   the strength reduction pass to calculate these values.
+
+   Initial_value and/or increment are set to zero if their values could not
+   be calculated.  */
+
+static void
+iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
+     rtx iteration_var, *initial_value, *increment;
+     rtx loop_start, loop_end;
+{
+  struct iv_class *bl;
+  struct induction *v, *b;
+
+  /* Clear the result values, in case no answer can be found.  */
+  *initial_value = 0;
+  *increment = 0;
+
+  /* The iteration variable can be either a giv or a biv.  Check to see
+     which it is, and compute the variable's initial value, and increment
+     value if possible.  */
+
+  /* If this is a new register, can't handle it since we don't have any
+     reg_iv_type entry for it.  */
+  if (REGNO (iteration_var) >= max_reg_before_loop)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: No reg_iv_type entry for iteration var.\n");
+      return;
+    }
+  /* Reject iteration variables larger than the host long size, since they
+     could result in a number of iterations greater than the range of our
+     `unsigned long' variable loop_n_iterations.  */
+  else if (GET_MODE_BITSIZE (GET_MODE (iteration_var)) > HOST_BITS_PER_LONG)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Iteration var rejected because mode larger than host long.\n");
+      return;
+    }
+  else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Iteration var not an interger.\n");
+      return;
+    }
+  else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
+    {
+      /* Grab initial value, only useful if it is a constant.  */
+      bl = reg_biv_class[REGNO (iteration_var)];
+      *initial_value = bl->initial_value;
+
+      *increment = biv_total_increment (bl, loop_start, loop_end);
+    }
+  else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
+    {
+#if 1
+      /* ??? The code below does not work because the incorrect number of
+        iterations is calculated when the biv is incremented after the giv
+        is set (which is the usual case).  This can probably be accounted
+        for by biasing the initial_value by subtracting the amount of the
+        increment that occurs between the giv set and the giv test.  However,
+        a giv as an iterator is very rare, so it does not seem worthwhile
+        to handle this.  */
+      /* ??? An example failure is: i = 6; do {;} while (i++ < 9).  */
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Giv iterators are not handled.\n");
+      return;
+#else
+      /* Initial value is mult_val times the biv's initial value plus
+        add_val.  Only useful if it is a constant.  */
+      v = reg_iv_info[REGNO (iteration_var)];
+      bl = reg_biv_class[REGNO (v->src_reg)];
+      *initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
+                                         v->add_val, v->mode);
+      
+      /* Increment value is mult_val times the increment value of the biv.  */
+
+      *increment = biv_total_increment (bl, loop_start, loop_end);
+      if (*increment)
+       *increment = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx,
+                                       v->mode);
+#endif
+    }
+  else
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Not basic or general induction var.\n");
+      return;
+    }
+}
+
+/* Calculate the approximate final value of the iteration variable
+   which has an loop exit test with code COMPARISON_CODE and comparison value
+   of COMPARISON_VALUE.  Also returns an indication of whether the comparison
+   was signed or unsigned, and the direction of the comparison.  This info is
+   needed to calculate the number of loop iterations.  */
+
+static rtx
+approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
+     enum rtx_code comparison_code;
+     rtx comparison_value;
+     int *unsigned_p;
+     int *compare_dir;
+{
+  /* Calculate the final value of the induction variable.
+     The exact final value depends on the branch operator, and increment sign.
+     This is only an approximate value.  It will be wrong if the iteration
+     variable is not incremented by one each time through the loop, and
+     approx final value - start value % increment != 0.  */
+
+  *unsigned_p = 0;
+  switch (comparison_code)
+    {
+    case LEU:
+      *unsigned_p = 1;
+    case LE:
+      *compare_dir = 1;
+      return plus_constant (comparison_value, 1);
+    case GEU:
+      *unsigned_p = 1;
+    case GE:
+      *compare_dir = -1;
+      return plus_constant (comparison_value, -1);
+    case EQ:
+      /* Can not calculate a final value for this case.  */
+      *compare_dir = 0;
+      return 0;
+    case LTU:
+      *unsigned_p = 1;
+    case LT:
+      *compare_dir = 1;
+      return comparison_value;
+      break;
+    case GTU:
+      *unsigned_p = 1;
+    case GT:
+      *compare_dir = -1;
+      return comparison_value;
+    case NE:
+      *compare_dir = 0;
+      return comparison_value;
+    default:
+      abort ();
+    }
+}
+
+/* For each biv and giv, determine whether it can be safely split into
+   a different variable for each unrolled copy of the loop body.  If it
+   is safe to split, then indicate that by saving some useful info
+   in the splittable_regs array.
+
+   If the loop is being completely unrolled, then splittable_regs will hold
+   the current value of the induction variable while the loop is unrolled.
+   It must be set to the initial value of the induction variable here.
+   Otherwise, splittable_regs will hold the difference between the current
+   value of the induction variable and the value the induction variable had
+   at the top of the loop.  It must be set to the value 0 here.  */
+
+/* ?? If the loop is only unrolled twice, then most of the restrictions to
+   constant values are unnecessary, since we can easily calculate increment
+   values in this case even if nothing is constant.  The increment value
+   should not involve a multiply however.  */
+
+/* ?? Even if the biv/giv increment values aren't constant, it may still
+   be beneficial to split the variable if the loop is only unrolled a few
+   times, since multiplies by small integers (1,2,3,4) are very cheap.  */
+
+static int
+find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
+                    unroll_number)
+     enum unroll_types unroll_type;
+     rtx loop_start, loop_end;
+     rtx end_insert_before;
+     int unroll_number;
+{
+  struct iv_class *bl;
+  rtx increment, tem;
+  rtx biv_final_value;
+  int biv_splittable;
+  int result = 0;
+
+  for (bl = loop_iv_list; bl; bl = bl->next)
+    {
+      /* Biv_total_increment must return a constant value,
+        otherwise we can not calculate the split values.  */
+
+      increment = biv_total_increment (bl, loop_start, loop_end);
+      if (! increment || GET_CODE (increment) != CONST_INT)
+       continue;
+
+      /* The loop must be unrolled completely, or else have a known number
+        of iterations and only one exit, or else the biv must be dead
+        outside the loop, or else the final value must be known.  Otherwise,
+        it is unsafe to split the biv since it may not have the proper
+        value on loop exit.  */
+
+      /* loop_number_exit_labels is non-zero if the loop has an exit other than
+        a fall through at the end.  */
+
+      biv_splittable = 1;
+      biv_final_value = 0;
+      if (unroll_type != UNROLL_COMPLETELY
+         && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
+             || unroll_type == UNROLL_NAIVE)
+         && (uid_luid[regno_last_uid[bl->regno]] >= INSN_LUID (loop_end)
+             || ! bl->init_insn
+             || INSN_UID (bl->init_insn) >= max_uid_for_loop
+             || (uid_luid[regno_first_uid[bl->regno]]
+                 < INSN_LUID (bl->init_insn))
+             || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
+         && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
+       biv_splittable = 0;
+
+      /* If final value is non-zero, then must emit an instruction which sets
+        the value of the biv to the proper value.  This is done after
+        handling all of the givs, since some of them may need to use the
+        biv's value in their initialization code.  */
+
+      /* This biv is splittable.  If completely unrolling the loop, save
+        the biv's initial value.  Otherwise, save the constant zero.  */
+
+      if (biv_splittable == 1)
+       {
+         if (unroll_type == UNROLL_COMPLETELY)
+           {
+             /* If the initial value of the biv is itself (i.e. it is too
+                complicated for strength_reduce to compute), or is a hard
+                register, then we must create a new psuedo reg to hold the
+                initial value of the biv.  */
+
+             if (GET_CODE (bl->initial_value) == REG
+                 && (REGNO (bl->initial_value) == bl->regno
+                     || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER))
+               {
+                 rtx tem = gen_reg_rtx (bl->biv->mode);
+                 
+                 emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
+                                   loop_start);
+
+                 if (loop_dump_stream)
+                   fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
+                            bl->regno, REGNO (tem));
+
+                 splittable_regs[bl->regno] = tem;
+               }
+             else
+               splittable_regs[bl->regno] = bl->initial_value;
+           }
+         else
+           splittable_regs[bl->regno] = const0_rtx;
+
+         /* Save the number of instructions that modify the biv, so that
+            we can treat the last one specially.  */
+
+         splittable_regs_updates[bl->regno] = bl->biv_count;
+
+         result++;
+
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Biv %d safe to split.\n", bl->regno);
+       }
+
+      /* Check every giv that depends on this biv to see whether it is
+        splittable also.  Even if the biv isn't splittable, givs which
+        depend on it may be splittable if the biv is live outside the
+        loop, and the givs aren't.  */
+
+      result = find_splittable_givs (bl, unroll_type, loop_start, loop_end,
+                                    increment, unroll_number, result);
+
+      /* If final value is non-zero, then must emit an instruction which sets
+        the value of the biv to the proper value.  This is done after
+        handling all of the givs, since some of them may need to use the
+        biv's value in their initialization code.  */
+      if (biv_final_value)
+       {
+         /* If the loop has multiple exits, emit the insns before the
+            loop to ensure that it will always be executed no matter
+            how the loop exits.  Otherwise emit the insn after the loop,
+            since this is slightly more efficient.  */
+         if (! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]])
+           emit_insn_before (gen_move_insn (bl->biv->src_reg,
+                                            biv_final_value),
+                             end_insert_before);
+         else
+           {
+             /* Create a new register to hold the value of the biv, and then
+                set the biv to its final value before the loop start.  The biv
+                is set to its final value before loop start to ensure that
+                this insn will always be executed, no matter how the loop
+                exits.  */
+             rtx tem = gen_reg_rtx (bl->biv->mode);
+             emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
+                               loop_start);
+             emit_insn_before (gen_move_insn (bl->biv->src_reg,
+                                              biv_final_value),
+                               loop_start);
+
+             if (loop_dump_stream)
+               fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
+                        REGNO (bl->biv->src_reg), REGNO (tem));
+
+             /* Set up the mapping from the original biv register to the new
+                register.  */
+             bl->biv->src_reg = tem;
+           }
+       }
+    }
+  return result;
+}
+
+/* For every giv based on the biv BL, check to determine whether it is
+   splittable.  This is a subroutine to find_splittable_regs ().  */
+
+static int
+find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
+                     unroll_number, result)
+     struct iv_class *bl;
+     enum unroll_types unroll_type;
+     rtx loop_start, loop_end;
+     rtx increment;
+     int unroll_number, result;
+{
+  struct induction *v;
+  rtx final_value;
+  rtx tem;
+
+  for (v = bl->giv; v; v = v->next_iv)
+    {
+      rtx giv_inc, value;
+
+      /* Only split the giv if it has already been reduced, or if the loop is
+        being completely unrolled.  */
+      if (unroll_type != UNROLL_COMPLETELY && v->ignore)
+       continue;
+
+      /* The giv can be split if the insn that sets the giv is executed once
+        and only once on every iteration of the loop.  */
+      /* An address giv can always be split.  v->insn is just a use not a set,
+        and hence it does not matter whether it is always executed.  All that
+        matters is that all the biv increments are always executed, and we
+        won't reach here if they aren't.  */
+      if (v->giv_type != DEST_ADDR
+         && (! v->always_computable
+             || back_branch_in_range_p (v->insn, loop_start, loop_end)))
+       continue;
+      
+      /* The giv increment value must be a constant.  */
+      giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
+                                  v->mode);
+      if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
+       continue;
+
+      /* The loop must be unrolled completely, or else have a known number of
+        iterations and only one exit, or else the giv must be dead outside
+        the loop, or else the final value of the giv must be known.
+        Otherwise, it is not safe to split the giv since it may not have the
+        proper value on loop exit.  */
+         
+      /* The used outside loop test will fail for DEST_ADDR givs.  They are
+        never used outside the loop anyways, so it is always safe to split a
+        DEST_ADDR giv.  */
+
+      final_value = 0;
+      if (unroll_type != UNROLL_COMPLETELY
+         && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
+             || unroll_type == UNROLL_NAIVE)
+         && v->giv_type != DEST_ADDR
+         && ((regno_first_uid[REGNO (v->dest_reg)] != INSN_UID (v->insn)
+              /* Check for the case where the pseudo is set by a shift/add
+                 sequence, in which case the first insn setting the pseudo
+                 is the first insn of the shift/add sequence.  */
+              && (! (tem = find_reg_note (v->insn, REG_RETVAL, 0))
+                  || (regno_first_uid[REGNO (v->dest_reg)]
+                      != INSN_UID (XEXP (tem, 0)))))
+             /* Line above always fails if INSN was moved by loop opt.  */
+             || (uid_luid[regno_last_uid[REGNO (v->dest_reg)]]
+                 >= INSN_LUID (loop_end)))
+         && ! (final_value = v->final_value))
+       continue;
+
+#if 0
+      /* Currently, non-reduced/final-value givs are never split.  */
+      /* Should emit insns after the loop if possible, as the biv final value
+        code below does.  */
+
+      /* If the final value is non-zero, and the giv has not been reduced,
+        then must emit an instruction to set the final value.  */
+      if (final_value && !v->new_reg)
+       {
+         /* Create a new register to hold the value of the giv, and then set
+            the giv to its final value before the loop start.  The giv is set
+            to its final value before loop start to ensure that this insn
+            will always be executed, no matter how we exit.  */
+         tem = gen_reg_rtx (v->mode);
+         emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
+         emit_insn_before (gen_move_insn (v->dest_reg, final_value),
+                           loop_start);
+         
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
+                    REGNO (v->dest_reg), REGNO (tem));
+         
+         v->src_reg = tem;
+       }
+#endif
+
+      /* This giv is splittable.  If completely unrolling the loop, save the
+        giv's initial value.  Otherwise, save the constant zero for it.  */
+
+      if (unroll_type == UNROLL_COMPLETELY)
+       /* It is not safe to use bl->initial_value here, because it may not
+          be invariant.  It is safe to use the initial value stored in
+          the splittable_regs array.  */
+       value = fold_rtx_mult_add (v->mult_val, splittable_regs[bl->regno],
+                                  v->add_val, v->mode);
+      else
+       value = const0_rtx;
+
+      if (v->new_reg)
+       {
+         /* If the giv is an address destination, it could be something other
+            than a simple register, these have to be treated differently.  */
+         if (v->giv_type == DEST_REG)
+           splittable_regs[REGNO (v->new_reg)] = value;
+
+         /* If an addr giv was combined with another addr giv, then we
+            can only split this giv if the addr giv it was combined with
+            was reduced.  This is because the value of v->new_reg is
+            meaningless in this case.  (There is no problem if it was
+            combined with a dest_reg giv which wasn't reduced, v->new_reg
+            is still meaningful in this case.)  */
+
+         else if (v->same && v->same->giv_type == DEST_ADDR
+                 && ! v->same->new_reg) 
+           {
+             if (loop_dump_stream)
+               fprintf (loop_dump_stream,
+                        "DEST_ADDR giv not split, because combined with unreduced DEST_ADDR giv.\n");
+           }
+         else
+           {
+             /* Splitting address givs is useful since it will often allow us
+                to eliminate some increment insns for the base giv as
+                unnecessary.  */
+
+             /* If the addr giv is combined with a dest_reg giv, then all
+                references to that dest reg will be remapped, which is NOT
+                what we want for split addr regs. We always create a new
+                register for the split addr giv, just to be safe.  */
+
+             /* ??? If there are multiple address givs which have been
+                combined with the same dest_reg giv, then we may only need
+                one new register for them.  Pulling out constants below will
+                catch some of the common cases of this.  Currently, I leave
+                the work of simplifying multiple address givs to the
+                following cse pass.  */
+             
+             v->const_adjust = 0;
+             if (unroll_type != UNROLL_COMPLETELY)
+               {
+                 /* If not completely unrolling the loop, then create a new
+                    register to hold the split value of the DEST_ADDR giv.
+                    Emit insn to initialize its value before loop start.  */
+                 tem = gen_reg_rtx (v->mode);
+
+                 /* If the address giv has a constant in its new_reg value,
+                    then this constant can be pulled out and put in value,
+                    instead of being part of the initialization code.  */
+                 
+                 if (GET_CODE (v->new_reg) == PLUS
+                     && GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
+                   {
+                     v->dest_reg
+                       = plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
+                     
+                     /* Only succeed if this will give valid addresses.
+                        Try to validate both the first and the last
+                        address resulting from loop unrolling, if
+                        one fails, then can't do const elim here.  */
+                     if (memory_address_p (v->mode, v->dest_reg)
+                         && memory_address_p (v->mode,
+                                      plus_constant (v->dest_reg,
+                                                     INTVAL (giv_inc)
+                                                     * (unroll_number - 1))))
+                       {
+                         /* Save the negative of the eliminated const, so
+                            that we can calculate the dest_reg's increment
+                            value later.  */
+                         v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
+
+                         v->new_reg = XEXP (v->new_reg, 0);
+                         if (loop_dump_stream)
+                           fprintf (loop_dump_stream,
+                                    "Eliminating constant from giv %d\n",
+                                    REGNO (tem));
+                       }
+                     else
+                       v->dest_reg = tem;
+                   }
+                 else
+                   v->dest_reg = tem;
+                 
+                 /* If the address hasn't been checked for validity yet, do so
+                    now, and fail completely if either the first or the last
+                    unrolled copy of the address is not a valid address.  */
+                 if (v->dest_reg == tem
+                     && (! memory_address_p (v->mode, v->dest_reg)
+                         || ! memory_address_p (v->mode,
+                                plus_constant (v->dest_reg,
+                                               INTVAL (giv_inc)
+                                               * (unroll_number -1)))))
+                   {
+                     if (loop_dump_stream)
+                       fprintf (loop_dump_stream,
+                                "Illegal address for giv at insn %d\n",
+                                INSN_UID (v->insn));
+                     continue;
+                   }
+                 
+                 /* To initialize the new register, just move the value of
+                    new_reg into it.  This is not guaranteed to give a valid
+                    instruction on machines with complex addressing modes.
+                    If we can't recognize it, then delete it and emit insns
+                    to calculate the value from scratch.  */
+                 emit_insn_before (gen_rtx (SET, VOIDmode, tem,
+                                            copy_rtx (v->new_reg)),
+                                   loop_start);
+                 if (! recog_memoized (PREV_INSN (loop_start)))
+                   {
+                     delete_insn (PREV_INSN (loop_start));
+                     emit_iv_add_mult (bl->initial_value, v->mult_val,
+                                       v->add_val, tem, loop_start);
+                     if (loop_dump_stream)
+                       fprintf (loop_dump_stream,
+                                "Illegal init insn, rewritten.\n");
+                   }
+               }
+             else
+               {
+                 v->dest_reg = value;
+                 
+                 /* Check the resulting address for validity, and fail
+                    if the resulting address would be illegal.  */
+                 if (! memory_address_p (v->mode, v->dest_reg)
+                     || ! memory_address_p (v->mode,
+                                    plus_constant (v->dest_reg,
+                                                   INTVAL (giv_inc) *
+                                                   (unroll_number -1))))
+                   {
+                     if (loop_dump_stream)
+                       fprintf (loop_dump_stream,
+                                "Illegal address for giv at insn %d\n",
+                                INSN_UID (v->insn));
+                     continue;
+                   }
+               }
+             
+             /* Store the value of dest_reg into the insn.  This sharing
+                will not be a problem as this insn will always be copied
+                later.  */
+             
+             *v->location = v->dest_reg;
+             
+             /* If this address giv is combined with a dest reg giv, then
+                save the base giv's induction pointer so that we will be
+                able to handle this address giv properly.  The base giv
+                itself does not have to be splittable.  */
+             
+             if (v->same && v->same->giv_type == DEST_REG)
+               addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
+             
+             if (GET_CODE (v->new_reg) == REG)
+               {
+                 /* This giv maybe hasn't been combined with any others.
+                    Make sure that it's giv is marked as splittable here.  */
+                 
+                 splittable_regs[REGNO (v->new_reg)] = value;
+                 
+                 /* Make it appear to depend upon itself, so that the
+                    giv will be properly split in the main loop above.  */
+                 if (! v->same)
+                   {
+                     v->same = v;
+                     addr_combined_regs[REGNO (v->new_reg)] = v;
+                   }
+               }
+             
+             if (loop_dump_stream)
+               fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
+           }
+       }
+      else
+       {
+#if 0
+         /* Currently, unreduced giv's can't be split.  This is not too much
+            of a problem since unreduced giv's are not live across loop
+            iterations anyways.  When unrolling a loop completely though,
+            it makes sense to reduce&split givs when possible, as this will
+            result in simpler instructions, and will not require that a reg
+            be live across loop iterations.  */
+         
+         splittable_regs[REGNO (v->dest_reg)] = value;
+         fprintf (stderr, "Giv %d at insn %d not reduced\n",
+                  REGNO (v->dest_reg), INSN_UID (v->insn));
+#else
+         continue;
+#endif
+       }
+      
+      /* Givs are only updated once by definition.  Mark it so if this is
+        a splittable register.  Don't need to do anything for address givs
+        where this may not be a register.  */
+
+      if (GET_CODE (v->new_reg) == REG)
+       splittable_regs_updates[REGNO (v->new_reg)] = 1;
+
+      result++;
+      
+      if (loop_dump_stream)
+       {
+         int regnum;
+         
+         if (GET_CODE (v->dest_reg) == CONST_INT)
+           regnum = -1;
+         else if (GET_CODE (v->dest_reg) != REG)
+           regnum = REGNO (XEXP (v->dest_reg, 0));
+         else
+           regnum = REGNO (v->dest_reg);
+         fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
+                  regnum, INSN_UID (v->insn));
+       }
+    }
+
+  return result;
+}
+\f
+/* Try to prove that the register is dead after the loop exits.  Trace every
+   loop exit looking for an insn that will always be executed, which sets
+   the register to some value, and appears before the first use of the register
+   is found.  If successful, then return 1, otherwise return 0.  */
+
+/* ?? Could be made more intelligent in the handling of jumps, so that
+   it can search past if statements and other similar structures.  */
+
+static int
+reg_dead_after_loop (reg, loop_start, loop_end)
+     rtx reg, loop_start, loop_end;
+{
+  rtx insn, label;
+  enum rtx_code code;
+
+  /* HACK: Must also search the loop fall through exit, create a label_ref
+     here which points to the loop_end, and append the loop_number_exit_labels
+     list to it.  */
+  label = gen_rtx (LABEL_REF, VOIDmode, loop_end);
+  LABEL_NEXTREF (label)
+    = loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]];
+
+  for ( ; label; label = LABEL_NEXTREF (label))
+    {
+      /* Succeed if find an insn which sets the biv or if reach end of
+        function.  Fail if find an insn that uses the biv, or if come to
+        a conditional jump.  */
+
+      insn = NEXT_INSN (XEXP (label, 0));
+      while (1)
+       {
+         if (insn == 0)
+           break;
+
+         if ((code = GET_CODE (insn)) == INSN || code == JUMP_INSN
+             || code == CALL_INSN)
+           {
+             if (GET_CODE (PATTERN (insn)) == SET)
+               {
+                 if (reg_mentioned_p (reg, SET_SRC (PATTERN (insn))))
+                   return 0;
+                 if (SET_DEST (PATTERN (insn)) == reg)
+                   break;
+                 if (reg_mentioned_p (reg, SET_DEST (PATTERN (insn))))
+                   return 0;
+               }
+             else if (reg_mentioned_p (reg, PATTERN (insn)))
+               return 0;
+           }
+         if (code == JUMP_INSN)
+           {
+             if (GET_CODE (PATTERN (insn)) == RETURN)
+               break;
+             else if (! simplejump_p (insn))
+               return 0;
+             else
+               {
+                 insn = JUMP_LABEL (insn);
+                 /* If this branches to a code label after a LOOP_BEG or
+                    a LOOP_CONT note, then assume it is a loop back edge.
+                    Must fail in that case to prevent going into an infinite
+                    loop trying to trace infinite loops.
+
+                    In the presence of syntax errors, this may be a jump to
+                    a CODE_LABEL that was never emitted.  Fail in this case
+                    also.  */
+
+                 if (! PREV_INSN (insn)
+                     || (GET_CODE (PREV_INSN (insn)) == NOTE
+                         && ((NOTE_LINE_NUMBER (PREV_INSN (insn))
+                              == NOTE_INSN_LOOP_BEG)
+                             || (NOTE_LINE_NUMBER (PREV_INSN (insn))
+                                 == NOTE_INSN_LOOP_CONT))))
+                   return 0;
+               }
+           }
+         
+         insn = NEXT_INSN (insn);
+       }
+    }
+
+  /* Success, the register is dead on all loop exits.  */
+  return 1;
+}
+
+/* Try to calculate the final value of the biv, the value it will have at
+   the end of the loop.  If we can do it, return that value.  */
+  
+rtx
+final_biv_value (bl, loop_start, loop_end)
+     struct iv_class *bl;
+     rtx loop_start, loop_end;
+{
+  rtx increment, tem;
+
+  /* The final value for reversed bivs must be calculated differently than
+      for ordinary bivs.  In this case, there is already an insn after the
+     loop which sets this biv's final value (if necessary), and there are
+     no other loop exits, so we can return any value.  */
+  if (bl->reversed)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Final biv value for %d, reversed biv.\n", bl->regno);
+                
+      return const0_rtx;
+    }
+
+  /* Try to calculate the final value as initial value + (number of iterations
+     * increment).  For this to work, increment must be invariant, the only
+     exit from the loop must be the fall through at the bottom (otherwise
+     it may not have its final value when the loop exits), and the initial
+     value of the biv must be invariant.  */
+
+  if (loop_n_iterations != 0
+      && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]
+      && invariant_p (bl->initial_value))
+    {
+      increment = biv_total_increment (bl, loop_start, loop_end);
+      
+      if (increment && invariant_p (increment))
+       {
+         /* Can calculate the loop exit value, emit insns after loop
+            end to calculate this value into a temporary register in
+            case it is needed later.  */
+
+         tem = gen_reg_rtx (bl->biv->mode);
+         emit_iv_add_mult (increment,
+                           gen_rtx (CONST_INT, VOIDmode, loop_n_iterations),
+                           bl->initial_value, tem, NEXT_INSN (loop_end));
+
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Final biv value for %d, calculated.\n", bl->regno);
+         
+         return tem;
+       }
+    }
+
+  /* Check to see if the biv is dead at all loop exits.  */
+  if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Final biv value for %d, biv dead after loop exit.\n",
+                bl->regno);
+
+      return const0_rtx;
+    }
+
+  return 0;
+}
+
+/* Try to calculate the final value of the giv, the value it will have at
+   the end of the loop.  If we can do it, return that value.  */
+
+rtx
+final_giv_value (v, loop_start, loop_end)
+     struct induction *v;
+     rtx loop_start, loop_end;
+{
+  struct iv_class *bl;
+  rtx reg, insn, pattern;
+  rtx increment, tem;
+  enum rtx_code code;
+  rtx insert_before;
+
+  bl = reg_biv_class[REGNO (v->src_reg)];
+
+  /* The final value for givs which depend on reversed bivs must be calculated
+     differently than for ordinary givs.  In this case, there is already an
+     insn after the loop which sets this giv's final value (if necessary),
+     and there are no other loop exits, so we can return any value.  */
+  if (bl->reversed)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Final giv value for %d, depends on reversed biv\n",
+                REGNO (v->dest_reg));
+      return const0_rtx;
+    }
+
+  /* Try to calculate the final value as a function of the biv it depends
+     upon.  The only exit from the loop must be the fall through at the bottom
+     (otherwise it may not have its final value when the loop exits).  */
+      
+  /* ??? Can calculate the final giv value by subtracting off the
+     extra biv increments times the giv's mult_val.  The loop must have
+     only one exit for this to work, but the loop iterations does not need
+     to be known.  */
+
+  if (loop_n_iterations != 0
+      && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]])
+    {
+      /* ?? It is tempting to use the biv's value here since these insns will
+        be put after the loop, and hence the biv will have its final value
+        then.  However, this fails if the biv is subsequently eliminated.
+        Perhaps determine whether biv's are eliminable before trying to
+        determine whether giv's are replaceable so that we can use the
+        biv value here if it is not eliminable.  */
+
+      increment = biv_total_increment (bl, loop_start, loop_end);
+
+      if (increment && invariant_p (increment))
+       {
+         /* Can calculate the loop exit value of its biv as
+            (loop_n_iterations * increment) + initial_value */
+             
+         /* The loop exit value of the giv is then
+            (final_biv_value - extra increments) * mult_val + add_val.
+            The extra increments are any increments to the biv which
+            occur in the loop after the giv's value is calculated.
+            We must search from the insn that sets the giv to the end
+            of the loop to calculate this value.  */
+
+         insert_before = NEXT_INSN (loop_end);
+
+         /* Put the final biv value in tem.  */
+         tem = gen_reg_rtx (bl->biv->mode);
+         emit_iv_add_mult (increment,
+                           gen_rtx (CONST_INT, VOIDmode, loop_n_iterations),
+                           bl->initial_value, tem, insert_before);
+
+         /* Subtract off extra increments as we find them.  */
+         for (insn = NEXT_INSN (v->insn); insn != loop_end;
+              insn = NEXT_INSN (insn))
+           {
+             if (GET_CODE (insn) == INSN
+                 && GET_CODE (PATTERN (insn)) == SET
+                 && SET_DEST (PATTERN (insn)) == v->src_reg)
+               {
+                 pattern = PATTERN (insn);
+                 if (GET_CODE (SET_SRC (pattern)) != PLUS)
+                   {
+                     /* Sometimes a biv is computed in a temp reg,
+                        and then copied into the biv reg.  */
+                     pattern = PATTERN (PREV_INSN (insn));
+                     if (GET_CODE (SET_SRC (pattern)) != PLUS)
+                       abort ();
+                   }
+                 if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
+                     || REGNO (XEXP (SET_SRC (pattern), 0)) != bl->regno)
+                   abort ();
+                 
+                 tem = expand_binop (GET_MODE (tem), sub_optab, tem,
+                                     XEXP (SET_SRC (pattern), 1), 0, 0,
+                                     OPTAB_LIB_WIDEN);
+               }
+           }
+         
+         /* Now calculate the giv's final value.  */
+         emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
+                           insert_before);
+         
+         if (loop_dump_stream)
+           fprintf (loop_dump_stream,
+                    "Final giv value for %d, calc from biv's value.\n",
+                    REGNO (v->dest_reg));
+
+         return tem;
+       }
+    }
+
+  /* Replaceable giv's should never reach here.  */
+  if (v->replaceable)
+    abort ();
+
+  /* Check to see if the biv is dead at all loop exits.  */
+  if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Final giv value for %d, giv dead after loop exit.\n",
+                REGNO (v->dest_reg));
+
+      return const0_rtx;
+    }
+
+  return 0;
+}
+
+
+/* Calculate the number of loop iterations.  Returns the exact number of loop
+   iterations if it can be calculated, otherwise retusns zero.  */
+
+unsigned long
+loop_iterations (loop_start, loop_end)
+     rtx loop_start, loop_end;
+{
+  rtx comparison, comparison_value;
+  rtx iteration_var, initial_value, increment, final_value;
+  enum rtx_code comparison_code;
+  int i, increment_dir;
+  int unsigned_compare, compare_dir, final_larger;
+  unsigned long tempu;
+  rtx last_loop_insn;
+
+  /* First find the iteration variable.  If the last insn is a conditional
+     branch, and the insn before tests a register value, make that the
+     iteration variable.  */
+  
+  loop_initial_value = 0;
+  loop_increment = 0;
+  loop_final_value = 0;
+  loop_iteration_var = 0;
+
+  last_loop_insn = prev_nonnote_insn (loop_end);
+
+  comparison = get_condition_for_loop (last_loop_insn);
+  if (comparison == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: No final conditional branch found.\n");
+      return 0;
+    }
+
+  /* ??? Get_condition may switch position of induction variable and
+     invariant register when it canonicalizes the comparison.  */
+
+  comparison_code = GET_CODE (comparison);
+  iteration_var = XEXP (comparison, 0);
+  comparison_value = XEXP (comparison, 1);
+
+  if (GET_CODE (iteration_var) != REG)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Comparison not against register.\n");
+      return 0;
+    }
+
+  /* Loop iterations is always called before any new registers are created
+     now, so this should never occur.  */
+
+  if (REGNO (iteration_var) >= max_reg_before_loop)
+    abort ();
+
+  iteration_info (iteration_var, &initial_value, &increment,
+                 loop_start, loop_end);
+  if (initial_value == 0)
+    /* iteration_info already printed a message.  */
+    return 0;
+
+  if (increment == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Increment value can't be calculated.\n");
+      return 0;
+    }
+  if (GET_CODE (increment) != CONST_INT)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Increment value not constant.\n");
+      return 0;
+    }
+  if (GET_CODE (initial_value) != CONST_INT)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Initial value not constant.\n");
+      return 0;
+    }
+
+  /* If the comparison value is an invariant register, then try to find
+     its value from the insns before the start of the loop.  */
+
+  if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
+    {
+      rtx insn, set;
+    
+      for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
+       {
+         if (GET_CODE (insn) == CODE_LABEL)
+           break;
+
+         else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+                  && (set = single_set (insn))
+                  && (SET_DEST (set) == comparison_value))
+           {
+             rtx note = find_reg_note (insn, REG_EQUAL, 0);
+
+             if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST)
+               comparison_value = XEXP (note, 0);
+
+             break;
+           }
+       }
+    }
+
+  final_value = approx_final_value (comparison_code, comparison_value,
+                                   &unsigned_compare, &compare_dir);
+
+  /* Save the calculated values describing this loop's bounds, in case
+     precondition_loop_p will need them later.  These values can not be
+     recalculated inside precondition_loop_p because strength reduction
+     optimizations may obscure the loop's structure.  */
+
+  loop_iteration_var = iteration_var;
+  loop_initial_value = initial_value;
+  loop_increment = increment;
+  loop_final_value = final_value;
+
+  if (final_value == 0)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: EQ comparison loop.\n");
+      return 0;
+    }
+  else if (GET_CODE (final_value) != CONST_INT)
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Final value not constant.\n");
+      return 0;
+    }
+
+  /* ?? Final value and initial value do not have to be constants.
+     Only their difference has to be constant.  When the iteration variable
+     is an array address, the final value and initial value might both
+     be addresses with the same base but different constant offsets.
+     Final value must be invariant for this to work.
+
+     To do this, need someway to find the values of registers which are
+     invariant.  */
+
+  /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1.  */
+  if (unsigned_compare)
+    final_larger
+      = ((unsigned) INTVAL (final_value) > (unsigned) INTVAL (initial_value)) -
+       ((unsigned) INTVAL (final_value) < (unsigned) INTVAL (initial_value));
+  else
+    final_larger = (INTVAL (final_value) > INTVAL (initial_value)) -
+      (INTVAL (final_value) < INTVAL (initial_value));
+
+  if (INTVAL (increment) > 0)
+    increment_dir = 1;
+  else if (INTVAL (increment) == 0)
+    increment_dir = 0;
+  else
+    increment_dir = -1;
+
+  /* There are 27 different cases: compare_dir = -1, 0, 1;
+     final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
+     There are 4 normal cases, 4 reverse cases (where the iteration variable
+     will overflow before the loop exits), 4 infinite loop cases, and 15
+     immediate exit (0 or 1 iteration depending on loop type) cases.
+     Only try to optimize the normal cases.  */
+     
+  /* (compare_dir/final_larger/increment_dir)
+     Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
+     Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
+     Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
+     Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
+
+  /* ?? If the meaning of reverse loops (where the iteration variable
+     will overflow before the loop exits) is undefined, then could
+     eliminate all of these special checks, and just always assume
+     the loops are normal/immediate/infinite.  Note that this means
+     the sign of increment_dir does not have to be known.  Also,
+     since it does not really hurt if immediate exit loops or infinite loops
+     are optimized, then that case could be ignored also, and hence all
+     loops can be optimized.
+
+     According to ANSI Spec, the reverse loop case result is undefined,
+     because the action on overflow is undefined.
+
+     See also the special test for NE loops below.  */
+
+  if (final_larger == increment_dir && final_larger != 0
+      && (final_larger == compare_dir || compare_dir == 0))
+    /* Normal case.  */
+    ;
+  else
+    {
+      if (loop_dump_stream)
+       fprintf (loop_dump_stream,
+                "Loop unrolling: Not normal loop.\n");
+      return 0;
+    }
+
+  /* Calculate the number of iterations, final_value is only an approximation,
+     so correct for that.  Note that tempu and loop_n_iterations are
+     unsigned, because they can be as large as 2^n - 1.  */
+
+  i = INTVAL (increment);
+  if (i > 0)
+    tempu = INTVAL (final_value) - INTVAL (initial_value);
+  else if (i < 0)
+    {
+      tempu = INTVAL (initial_value) - INTVAL (final_value);
+      i = -i;
+    }
+  else
+    abort ();
+
+  /* For NE tests, make sure that the iteration variable won't miss the
+     final value.  If tempu mod i is not zero, then the iteration variable
+     will overflow before the loop exits, and we can not calculate the
+     number of iterations.  */
+  if (compare_dir == 0 && (tempu % i) != 0)
+    return 0;
+
+  return tempu / i + ((tempu % i) != 0);
+}
author	Richard Kenner <kenner@gcc.gnu.org>
	Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)
committer	Richard Kenner <kenner@gcc.gnu.org>
	Sat, 8 Feb 1992 03:27:38 +0000 (22:27 -0500)