/* Loop unrolling.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "alias.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "params.h"
#include "flags.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, better code for further optimization passes, and in some cases
   from fewer tests of exit conditions.  The main problem is code growth,
   which impacts performance negatively through its effect on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate functions below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several %.
   */
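
/* For example (an illustrative sketch; the value 200 is an assumed
   setting of the --param, not necessarily the current default): with
   PARAM_MAX_UNROLLED_INSNS == 200, a 20-insn innermost loop may receive
   up to 200 / 20 == 10 copies of its body, further capped by
   PARAM_MAX_AVERAGE_UNROLLED_INSNS and PARAM_MAX_UNROLL_TIMES in the
   decide_* routines below.  */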

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	 /* The insn in which the induction variable occurs.  */
  rtx orig_var;		 /* The variable (register) for the IV before split.  */
  rtx base_var;		 /* The variable on which the values in the further
			    iterations are based.  */
  rtx step;		 /* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	    /* The insn in which the variable expansion occurs.  */
  rtx reg;		    /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;  /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next; /* Next entry in walking order.  */
  enum rtx_code op;	    /* The type of the accumulation - addition, subtraction
			       or multiplication.  */
  int expansion_count;	    /* Count the number of expansions generated so far.  */
  int reuse_expansion;	    /* The expansion we intend to reuse to expand
			       the accumulator.  If REUSE_EXPANSION is 0 reuse
			       the original accumulator.  Else use
			       var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}

/* Information about optimizations applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};

static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info)
    dump_printf (report_flags,
		 " (header execution count %d)",
		 (int)loop->header->count);

  dump_printf (report_flags, "\n");
}

/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (TDF_RTL, locus,
			 ";; *** Considering loop %d at BB %d for "
			 "unrolling ***\n",
			 loop->num, loop->header->index);

      /* Do not unroll cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check the number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in case the loop has more
     than one exit it may well loop less than the determined maximal
     number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
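
/* A worked example of the search above (illustrative, not from the
   sources): with desc->niter == 102, nunroll == 8 and the exit test at
   the beginning of the body, we scan i from 18 down to 7 minimizing
   n_copies == exit_mod + i + 1.  The winner is i == 9: 102 % 10 == 2,
   i.e. 12 body copies in total -- a 10-way unrolled loop plus 2 peeled
   iterations.  */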

/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test
	 of the exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case where we would exit before reaching the loop.
	 So just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find the new in and out edges; they are in the last copy we
	 have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number
   of times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simplicity.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
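
/* Illustrative example of the power-of-2 forcing above: if the size
   limits allow nunroll == 7, the loop leaves i == 4, so we record
   times == 3, i.e. 4 copies of the body.  The preconditioning code in
   unroll_loop_runtime_iterations relies on this power-of-two copy
   count to compute the modulo with a simple AND.  */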

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, int prob, rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label, -1);
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}

/* Unroll LOOP, for which we are able to count the number of iterations
   at runtime, LOOP->LPT_DECISION.TIMES times.  The transformation does
   this (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
	 body; i++;
       case 2:
	 body; i++;
       case 1:
	 body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
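/* A note on the preconditioning arithmetic (illustrative): since the
   copy count LOOP->LPT_DECISION.TIMES + 1 is forced to be a power of
   two, the "mod = n % 4" above is actually computed as "n & 3" in the
   sequence built below, which stays correct even if the computation
   of n overflowed.  */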
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get the expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count the modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of the loop body (almost always we must leave
     the exit test here; the only exception is when we have an extra zero
     check and the number of iterations is reliable).  Also record the
     place of the (possible) extra zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create an item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find the new in and out edges; they are in the last copy we
	 have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simplicity.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial
	 assumptions for the loop to be really simple.  We could update
	 the counts, but the problem is that we are unable to decide
	 which exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation, will yield wrong
	 results for (x = something - x); thus avoid using it in that
	 case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace chains of the type

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
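
/* Example (illustrative, not from the sources): a plain biv increment
   such as (set (reg:SI i) (plus:SI (reg:SI i) (const_int 4))) passes
   the checks above, while an iv that is only valid via an extension
   (iv.mode != iv.extend_mode, e.g. an SImode biv used zero-extended to
   DImode) is rejected.  */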

/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records the start of the
   duplicated area in OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
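
/* Example (illustrative): when unrolling with N_COPIES == 3, the base
   variable is initialized in the original body (copy 0), so copy 2 lies
   two iterations past the initialization and gets delta 2.  When
   peeling, the initialization happens in copy 1, so copy 0 (the
   original loop, which runs last) gets delta N_COPIES.  */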

/* Allocate the basic variable for the induction variable chain IVTS.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert the initialization of the basic variable of IVTS before INSN,
   taking the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}


/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
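
/* Example (illustrative): with two recorded expansions, successive
   calls return VE->reg, var_expansions[0], var_expansions[1], and then
   VE->reg again -- a round-robin over the original accumulator and its
   copies once the expansion limit has been reached.  */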

/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}

/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}

/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}

/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}

/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds the position in the copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_INSN_P (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_INSN_P (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Also rewrite the original loop body.  Find its blocks as originals
     of the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}

/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}