gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "params.h"
  31 #include "expr.h"
  32 #include "hash-table.h"
  33 #include "recog.h"
  34 #include "target.h"
  35 #include "dumpfile.h"
  36
  37 /* This pass performs loop unrolling.  We only perform this
  38    optimization on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain comes from greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    from fewer tests of exit conditions.  The main problem is code growth,
  43    which impacts performance negatively due to the effect of caches.
  44
  45    What we do:
  46
  47    -- unrolling of loops that roll constant times; this is almost always
  48       win, as we get rid of exit condition tests.
  49    -- unrolling of loops that roll number of times that we can compute
  50       in runtime; we also get rid of exit condition tests here, but there
  51       is the extra expense for calculating the number of iterations
  52    -- simple unrolling of remaining loops; this is performed only if we
  53       are asked to, as the gain is questionable in this case and often
  54       it may even slow down the code
  55    For more detailed descriptions of each of those, see comments at
  56    appropriate function below.
  57
  58    There are a lot of parameters (defined and described in params.def) that
  59    control how much we unroll.
  60
  61    ??? A great problem is that we don't have a good way to determine
  62    how many times we should unroll the loop; the experiments I have made
  63    showed that this choice may affect performance on the order of several %.
  64    */
  65
  66 /* Information about induction variables to split.  */
  67
  68 struct iv_to_split
  69 {
  70   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  71   rtx orig_var;         /* The variable (register) for the IV before split.  */
  72   rtx base_var;         /* The variable on that the values in the further
  73                            iterations are based.  */
  74   rtx step;             /* Step of the induction variable.  */
  75   struct iv_to_split *next; /* Next entry in walking order.  */
  76 };
  77
  78 /* Information about accumulators to expand.  */
  79
  80 struct var_to_expand
  81 {
  82   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  83   rtx reg;                         /* The accumulator which is expanded.  */
  84   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  85   struct var_to_expand *next;      /* Next entry in walking order.  */
  86   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  87                                       or multiplication.  */
  88   int expansion_count;             /* Count the number of expansions generated so far.  */
  89   int reuse_expansion;             /* The expansion we intend to reuse to expand
  90                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  91                                       the original accumulator.  Else use
  92                                       var_expansions[REUSE_EXPANSION - 1].  */
  93 };
  94
  95 /* Hashtable helper for iv_to_split.  */
  96
  97 struct iv_split_hasher : typed_free_remove <iv_to_split>
  98 {
  99   typedef iv_to_split value_type;
 100   typedef iv_to_split compare_type;
 101   static inline hashval_t hash (const value_type *);
 102   static inline bool equal (const value_type *, const compare_type *);
 103 };
 104
 105
 106 /* A hash function for information about insns to split.  */
 107
 108 inline hashval_t
 109 iv_split_hasher::hash (const value_type *ivts)
 110 {
 111   return (hashval_t) INSN_UID (ivts->insn);
 112 }
 113
 114 /* An equality functions for information about insns to split.  */
 115
 116 inline bool
 117 iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
 118 {
 119   return i1->insn == i2->insn;
 120 }
 121
 122 /* Hashtable helper for iv_to_split.  */
 123
 124 struct var_expand_hasher : typed_free_remove <var_to_expand>
 125 {
 126   typedef var_to_expand value_type;
 127   typedef var_to_expand compare_type;
 128   static inline hashval_t hash (const value_type *);
 129   static inline bool equal (const value_type *, const compare_type *);
 130 };
 131
 132 /* Return a hash for VES.  */
 133
 134 inline hashval_t
 135 var_expand_hasher::hash (const value_type *ves)
 136 {
 137   return (hashval_t) INSN_UID (ves->insn);
 138 }
 139
 140 /* Return true if I1 and I2 refer to the same instruction.  */
 141
 142 inline bool
 143 var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
 144 {
 145   return i1->insn == i2->insn;
 146 }
 147
 148 /* Information about optimization applied in
 149    the unrolled loop.  */
 150
 151 struct opt_info
 152 {
 153   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 154                                                   split.  */
 155   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 156   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 157   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 158                                         insns with accumulators to expand.  */
 159   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 160   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 161   unsigned first_new_block;        /* The first basic block that was
 162                                       duplicated.  */
 163   basic_block loop_exit;           /* The loop exit basic block.  */
 164   basic_block loop_preheader;      /* The loop preheader basic block.  */
 165 };
 166
 167 static void decide_unroll_stupid (struct loop *, int);
 168 static void decide_unroll_constant_iterations (struct loop *, int);
 169 static void decide_unroll_runtime_iterations (struct loop *, int);
 170 static void unroll_loop_stupid (struct loop *);
 171 static void decide_unrolling (int);
 172 static void unroll_loop_constant_iterations (struct loop *);
 173 static void unroll_loop_runtime_iterations (struct loop *);
 174 static struct opt_info *analyze_insns_in_loop (struct loop *);
 175 static void opt_info_start_duplication (struct opt_info *);
 176 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 177 static void free_opt_info (struct opt_info *);
 178 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 179 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 180 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 181 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 182 static void insert_var_expansion_initialization (struct var_to_expand *,
 183                                                  basic_block);
 184 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 185                                              basic_block);
 186 static rtx get_expansion (struct var_to_expand *);
 187
 188 /* Emit a message summarizing the unroll that will be
 189    performed for LOOP, along with the loop's location LOCUS, if
 190    appropriate given the dump or -fopt-info settings.  */
 191
 192 static void
 193 report_unroll (struct loop *loop, location_t locus)
 194 {
 195   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 196
 197   if (loop->lpt_decision.decision == LPT_NONE)
 198     return;
 199
 200   if (!dump_enabled_p ())
 201     return;
 202
 203   dump_printf_loc (report_flags, locus,
 204                    "loop unrolled %d times",
 205                    loop->lpt_decision.times);
 206   if (profile_info)
 207     dump_printf (report_flags,
 208                  " (header execution count %d)",
 209                  (int)loop->header->count);
 210
 211   dump_printf (report_flags, "\n");
 212 }
 213
 214 /* Decide whether unroll loops and how much.  */
 215 static void
 216 decide_unrolling (int flags)
 217 {
 218   struct loop *loop;
 219
 220   /* Scan the loops, inner ones first.  */
 221   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 222     {
 223       loop->lpt_decision.decision = LPT_NONE;
 224       location_t locus = get_loop_location (loop);
 225
 226       if (dump_enabled_p ())
 227         dump_printf_loc (TDF_RTL, locus,
 228                          ";; *** Considering loop %d at BB %d for "
 229                          "unrolling ***\n",
 230                          loop->num, loop->header->index);
 231
 232       /* Do not peel cold areas.  */
 233       if (optimize_loop_for_size_p (loop))
 234         {
 235           if (dump_file)
 236             fprintf (dump_file, ";; Not considering loop, cold area\n");
 237           continue;
 238         }
 239
 240       /* Can the loop be manipulated?  */
 241       if (!can_duplicate_loop_p (loop))
 242         {
 243           if (dump_file)
 244             fprintf (dump_file,
 245                      ";; Not considering loop, cannot duplicate\n");
 246           continue;
 247         }
 248
 249       /* Skip non-innermost loops.  */
 250       if (loop->inner)
 251         {
 252           if (dump_file)
 253             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 254           continue;
 255         }
 256
 257       loop->ninsns = num_loop_insns (loop);
 258       loop->av_ninsns = average_num_loop_insns (loop);
 259
 260       /* Try transformations one by one in decreasing order of
 261          priority.  */
 262
 263       decide_unroll_constant_iterations (loop, flags);
 264       if (loop->lpt_decision.decision == LPT_NONE)
 265         decide_unroll_runtime_iterations (loop, flags);
 266       if (loop->lpt_decision.decision == LPT_NONE)
 267         decide_unroll_stupid (loop, flags);
 268
 269       report_unroll (loop, locus);
 270     }
 271 }
 272
 273 /* Unroll LOOPS.  */
 274 void
 275 unroll_loops (int flags)
 276 {
 277   struct loop *loop;
 278   bool changed = false;
 279
 280   /* Now decide rest of unrolling.  */
 281   decide_unrolling (flags);
 282
 283   /* Scan the loops, inner ones first.  */
 284   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 285     {
 286       /* And perform the appropriate transformations.  */
 287       switch (loop->lpt_decision.decision)
 288         {
 289         case LPT_UNROLL_CONSTANT:
 290           unroll_loop_constant_iterations (loop);
 291           changed = true;
 292           break;
 293         case LPT_UNROLL_RUNTIME:
 294           unroll_loop_runtime_iterations (loop);
 295           changed = true;
 296           break;
 297         case LPT_UNROLL_STUPID:
 298           unroll_loop_stupid (loop);
 299           changed = true;
 300           break;
 301         case LPT_NONE:
 302           break;
 303         default:
 304           gcc_unreachable ();
 305         }
 306     }
 307
 308     if (changed)
 309       {
 310         calculate_dominance_info (CDI_DOMINATORS);
 311         fix_loop_structure (NULL);
 312       }
 313
 314   iv_analysis_done ();
 315 }
 316
 317 /* Check whether exit of the LOOP is at the end of loop body.  */
 318
 319 static bool
 320 loop_exit_at_end_p (struct loop *loop)
 321 {
 322   struct niter_desc *desc = get_simple_loop_desc (loop);
 323   rtx_insn *insn;
 324
 325   /* We should never have conditional in latch block.  */
 326   gcc_assert (desc->in_edge->dest != loop->header);
 327
 328   if (desc->in_edge->dest != loop->latch)
 329     return false;
 330
 331   /* Check that the latch is empty.  */
 332   FOR_BB_INSNS (loop->latch, insn)
 333     {
 334       if (INSN_P (insn) && active_insn_p (insn))
 335         return false;
 336     }
 337
 338   return true;
 339 }
 340
 341 /* Decide whether to unroll LOOP iterating constant number of times
 342    and how much.  */
 343
 344 static void
 345 decide_unroll_constant_iterations (struct loop *loop, int flags)
 346 {
 347   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 348   struct niter_desc *desc;
 349   widest_int iterations;
 350
 351   if (!(flags & UAP_UNROLL))
 352     {
 353       /* We were not asked to, just return back silently.  */
 354       return;
 355     }
 356
 357   if (dump_file)
 358     fprintf (dump_file,
 359              "\n;; Considering unrolling loop with constant "
 360              "number of iterations\n");
 361
 362   /* nunroll = total number of copies of the original loop body in
 363      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 364   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 365   nunroll_by_av
 366     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 367   if (nunroll > nunroll_by_av)
 368     nunroll = nunroll_by_av;
 369   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 370     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 371
 372   if (targetm.loop_unroll_adjust)
 373     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 374
 375   /* Skip big loops.  */
 376   if (nunroll <= 1)
 377     {
 378       if (dump_file)
 379         fprintf (dump_file, ";; Not considering loop, is too big\n");
 380       return;
 381     }
 382
 383   /* Check for simple loops.  */
 384   desc = get_simple_loop_desc (loop);
 385
 386   /* Check number of iterations.  */
 387   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 388     {
 389       if (dump_file)
 390         fprintf (dump_file,
 391                  ";; Unable to prove that the loop iterates constant times\n");
 392       return;
 393     }
 394
 395   /* Check whether the loop rolls enough to consider.
 396      Consult also loop bounds and profile; in the case the loop has more
 397      than one exit it may well loop less than determined maximal number
 398      of iterations.  */
 399   if (desc->niter < 2 * nunroll
 400       || ((get_estimated_loop_iterations (loop, &iterations)
 401            || get_max_loop_iterations (loop, &iterations))
 402           && wi::ltu_p (iterations, 2 * nunroll)))
 403     {
 404       if (dump_file)
 405         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 406       return;
 407     }
 408
 409   /* Success; now compute number of iterations to unroll.  We alter
 410      nunroll so that as few as possible copies of loop body are
 411      necessary, while still not decreasing the number of unrollings
 412      too much (at most by 1).  */
 413   best_copies = 2 * nunroll + 10;
 414
 415   i = 2 * nunroll + 2;
 416   if (i - 1 >= desc->niter)
 417     i = desc->niter - 2;
 418
 419   for (; i >= nunroll - 1; i--)
 420     {
 421       unsigned exit_mod = desc->niter % (i + 1);
 422
 423       if (!loop_exit_at_end_p (loop))
 424         n_copies = exit_mod + i + 1;
 425       else if (exit_mod != (unsigned) i
 426                || desc->noloop_assumptions != NULL_RTX)
 427         n_copies = exit_mod + i + 2;
 428       else
 429         n_copies = i + 1;
 430
 431       if (n_copies < best_copies)
 432         {
 433           best_copies = n_copies;
 434           best_unroll = i;
 435         }
 436     }
 437
 438   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 439   loop->lpt_decision.times = best_unroll;
 440 }
 441
 442 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 443    The transformation does this:
 444
 445    for (i = 0; i < 102; i++)
 446      body;
 447
 448    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 449
 450    i = 0;
 451    body; i++;
 452    body; i++;
 453    while (i < 102)
 454      {
 455        body; i++;
 456        body; i++;
 457        body; i++;
 458        body; i++;
 459      }
 460   */
 461 static void
 462 unroll_loop_constant_iterations (struct loop *loop)
 463 {
 464   unsigned HOST_WIDE_INT niter;
 465   unsigned exit_mod;
 466   sbitmap wont_exit;
 467   unsigned i;
 468   edge e;
 469   unsigned max_unroll = loop->lpt_decision.times;
 470   struct niter_desc *desc = get_simple_loop_desc (loop);
 471   bool exit_at_end = loop_exit_at_end_p (loop);
 472   struct opt_info *opt_info = NULL;
 473   bool ok;
 474
 475   niter = desc->niter;
 476
 477   /* Should not get here (such loop should be peeled instead).  */
 478   gcc_assert (niter > max_unroll + 1);
 479
       /* Iterations left over once NITER is split into groups of
          MAX_UNROLL + 1 bodies.  */
 480   exit_mod = niter % (max_unroll + 1);
 481
       /* A bitmap of MAX_UNROLL + 1 bits, all set initially, that is handed
          to duplicate_loop_to_header_edge below.  NOTE(review): presumably a
          set bit K means that copy K is known not to leave the loop through
          the exit edge -- confirm against duplicate_loop_to_header_edge.  */
 482   wont_exit = sbitmap_alloc (max_unroll + 1);
 483   bitmap_ones (wont_exit);
 484
       /* Edges collected by duplicate_loop_to_header_edge; they are removed
          with remove_path at the end of this function.  */
 485   auto_vec<edge> remove_edges;
 486   if (flag_split_ivs_in_unroller
 487       || flag_variable_expansion_in_unroller)
 488     opt_info = analyze_insns_in_loop (loop);
 489
 490   if (!exit_at_end)
 491     {
 492       /* The exit is not at the end of the loop; leave exit test
 493          in the first copy, so that the loops that start with test
 494          of exit condition have continuous body after unrolling.  */
 495
 496       if (dump_file)
 497         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 498
 499       /* Peel exit_mod iterations.  */
 500       bitmap_clear_bit (wont_exit, 0);
 501       if (desc->noloop_assumptions)
 502         bitmap_clear_bit (wont_exit, 1);
 503
 504       if (exit_mod)
 505         {
 506           opt_info_start_duplication (opt_info);
 507           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 508                                               exit_mod,
 509                                               wont_exit, desc->out_edge,
 510                                               &remove_edges,
 511                                               DLTHE_FLAG_UPDATE_FREQ
 512                                               | (opt_info && exit_mod > 1
 513                                                  ? DLTHE_RECORD_COPY_NUMBER
 514                                                    : 0));
 515           gcc_assert (ok);
 516
 517           if (opt_info && exit_mod > 1)
 518             apply_opt_in_copies (opt_info, exit_mod, false, false);
 519
               /* Account for the EXIT_MOD peeled iterations in the iteration
                  descriptor and in the recorded bounds; the estimate is
                  dropped when it is smaller than the peeled count.  */
 520           desc->noloop_assumptions = NULL_RTX;
 521           desc->niter -= exit_mod;
 522           loop->nb_iterations_upper_bound -= exit_mod;
 523           if (loop->any_estimate
 524               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 525             loop->nb_iterations_estimate -= exit_mod;
 526           else
 527             loop->any_estimate = false;
 528         }
 529
 530       bitmap_set_bit (wont_exit, 1);
 531     }
 532   else
 533     {
 534       /* Leave exit test in last copy, for the same reason as above if
 535          the loop tests the condition at the end of loop body.  */
 536
 537       if (dump_file)
 538         fprintf (dump_file, ";; Condition at end of loop.\n");
 539
 540       /* We know that niter >= max_unroll + 2; so we do not need to care of
 541          case when we would exit before reaching the loop.  So just peel
 542          exit_mod + 1 iterations.  */
 543       if (exit_mod != max_unroll
 544           || desc->noloop_assumptions)
 545         {
 546           bitmap_clear_bit (wont_exit, 0);
 547           if (desc->noloop_assumptions)
 548             bitmap_clear_bit (wont_exit, 1);
 549
 550           opt_info_start_duplication (opt_info);
 551           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 552                                               exit_mod + 1,
 553                                               wont_exit, desc->out_edge,
 554                                               &remove_edges,
 555                                               DLTHE_FLAG_UPDATE_FREQ
 556                                               | (opt_info && exit_mod > 0
 557                                                  ? DLTHE_RECORD_COPY_NUMBER
 558                                                    : 0));
 559           gcc_assert (ok);
 560
 561           if (opt_info && exit_mod > 0)
 562             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 563
               /* Account for the EXIT_MOD + 1 peeled iterations, in the same
                  way as in the branch above.  */
 564           desc->niter -= exit_mod + 1;
 565           loop->nb_iterations_upper_bound -= exit_mod + 1;
 566           if (loop->any_estimate
 567               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 568             loop->nb_iterations_estimate -= exit_mod + 1;
 569           else
 570             loop->any_estimate = false;
 571           desc->noloop_assumptions = NULL_RTX;
 572
               /* Restore the bits cleared for the peeling above before the
                  bitmap is reused for the unrolling proper.  */
 573           bitmap_set_bit (wont_exit, 0);
 574           bitmap_set_bit (wont_exit, 1);
 575         }
 576
 577       bitmap_clear_bit (wont_exit, max_unroll);
 578     }
 579
 580   /* Now unroll the loop.  */
 581
 582   opt_info_start_duplication (opt_info);
 583   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 584                                       max_unroll,
 585                                       wont_exit, desc->out_edge,
 586                                       &remove_edges,
 587                                       DLTHE_FLAG_UPDATE_FREQ
 588                                       | (opt_info
 589                                          ? DLTHE_RECORD_COPY_NUMBER
 590                                            : 0));
 591   gcc_assert (ok);
 592
 593   if (opt_info)
 594     {
 595       apply_opt_in_copies (opt_info, max_unroll, true, true);
 596       free_opt_info (opt_info);
 597     }
 598
       /* Release the bitmap obtained from sbitmap_alloc above.  */
 599   free (wont_exit);
 600
 601   if (exit_at_end)
 602     {
 603       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 604       /* Find a new in and out edge; they are in the last copy we have made.  */
 605
 606       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 607         {
 608           desc->out_edge = EDGE_SUCC (exit_block, 0);
 609           desc->in_edge = EDGE_SUCC (exit_block, 1);
 610         }
 611       else
 612         {
 613           desc->out_edge = EDGE_SUCC (exit_block, 1);
 614           desc->in_edge = EDGE_SUCC (exit_block, 0);
 615         }
 616     }
 617
       /* The loop now contains MAX_UNROLL + 1 bodies, so scale the iteration
          count and the recorded bounds down accordingly.  */
 618   desc->niter /= max_unroll + 1;
 619   loop->nb_iterations_upper_bound
 620     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 621   if (loop->any_estimate)
 622     loop->nb_iterations_estimate
 623       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 624   desc->niter_expr = GEN_INT (desc->niter);
 625
 626   /* Remove the edges.  */
 627   FOR_EACH_VEC_ELT (remove_edges, i, e)
 628     remove_path (e);
 629
 630   if (dump_file)
 631     fprintf (dump_file,
 632              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 633              max_unroll, num_loop_insns (loop));
 634 }
 635
 636 /* Decide whether to unroll LOOP iterating runtime computable number of times
 637    and how much.  */
 638 static void
 639 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 640 {
 641   unsigned nunroll, nunroll_by_av, i;
 642   struct niter_desc *desc;
 643   widest_int iterations;
 644
 645   if (!(flags & UAP_UNROLL))
 646     {
 647       /* We were not asked to, just return back silently.  */
 648       return;
 649     }
 650
 651   if (dump_file)
 652     fprintf (dump_file,
 653              "\n;; Considering unrolling loop with runtime "
 654              "computable number of iterations\n");
 655
 656   /* nunroll = total number of copies of the original loop body in
 657      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 658   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 659   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 660   if (nunroll > nunroll_by_av)
 661     nunroll = nunroll_by_av;
 662   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 663     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 664
 665   if (targetm.loop_unroll_adjust)
 666     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 667
 668   /* Skip big loops.  */
 669   if (nunroll <= 1)
 670     {
 671       if (dump_file)
 672         fprintf (dump_file, ";; Not considering loop, is too big\n");
 673       return;
 674     }
 675
 676   /* Check for simple loops.  */
 677   desc = get_simple_loop_desc (loop);
 678
 679   /* Check simpleness.  */
 680   if (!desc->simple_p || desc->assumptions)
 681     {
 682       if (dump_file)
 683         fprintf (dump_file,
 684                  ";; Unable to prove that the number of iterations "
 685                  "can be counted in runtime\n");
 686       return;
 687     }
 688
 689   if (desc->const_iter)
 690     {
 691       if (dump_file)
 692         fprintf (dump_file, ";; Loop iterates constant times\n");
 693       return;
 694     }
 695
 696   /* Check whether the loop rolls.  */
 697   if ((get_estimated_loop_iterations (loop, &iterations)
 698        || get_max_loop_iterations (loop, &iterations))
 699       && wi::ltu_p (iterations, 2 * nunroll))
 700     {
 701       if (dump_file)
 702         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 703       return;
 704     }
 705
 706   /* Success; now force nunroll to be power of 2, as we are unable to
 707      cope with overflows in computation of number of iterations.  */
 708   for (i = 1; 2 * i <= nunroll; i *= 2)
 709     continue;
 710
 711   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 712   loop->lpt_decision.times = i - 1;
 713 }
 714
 715 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 716    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 717    and NULL is returned instead.  */
 718
 719 basic_block
 720 split_edge_and_insert (edge e, rtx_insn *insns)
 721 {
 722   basic_block bb;
 723
 724   if (!insns)
 725     return NULL;
 726   bb = split_edge (e);
 727   emit_insn_after (insns, BB_END (bb));
 728
 729   /* ??? We used to assume that INSNS can contain control flow insns, and
 730      that we had to try to find sub basic blocks in BB to maintain a valid
 731      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 732      and call break_superblocks when going out of cfglayout mode.  But it
 733      turns out that this never happens; and that if it does ever happen,
 734      the verify_flow_info at the end of the RTL loop passes would fail.
 735
 736      There are two reasons why we expected we could have control flow insns
 737      in INSNS.  The first is when a comparison has to be done in parts, and
 738      the second is when the number of iterations is computed for loops with
 739      the number of iterations known at runtime.  In both cases, test cases
 740      to get control flow in INSNS appear to be impossible to construct:
 741
 742       * If do_compare_rtx_and_jump needs several branches to do comparison
 743         in a mode that needs comparison by parts, we cannot analyze the
 744         number of iterations of the loop, and we never get to unrolling it.
 745
 746       * The code in expand_divmod that was suspected to cause creation of
 747         branching code seems to be only accessed for signed division.  The
 748         divisions used by # of iterations analysis are always unsigned.
 749         Problems might arise on architectures that emits branching code
 750         for some operations that may appear in the unroller (especially
 751         for division), but we have no such architectures.
 752
 753      Considering all this, it was decided that we should for now assume
 754      that INSNS can in theory contain control flow insns, but in practice
 755      it never does.  So we don't handle the theoretical case, and should
 756      a real failure ever show up, we have a pretty good clue for how to
 757      fix it.  */
 758
 759   return bb;
 760 }
 761
 762 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 763    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 764    in order to create a jump.  */
 765
 766 static rtx_insn *
 767 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
 768                       rtx_insn *cinsn)
 769 {
 770   rtx_insn *seq, *jump;
 771   rtx cond;
 772   enum machine_mode mode;
 773
 774   mode = GET_MODE (op0);
 775   if (mode == VOIDmode)
 776     mode = GET_MODE (op1);
 777
 778   start_sequence ();
 779   if (GET_MODE_CLASS (mode) == MODE_CC)
 780     {
 781       /* A hack -- there seems to be no easy generic way how to make a
 782          conditional jump from a ccmode comparison.  */
 783       gcc_assert (cinsn);
 784       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 785       gcc_assert (GET_CODE (cond) == comp);
 786       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 787       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 788       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 789       jump = get_last_insn ();
 790       gcc_assert (JUMP_P (jump));
 791       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 792       LABEL_NUSES (JUMP_LABEL (jump))++;
 793       redirect_jump (jump, label, 0);
 794     }
 795   else
 796     {
 797       gcc_assert (!cinsn);
 798
 799       op0 = force_operand (op0, NULL_RTX);
 800       op1 = force_operand (op1, NULL_RTX);
 801       do_compare_rtx_and_jump (op0, op1, comp, 0,
 802                                mode, NULL_RTX, NULL_RTX, label, -1);
 803       jump = get_last_insn ();
 804       gcc_assert (JUMP_P (jump));
 805       JUMP_LABEL (jump) = label;
 806       LABEL_NUSES (label)++;
 807     }
 808   add_int_reg_note (jump, REG_BR_PROB, prob);
 809
 810   seq = get_insns ();
 811   end_sequence ();
 812
 813   return seq;
 814 }
 815
 816 /* Unroll LOOP for which we are able to count number of iterations in runtime
 817    LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
 818    extra care for case n < 0):
 819
 820    for (i = 0; i < n; i++)
 821      body;
 822
 823    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 824
 825    i = 0;
 826    mod = n % 4;
 827
 828    switch (mod)
 829      {
 830        case 3:
 831          body; i++;
 832        case 2:
 833          body; i++;
 834        case 1:
 835          body; i++;
 836        case 0: ;
 837      }
 838
 839    while (i < n)
 840      {
 841        body; i++;
 842        body; i++;
 843        body; i++;
 844        body; i++;
 845      }
 846    */
 847 static void
 848 unroll_loop_runtime_iterations (struct loop *loop)
 849 {
 850   rtx old_niter, niter, tmp;
 851   rtx_insn *init_code, *branch_code;
 852   unsigned i, j, p;
 853   basic_block preheader, *body, swtch, ezc_swtch;
 854   sbitmap wont_exit;
 855   int may_exit_copy;
 856   unsigned n_peel;
 857   edge e;
 858   bool extra_zero_check, last_may_exit;
 859   unsigned max_unroll = loop->lpt_decision.times;
 860   struct niter_desc *desc = get_simple_loop_desc (loop);
 861   bool exit_at_end = loop_exit_at_end_p (loop);
 862   struct opt_info *opt_info = NULL;
 863   bool ok;
 864
 865   if (flag_split_ivs_in_unroller
 866       || flag_variable_expansion_in_unroller)
 867     opt_info = analyze_insns_in_loop (loop);
 868
 869   /* Remember blocks whose dominators will have to be updated.  */
 870   auto_vec<basic_block> dom_bbs;
 871
 872   body = get_loop_body (loop);
 873   for (i = 0; i < loop->num_nodes; i++)
 874     {
 875       vec<basic_block> ldom;
 876       basic_block bb;
 877
 878       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 879       FOR_EACH_VEC_ELT (ldom, j, bb)
 880         if (!flow_bb_inside_loop_p (loop, bb))
 881           dom_bbs.safe_push (bb);
 882
 883       ldom.release ();
 884     }
 885   free (body);
 886
 887   if (!exit_at_end)
 888     {
 889       /* Leave exit in first copy (for explanation why see comment in
 890          unroll_loop_constant_iterations).  */
 891       may_exit_copy = 0;
 892       n_peel = max_unroll - 1;
 893       extra_zero_check = true;
 894       last_may_exit = false;
 895     }
 896   else
 897     {
 898       /* Leave exit in last copy (for explanation why see comment in
 899          unroll_loop_constant_iterations).  */
 900       may_exit_copy = max_unroll;
 901       n_peel = max_unroll;
 902       extra_zero_check = false;
 903       last_may_exit = true;
 904     }
 905
 906   /* Get expression for number of iterations.  */
 907   start_sequence ();
 908   old_niter = niter = gen_reg_rtx (desc->mode);
 909   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 910   if (tmp != niter)
 911     emit_move_insn (niter, tmp);
 912
 913   /* Count modulo by ANDing it with max_unroll; we use the fact that
 914      the number of unrollings is a power of two, and thus this is correct
 915      even if there is overflow in the computation.  */
 916   niter = expand_simple_binop (desc->mode, AND,
 917                                niter, gen_int_mode (max_unroll, desc->mode),
 918                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 919
 920   init_code = get_insns ();
 921   end_sequence ();
 922   unshare_all_rtl_in_chain (init_code);
 923
 924   /* Precondition the loop.  */
 925   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 926
 927   auto_vec<edge> remove_edges;
 928
 929   wont_exit = sbitmap_alloc (max_unroll + 2);
 930
 931   /* Peel the first copy of loop body (almost always we must leave exit test
 932      here; the only exception is when we have extra zero check and the number
 933      of iterations is reliable.  Also record the place of (possible) extra
 934      zero check.  */
 935   bitmap_clear (wont_exit);
 936   if (extra_zero_check
 937       && !desc->noloop_assumptions)
 938     bitmap_set_bit (wont_exit, 1);
 939   ezc_swtch = loop_preheader_edge (loop)->src;
 940   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 941                                       1, wont_exit, desc->out_edge,
 942                                       &remove_edges,
 943                                       DLTHE_FLAG_UPDATE_FREQ);
 944   gcc_assert (ok);
 945
 946   /* Record the place where switch will be built for preconditioning.  */
 947   swtch = split_edge (loop_preheader_edge (loop));
 948
 949   for (i = 0; i < n_peel; i++)
 950     {
 951       /* Peel the copy.  */
 952       bitmap_clear (wont_exit);
 953       if (i != n_peel - 1 || !last_may_exit)
 954         bitmap_set_bit (wont_exit, 1);
 955       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 956                                           1, wont_exit, desc->out_edge,
 957                                           &remove_edges,
 958                                           DLTHE_FLAG_UPDATE_FREQ);
 959       gcc_assert (ok);
 960
 961       /* Create item for switch.  */
 962       j = n_peel - i - (extra_zero_check ? 0 : 1);
 963       p = REG_BR_PROB_BASE / (i + 2);
 964
 965       preheader = split_edge (loop_preheader_edge (loop));
 966       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
 967                                           block_label (preheader), p,
 968                                           NULL);
 969
 970       /* We rely on the fact that the compare and jump cannot be optimized out,
 971          and hence the cfg we create is correct.  */
 972       gcc_assert (branch_code != NULL_RTX);
 973
 974       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
 975       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
 976       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
 977       e = make_edge (swtch, preheader,
 978                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
 979       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
 980       e->probability = p;
 981     }
 982
 983   if (extra_zero_check)
 984     {
 985       /* Add branch for zero iterations.  */
 986       p = REG_BR_PROB_BASE / (max_unroll + 1);
 987       swtch = ezc_swtch;
 988       preheader = split_edge (loop_preheader_edge (loop));
 989       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
 990                                           block_label (preheader), p,
 991                                           NULL);
 992       gcc_assert (branch_code != NULL_RTX);
 993
 994       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
 995       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
 996       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
 997       e = make_edge (swtch, preheader,
 998                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
 999       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1000       e->probability = p;
1001     }
1002
1003   /* Recount dominators for outer blocks.  */
1004   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1005
1006   /* And unroll loop.  */
1007
1008   bitmap_ones (wont_exit);
1009   bitmap_clear_bit (wont_exit, may_exit_copy);
1010   opt_info_start_duplication (opt_info);
1011
1012   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1013                                       max_unroll,
1014                                       wont_exit, desc->out_edge,
1015                                       &remove_edges,
1016                                       DLTHE_FLAG_UPDATE_FREQ
1017                                       | (opt_info
1018                                          ? DLTHE_RECORD_COPY_NUMBER
1019                                            : 0));
1020   gcc_assert (ok);
1021
1022   if (opt_info)
1023     {
1024       apply_opt_in_copies (opt_info, max_unroll, true, true);
1025       free_opt_info (opt_info);
1026     }
1027
1028   free (wont_exit);
1029
1030   if (exit_at_end)
1031     {
1032       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1033       /* Find a new in and out edge; they are in the last copy we have
1034          made.  */
1035
1036       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1037         {
1038           desc->out_edge = EDGE_SUCC (exit_block, 0);
1039           desc->in_edge = EDGE_SUCC (exit_block, 1);
1040         }
1041       else
1042         {
1043           desc->out_edge = EDGE_SUCC (exit_block, 1);
1044           desc->in_edge = EDGE_SUCC (exit_block, 0);
1045         }
1046     }
1047
1048   /* Remove the edges.  */
1049   FOR_EACH_VEC_ELT (remove_edges, i, e)
1050     remove_path (e);
1051
1052   /* We must be careful when updating the number of iterations due to
1053      preconditioning and the fact that the value must be valid at entry
1054      of the loop.  After passing through the above code, we see that
1055      the correct new number of iterations is this:  */
1056   gcc_assert (!desc->const_iter);
1057   desc->niter_expr =
1058     simplify_gen_binary (UDIV, desc->mode, old_niter,
1059                          gen_int_mode (max_unroll + 1, desc->mode));
1060   loop->nb_iterations_upper_bound
1061     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1062   if (loop->any_estimate)
1063     loop->nb_iterations_estimate
1064       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1065   if (exit_at_end)
1066     {
1067       desc->niter_expr =
1068         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1069       desc->noloop_assumptions = NULL_RTX;
1070       --loop->nb_iterations_upper_bound;
1071       if (loop->any_estimate
1072           && loop->nb_iterations_estimate != 0)
1073         --loop->nb_iterations_estimate;
1074       else
1075         loop->any_estimate = false;
1076     }
1077
1078   if (dump_file)
1079     fprintf (dump_file,
1080              ";; Unrolled loop %d times, counting # of iterations "
1081              "in runtime, %i insns\n",
1082              max_unroll, num_loop_insns (loop));
1083 }
1084
1085 /* Decide whether to unroll LOOP stupidly and how much.  */
1086 static void
1087 decide_unroll_stupid (struct loop *loop, int flags)
1088 {
1089   unsigned nunroll, nunroll_by_av, i;
1090   struct niter_desc *desc;
1091   widest_int iterations;
1092
1093   if (!(flags & UAP_UNROLL_ALL))
1094     {
1095       /* We were not asked to, just return back silently.  */
1096       return;
1097     }
1098
1099   if (dump_file)
1100     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1101
1102   /* nunroll = total number of copies of the original loop body in
1103      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1104   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1105   nunroll_by_av
1106     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1107   if (nunroll > nunroll_by_av)
1108     nunroll = nunroll_by_av;
1109   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1110     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1111
1112   if (targetm.loop_unroll_adjust)
1113     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1114
1115   /* Skip big loops.  */
1116   if (nunroll <= 1)
1117     {
1118       if (dump_file)
1119         fprintf (dump_file, ";; Not considering loop, is too big\n");
1120       return;
1121     }
1122
1123   /* Check for simple loops.  */
1124   desc = get_simple_loop_desc (loop);
1125
1126   /* Check simpleness.  */
1127   if (desc->simple_p && !desc->assumptions)
1128     {
1129       if (dump_file)
1130         fprintf (dump_file, ";; The loop is simple\n");
1131       return;
1132     }
1133
1134   /* Do not unroll loops with branches inside -- it increases number
1135      of mispredicts.
1136      TODO: this heuristic needs tunning; call inside the loop body
1137      is also relatively good reason to not unroll.  */
1138   if (num_loop_branches (loop) > 1)
1139     {
1140       if (dump_file)
1141         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1142       return;
1143     }
1144
1145   /* Check whether the loop rolls.  */
1146   if ((get_estimated_loop_iterations (loop, &iterations)
1147        || get_max_loop_iterations (loop, &iterations))
1148       && wi::ltu_p (iterations, 2 * nunroll))
1149     {
1150       if (dump_file)
1151         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1152       return;
1153     }
1154
1155   /* Success.  Now force nunroll to be power of 2, as it seems that this
1156      improves results (partially because of better alignments, partially
1157      because of some dark magic).  */
1158   for (i = 1; 2 * i <= nunroll; i *= 2)
1159     continue;
1160
1161   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1162   loop->lpt_decision.times = i - 1;
1163 }
1164
1165 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1166
1167    while (cond)
1168      body;
1169
1170    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1171
1172    while (cond)
1173      {
1174        body;
1175        if (!cond) break;
1176        body;
1177        if (!cond) break;
1178        body;
1179        if (!cond) break;
1180        body;
1181      }
1182    */
1183 static void
1184 unroll_loop_stupid (struct loop *loop)
1185 {
1186   sbitmap wont_exit;
1187   unsigned nunroll = loop->lpt_decision.times;
1188   struct niter_desc *desc = get_simple_loop_desc (loop);
1189   struct opt_info *opt_info = NULL;
1190   bool ok;
1191
1192   if (flag_split_ivs_in_unroller
1193       || flag_variable_expansion_in_unroller)
1194     opt_info = analyze_insns_in_loop (loop);
1195
1196
1197   wont_exit = sbitmap_alloc (nunroll + 1);
1198   bitmap_clear (wont_exit);
1199   opt_info_start_duplication (opt_info);
1200
1201   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1202                                       nunroll, wont_exit,
1203                                       NULL, NULL,
1204                                       DLTHE_FLAG_UPDATE_FREQ
1205                                       | (opt_info
1206                                          ? DLTHE_RECORD_COPY_NUMBER
1207                                            : 0));
1208   gcc_assert (ok);
1209
1210   if (opt_info)
1211     {
1212       apply_opt_in_copies (opt_info, nunroll, true, true);
1213       free_opt_info (opt_info);
1214     }
1215
1216   free (wont_exit);
1217
1218   if (desc->simple_p)
1219     {
1220       /* We indeed may get here provided that there are nontrivial assumptions
1221          for a loop to be really simple.  We could update the counts, but the
1222          problem is that we are unable to decide which exit will be taken
1223          (not really true in case the number of iterations is constant,
1224          but no one will do anything with this information, so we do not
1225          worry about it).  */
1226       desc->simple_p = false;
1227     }
1228
1229   if (dump_file)
1230     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1231              nunroll, num_loop_insns (loop));
1232 }
1233
1234 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1235    Set *DEBUG_USES to the number of debug insns that reference the
1236    variable.  */
1237
1238 bool
1239 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1240                                   int *debug_uses)
1241 {
1242   basic_block *body, bb;
1243   unsigned i;
1244   int count_ref = 0;
1245   rtx_insn *insn;
1246
1247   body = get_loop_body (loop);
1248   for (i = 0; i < loop->num_nodes; i++)
1249     {
1250       bb = body[i];
1251
1252       FOR_BB_INSNS (bb, insn)
1253         if (!rtx_referenced_p (reg, insn))
1254           continue;
1255         else if (DEBUG_INSN_P (insn))
1256           ++*debug_uses;
1257         else if (++count_ref > 1)
1258           break;
1259     }
1260   free (body);
1261   return (count_ref  == 1);
1262 }
1263
1264 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1265
1266 static void
1267 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1268 {
1269   basic_block *body, bb;
1270   unsigned i;
1271   rtx_insn *insn;
1272
1273   body = get_loop_body (loop);
1274   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1275     {
1276       bb = body[i];
1277
1278       FOR_BB_INSNS (bb, insn)
1279         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1280           continue;
1281         else
1282           {
1283             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1284                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1285             if (!--debug_uses)
1286               break;
1287           }
1288     }
1289   free (body);
1290 }
1291
1292 /* Determine whether INSN contains an accumulator
1293    which can be expanded into separate copies,
1294    one for each copy of the LOOP body.
1295
1296    for (i = 0 ; i < n; i++)
1297      sum += a[i];
1298
1299    ==>
1300
1301    sum += a[i]
1302    ....
1303    i = i+1;
1304    sum1 += a[i]
1305    ....
1306    i = i+1
1307    sum2 += a[i];
1308    ....
1309
1310    Return NULL if INSN contains no opportunity for expansion of accumulator.
1311    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1312    information and return a pointer to it.
1313 */
1314
1315 static struct var_to_expand *
1316 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1317 {
1318   rtx set, dest, src;
1319   struct var_to_expand *ves;
1320   unsigned accum_pos;
1321   enum rtx_code code;
1322   int debug_uses = 0;
1323
1324   set = single_set (insn);
1325   if (!set)
1326     return NULL;
1327
1328   dest = SET_DEST (set);
1329   src = SET_SRC (set);
1330   code = GET_CODE (src);
1331
1332   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1333     return NULL;
1334
1335   if (FLOAT_MODE_P (GET_MODE (dest)))
1336     {
1337       if (!flag_associative_math)
1338         return NULL;
1339       /* In the case of FMA, we're also changing the rounding.  */
1340       if (code == FMA && !flag_unsafe_math_optimizations)
1341         return NULL;
1342     }
1343
1344   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1345      in MD.  But if there is no optab to generate the insn, we can not
1346      perform the variable expansion.  This can happen if an MD provides
1347      an insn but not a named pattern to generate it, for example to avoid
1348      producing code that needs additional mode switches like for x87/mmx.
1349
1350      So we check have_insn_for which looks for an optab for the operation
1351      in SRC.  If it doesn't exist, we can't perform the expansion even
1352      though INSN is valid.  */
1353   if (!have_insn_for (code, GET_MODE (src)))
1354     return NULL;
1355
1356   if (!REG_P (dest)
1357       && !(GET_CODE (dest) == SUBREG
1358            && REG_P (SUBREG_REG (dest))))
1359     return NULL;
1360
1361   /* Find the accumulator use within the operation.  */
1362   if (code == FMA)
1363     {
1364       /* We only support accumulation via FMA in the ADD position.  */
1365       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1366         return NULL;
1367       accum_pos = 2;
1368     }
1369   else if (rtx_equal_p (dest, XEXP (src, 0)))
1370     accum_pos = 0;
1371   else if (rtx_equal_p (dest, XEXP (src, 1)))
1372     {
1373       /* The method of expansion that we are using; which includes the
1374          initialization of the expansions with zero and the summation of
1375          the expansions at the end of the computation will yield wrong
1376          results for (x = something - x) thus avoid using it in that case.  */
1377       if (code == MINUS)
1378         return NULL;
1379       accum_pos = 1;
1380     }
1381   else
1382     return NULL;
1383
1384   /* It must not otherwise be used.  */
1385   if (code == FMA)
1386     {
1387       if (rtx_referenced_p (dest, XEXP (src, 0))
1388           || rtx_referenced_p (dest, XEXP (src, 1)))
1389         return NULL;
1390     }
1391   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1392     return NULL;
1393
1394   /* It must be used in exactly one insn.  */
1395   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1396     return NULL;
1397
1398   if (dump_file)
1399     {
1400       fprintf (dump_file, "\n;; Expanding Accumulator ");
1401       print_rtl (dump_file, dest);
1402       fprintf (dump_file, "\n");
1403     }
1404
1405   if (debug_uses)
1406     /* Instead of resetting the debug insns, we could replace each
1407        debug use in the loop with the sum or product of all expanded
1408        accummulators.  Since we'll only know of all expansions at the
1409        end, we'd have to keep track of which vars_to_expand a debug
1410        insn in the loop references, take note of each copy of the
1411        debug insn during unrolling, and when it's all done, compute
1412        the sum or product of each variable and adjust the original
1413        debug insn and each copy thereof.  What a pain!  */
1414     reset_debug_uses_in_loop (loop, dest, debug_uses);
1415
1416   /* Record the accumulator to expand.  */
1417   ves = XNEW (struct var_to_expand);
1418   ves->insn = insn;
1419   ves->reg = copy_rtx (dest);
1420   ves->var_expansions.create (1);
1421   ves->next = NULL;
1422   ves->op = GET_CODE (src);
1423   ves->expansion_count = 0;
1424   ves->reuse_expansion = 0;
1425   return ves;
1426 }
1427
1428 /* Determine whether there is an induction variable in INSN that
1429    we would like to split during unrolling.
1430
1431    I.e. replace
1432
1433    i = i + 1;
1434    ...
1435    i = i + 1;
1436    ...
1437    i = i + 1;
1438    ...
1439
1440    type chains by
1441
1442    i0 = i + 1
1443    ...
1444    i = i0 + 1
1445    ...
1446    i = i0 + 2
1447    ...
1448
1449    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1450    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1451    pointer to it.  */
1452
1453 static struct iv_to_split *
1454 analyze_iv_to_split_insn (rtx_insn *insn)
1455 {
1456   rtx set, dest;
1457   struct rtx_iv iv;
1458   struct iv_to_split *ivts;
1459   bool ok;
1460
1461   /* For now we just split the basic induction variables.  Later this may be
1462      extended for example by selecting also addresses of memory references.  */
1463   set = single_set (insn);
1464   if (!set)
1465     return NULL;
1466
1467   dest = SET_DEST (set);
1468   if (!REG_P (dest))
1469     return NULL;
1470
1471   if (!biv_p (insn, dest))
1472     return NULL;
1473
1474   ok = iv_analyze_result (insn, dest, &iv);
1475
1476   /* This used to be an assert under the assumption that if biv_p returns
1477      true that iv_analyze_result must also return true.  However, that
1478      assumption is not strictly correct as evidenced by pr25569.
1479
1480      Returning NULL when iv_analyze_result returns false is safe and
1481      avoids the problems in pr25569 until the iv_analyze_* routines
1482      can be fixed, which is apparently hard and time consuming
1483      according to their author.  */
1484   if (! ok)
1485     return NULL;
1486
1487   if (iv.step == const0_rtx
1488       || iv.mode != iv.extend_mode)
1489     return NULL;
1490
1491   /* Record the insn to split.  */
1492   ivts = XNEW (struct iv_to_split);
1493   ivts->insn = insn;
1494   ivts->orig_var = dest;
1495   ivts->base_var = NULL_RTX;
1496   ivts->step = iv.step;
1497   ivts->next = NULL;
1498
1499   return ivts;
1500 }
1501
1502 /* Determines which of insns in LOOP can be optimized.
1503    Return a OPT_INFO struct with the relevant hash tables filled
1504    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1505    is undefined for the return value.  */
1506
1507 static struct opt_info *
1508 analyze_insns_in_loop (struct loop *loop)
1509 {
1510   basic_block *body, bb;
1511   unsigned i;
1512   struct opt_info *opt_info = XCNEW (struct opt_info);
1513   rtx_insn *insn;
1514   struct iv_to_split *ivts = NULL;
1515   struct var_to_expand *ves = NULL;
1516   iv_to_split **slot1;
1517   var_to_expand **slot2;
1518   vec<edge> edges = get_loop_exit_edges (loop);
1519   edge exit;
1520   bool can_apply = false;
1521
1522   iv_analysis_loop_init (loop);
1523
1524   body = get_loop_body (loop);
1525
1526   if (flag_split_ivs_in_unroller)
1527     {
1528       opt_info->insns_to_split
1529         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1530       opt_info->iv_to_split_head = NULL;
1531       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1532     }
1533
1534   /* Record the loop exit bb and loop preheader before the unrolling.  */
1535   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1536
1537   if (edges.length () == 1)
1538     {
1539       exit = edges[0];
1540       if (!(exit->flags & EDGE_COMPLEX))
1541         {
1542           opt_info->loop_exit = split_edge (exit);
1543           can_apply = true;
1544         }
1545     }
1546
1547   if (flag_variable_expansion_in_unroller
1548       && can_apply)
1549     {
1550       opt_info->insns_with_var_to_expand
1551         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1552       opt_info->var_to_expand_head = NULL;
1553       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1554     }
1555
1556   for (i = 0; i < loop->num_nodes; i++)
1557     {
1558       bb = body[i];
1559       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1560         continue;
1561
1562       FOR_BB_INSNS (bb, insn)
1563       {
1564         if (!INSN_P (insn))
1565           continue;
1566
1567         if (opt_info->insns_to_split)
1568           ivts = analyze_iv_to_split_insn (insn);
1569
1570         if (ivts)
1571           {
1572             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1573             gcc_assert (*slot1 == NULL);
1574             *slot1 = ivts;
1575             *opt_info->iv_to_split_tail = ivts;
1576             opt_info->iv_to_split_tail = &ivts->next;
1577             continue;
1578           }
1579
1580         if (opt_info->insns_with_var_to_expand)
1581           ves = analyze_insn_to_expand_var (loop, insn);
1582
1583         if (ves)
1584           {
1585             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1586             gcc_assert (*slot2 == NULL);
1587             *slot2 = ves;
1588             *opt_info->var_to_expand_tail = ves;
1589             opt_info->var_to_expand_tail = &ves->next;
1590           }
1591       }
1592     }
1593
1594   edges.release ();
1595   free (body);
1596   return opt_info;
1597 }
1598
1599 /* Called just before loop duplication.  Records start of duplicated area
1600    to OPT_INFO.  */
1601
1602 static void
1603 opt_info_start_duplication (struct opt_info *opt_info)
1604 {
1605   if (opt_info)
1606     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1607 }
1608
/* Return the number of iterations that separate the initialization of
   the base variable from the copy number N_COPY.  N_COPIES is the total
   number of newly created copies and UNROLLING is true if the loop is
   being unrolled (as opposed to peeled).  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization happens in the original loop
     body, which has number 0.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the initialization happens in copy number 1 and the
     original loop (number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
1633
1634 /* Allocate basic variable for the induction variable chain.  */
1635
1636 static void
1637 allocate_basic_variable (struct iv_to_split *ivts)
1638 {
1639   rtx expr = SET_SRC (single_set (ivts->insn));
1640
1641   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1642 }
1643
1644 /* Insert initialization of basic variable of IVTS before INSN, taking
1645    the initial value from INSN.  */
1646
1647 static void
1648 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1649 {
1650   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1651   rtx_insn *seq;
1652
1653   start_sequence ();
1654   expr = force_operand (expr, ivts->base_var);
1655   if (expr != ivts->base_var)
1656     emit_move_insn (ivts->base_var, expr);
1657   seq = get_insns ();
1658   end_sequence ();
1659
1660   emit_insn_before (seq, insn);
1661 }
1662
1663 /* Replace the use of induction variable described in IVTS in INSN
1664    by base variable + DELTA * step.  */
1665
1666 static void
1667 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1668 {
1669   rtx expr, *loc, incr, var;
1670   rtx_insn *seq;
1671   enum machine_mode mode = GET_MODE (ivts->base_var);
1672   rtx src, dest, set;
1673
1674   /* Construct base + DELTA * step.  */
1675   if (!delta)
1676     expr = ivts->base_var;
1677   else
1678     {
1679       incr = simplify_gen_binary (MULT, mode,
1680                                   ivts->step, gen_int_mode (delta, mode));
1681       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1682                                   ivts->base_var, incr);
1683     }
1684
1685   /* Figure out where to do the replacement.  */
1686   loc = &SET_SRC (single_set (insn));
1687
1688   /* If we can make the replacement right away, we're done.  */
1689   if (validate_change (insn, loc, expr, 0))
1690     return;
1691
1692   /* Otherwise, force EXPR into a register and try again.  */
1693   start_sequence ();
1694   var = gen_reg_rtx (mode);
1695   expr = force_operand (expr, var);
1696   if (expr != var)
1697     emit_move_insn (var, expr);
1698   seq = get_insns ();
1699   end_sequence ();
1700   emit_insn_before (seq, insn);
1701
1702   if (validate_change (insn, loc, var, 0))
1703     return;
1704
1705   /* The last chance.  Try recreating the assignment in insn
1706      completely from scratch.  */
1707   set = single_set (insn);
1708   gcc_assert (set);
1709
1710   start_sequence ();
1711   *loc = var;
1712   src = copy_rtx (SET_SRC (set));
1713   dest = copy_rtx (SET_DEST (set));
1714   src = force_operand (src, dest);
1715   if (src != dest)
1716     emit_move_insn (dest, src);
1717   seq = get_insns ();
1718   end_sequence ();
1719
1720   emit_insn_before (seq, insn);
1721   delete_insn (insn);
1722 }
1723
1724
1725 /* Return one expansion of the accumulator recorded in struct VE.  */
1726
1727 static rtx
1728 get_expansion (struct var_to_expand *ve)
1729 {
1730   rtx reg;
1731
1732   if (ve->reuse_expansion == 0)
1733     reg = ve->reg;
1734   else
1735     reg = ve->var_expansions[ve->reuse_expansion - 1];
1736
1737   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1738     ve->reuse_expansion = 0;
1739   else
1740     ve->reuse_expansion++;
1741
1742   return reg;
1743 }
1744
1745
1746 /* Given INSN replace the uses of the accumulator recorded in VE
1747    with a new register.  */
1748
1749 static void
1750 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1751 {
1752   rtx new_reg, set;
1753   bool really_new_expansion = false;
1754
1755   set = single_set (insn);
1756   gcc_assert (set);
1757
1758   /* Generate a new register only if the expansion limit has not been
1759      reached.  Else reuse an already existing expansion.  */
1760   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1761     {
1762       really_new_expansion = true;
1763       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1764     }
1765   else
1766     new_reg = get_expansion (ve);
1767
1768   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1769   if (apply_change_group ())
1770     if (really_new_expansion)
1771       {
1772         ve->var_expansions.safe_push (new_reg);
1773         ve->expansion_count++;
1774       }
1775 }
1776
1777 /* Initialize the variable expansions in loop preheader.  PLACE is the
1778    loop-preheader basic block where the initialization of the
1779    expansions should take place.  The expansions are initialized with
1780    (-0) when the operation is plus or minus to honor sign zero.  This
1781    way we can prevent cases where the sign of the final result is
1782    effected by the sign of the expansion.  Here is an example to
1783    demonstrate this:
1784
1785    for (i = 0 ; i < n; i++)
1786      sum += something;
1787
1788    ==>
1789
1790    sum += something
1791    ....
1792    i = i+1;
1793    sum1 += something
1794    ....
1795    i = i+1
1796    sum2 += something;
1797    ....
1798
1799    When SUM is initialized with -zero and SOMETHING is also -zero; the
1800    final result of sum should be -zero thus the expansions sum1 and sum2
1801    should be initialized with -zero as well (otherwise we will get +zero
1802    as the final result).  */
1803
1804 static void
1805 insert_var_expansion_initialization (struct var_to_expand *ve,
1806                                      basic_block place)
1807 {
1808   rtx_insn *seq;
1809   rtx var, zero_init;
1810   unsigned i;
1811   enum machine_mode mode = GET_MODE (ve->reg);
1812   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1813
1814   if (ve->var_expansions.length () == 0)
1815     return;
1816
1817   start_sequence ();
1818   switch (ve->op)
1819     {
1820     case FMA:
1821       /* Note that we only accumulate FMA via the ADD operand.  */
1822     case PLUS:
1823     case MINUS:
1824       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1825         {
1826           if (honor_signed_zero_p)
1827             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1828           else
1829             zero_init = CONST0_RTX (mode);
1830           emit_move_insn (var, zero_init);
1831         }
1832       break;
1833
1834     case MULT:
1835       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1836         {
1837           zero_init = CONST1_RTX (GET_MODE (var));
1838           emit_move_insn (var, zero_init);
1839         }
1840       break;
1841
1842     default:
1843       gcc_unreachable ();
1844     }
1845
1846   seq = get_insns ();
1847   end_sequence ();
1848
1849   emit_insn_after (seq, BB_END (place));
1850 }
1851
1852 /* Combine the variable expansions at the loop exit.  PLACE is the
1853    loop exit basic block where the summation of the expansions should
1854    take place.  */
1855
1856 static void
1857 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1858 {
1859   rtx sum = ve->reg;
1860   rtx expr, var;
1861   rtx_insn *seq, *insn;
1862   unsigned i;
1863
1864   if (ve->var_expansions.length () == 0)
1865     return;
1866
1867   start_sequence ();
1868   switch (ve->op)
1869     {
1870     case FMA:
1871       /* Note that we only accumulate FMA via the ADD operand.  */
1872     case PLUS:
1873     case MINUS:
1874       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1875         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1876       break;
1877
1878     case MULT:
1879       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1880         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1881       break;
1882
1883     default:
1884       gcc_unreachable ();
1885     }
1886
1887   expr = force_operand (sum, ve->reg);
1888   if (expr != ve->reg)
1889     emit_move_insn (ve->reg, expr);
1890   seq = get_insns ();
1891   end_sequence ();
1892
1893   insn = BB_HEAD (place);
1894   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1895     insn = NEXT_INSN (insn);
1896
1897   emit_insn_after (seq, insn);
1898 }
1899
1900 /* Strip away REG_EQUAL notes for IVs we're splitting.
1901
1902    Updating REG_EQUAL notes for IVs we split is tricky: We
1903    cannot tell until after unrolling, DF-rescanning, and liveness
1904    updating, whether an EQ_USE is reached by the split IV while
1905    the IV reg is still live.  See PR55006.
1906
1907    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1908    because RTL loop-iv requires us to defer rescanning insns and
1909    any notes attached to them.  So resort to old techniques...  */
1910
1911 static void
1912 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1913 {
1914   struct iv_to_split *ivts;
1915   rtx note = find_reg_equal_equiv_note (insn);
1916   if (! note)
1917     return;
1918   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1919     if (reg_mentioned_p (ivts->orig_var, note))
1920       {
1921         remove_note (insn, note);
1922         return;
1923       }
1924 }
1925
1926 /* Apply loop optimizations in loop copies using the
1927    data which gathered during the unrolling.  Structure
1928    OPT_INFO record that data.
1929
1930    UNROLLING is true if we unrolled (not peeled) the loop.
1931    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1932    the loop (as it should happen in complete unrolling, but not in ordinary
1933    peeling of the loop).  */
1934
1935 static void
1936 apply_opt_in_copies (struct opt_info *opt_info,
1937                      unsigned n_copies, bool unrolling,
1938                      bool rewrite_original_loop)
1939 {
1940   unsigned i, delta;
1941   basic_block bb, orig_bb;
1942   rtx_insn *insn, *orig_insn, *next;
1943   struct iv_to_split ivts_templ, *ivts;
1944   struct var_to_expand ve_templ, *ves;
1945
1946   /* Sanity check -- we need to put initialization in the original loop
1947      body.  */
1948   gcc_assert (!unrolling || rewrite_original_loop);
1949
1950   /* Allocate the basic variables (i0).  */
1951   if (opt_info->insns_to_split)
1952     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1953       allocate_basic_variable (ivts);
1954
1955   for (i = opt_info->first_new_block;
1956        i < (unsigned) last_basic_block_for_fn (cfun);
1957        i++)
1958     {
1959       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1960       orig_bb = get_bb_original (bb);
1961
1962       /* bb->aux holds position in copy sequence initialized by
1963          duplicate_loop_to_header_edge.  */
1964       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1965                                         unrolling);
1966       bb->aux = 0;
1967       orig_insn = BB_HEAD (orig_bb);
1968       FOR_BB_INSNS_SAFE (bb, insn, next)
1969         {
1970           if (!INSN_P (insn)
1971               || (DEBUG_INSN_P (insn)
1972                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1973             continue;
1974
1975           while (!INSN_P (orig_insn)
1976                  || (DEBUG_INSN_P (orig_insn)
1977                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
1978                          == LABEL_DECL)))
1979             orig_insn = NEXT_INSN (orig_insn);
1980
1981           ivts_templ.insn = orig_insn;
1982           ve_templ.insn = orig_insn;
1983
1984           /* Apply splitting iv optimization.  */
1985           if (opt_info->insns_to_split)
1986             {
1987               maybe_strip_eq_note_for_split_iv (opt_info, insn);
1988
1989               ivts = opt_info->insns_to_split->find (&ivts_templ);
1990
1991               if (ivts)
1992                 {
1993                   gcc_assert (GET_CODE (PATTERN (insn))
1994                               == GET_CODE (PATTERN (orig_insn)));
1995
1996                   if (!delta)
1997                     insert_base_initialization (ivts, insn);
1998                   split_iv (ivts, insn, delta);
1999                 }
2000             }
2001           /* Apply variable expansion optimization.  */
2002           if (unrolling && opt_info->insns_with_var_to_expand)
2003             {
2004               ves = (struct var_to_expand *)
2005                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2006               if (ves)
2007                 {
2008                   gcc_assert (GET_CODE (PATTERN (insn))
2009                               == GET_CODE (PATTERN (orig_insn)));
2010                   expand_var_during_unrolling (ves, insn);
2011                 }
2012             }
2013           orig_insn = NEXT_INSN (orig_insn);
2014         }
2015     }
2016
2017   if (!rewrite_original_loop)
2018     return;
2019
2020   /* Initialize the variable expansions in the loop preheader
2021      and take care of combining them at the loop exit.  */
2022   if (opt_info->insns_with_var_to_expand)
2023     {
2024       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2025         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2026       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2027         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2028     }
2029
2030   /* Rewrite also the original loop body.  Find them as originals of the blocks
2031      in the last copied iteration, i.e. those that have
2032      get_bb_copy (get_bb_original (bb)) == bb.  */
2033   for (i = opt_info->first_new_block;
2034        i < (unsigned) last_basic_block_for_fn (cfun);
2035        i++)
2036     {
2037       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2038       orig_bb = get_bb_original (bb);
2039       if (get_bb_copy (orig_bb) != bb)
2040         continue;
2041
2042       delta = determine_split_iv_delta (0, n_copies, unrolling);
2043       for (orig_insn = BB_HEAD (orig_bb);
2044            orig_insn != NEXT_INSN (BB_END (bb));
2045            orig_insn = next)
2046         {
2047           next = NEXT_INSN (orig_insn);
2048
2049           if (!INSN_P (orig_insn))
2050             continue;
2051
2052           ivts_templ.insn = orig_insn;
2053           if (opt_info->insns_to_split)
2054             {
2055               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2056
2057               ivts = (struct iv_to_split *)
2058                 opt_info->insns_to_split->find (&ivts_templ);
2059               if (ivts)
2060                 {
2061                   if (!delta)
2062                     insert_base_initialization (ivts, orig_insn);
2063                   split_iv (ivts, orig_insn, delta);
2064                   continue;
2065                 }
2066             }
2067
2068         }
2069     }
2070 }
2071
2072 /* Release OPT_INFO.  */
2073
2074 static void
2075 free_opt_info (struct opt_info *opt_info)
2076 {
2077   delete opt_info->insns_to_split;
2078   opt_info->insns_to_split = NULL;
2079   if (opt_info->insns_with_var_to_expand)
2080     {
2081       struct var_to_expand *ves;
2082
2083       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2084         ves->var_expansions.release ();
2085       delete opt_info->insns_with_var_to_expand;
2086       opt_info->insns_with_var_to_expand = NULL;
2087     }
2088   free (opt_info);
2089 }