gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear induction
   variables.  In that case the created optimization possibilities are likely
   to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations prior to the loop)
       when an estimate of the number of iterations is known (typically from
       the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "dbgcnt.h"
  67
  68 /* Specifies types of loops that may be unrolled.  */
  69
  70 enum unroll_level
  71 {
  72   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  73                            iteration.  */
  74   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  75                            of code size.  */
  76   UL_ALL                /* All suitable loops.  */
  77 };
  78
  79 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  80    is the exit edge whose condition is replaced.  The ssa versions of the new
  81    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  82    if they are not NULL.  */
  83
  84 void
  85 create_canonical_iv (class loop *loop, edge exit, tree niter,
  86                      tree *var_before = NULL, tree *var_after = NULL)
  87 {
  88   edge in;
  89   tree type, var;
  90   gcond *cond;
  91   gimple_stmt_iterator incr_at;
  92   enum tree_code cmp;
  93
  94   if (dump_file && (dump_flags & TDF_DETAILS))
  95     {
  96       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  97       print_generic_expr (dump_file, niter, TDF_SLIM);
  98       fprintf (dump_file, " iterations.\n");
  99     }
 100
 101   cond = as_a <gcond *> (last_stmt (exit->src));
 102   in = EDGE_SUCC (exit->src, 0);
 103   if (in == exit)
 104     in = EDGE_SUCC (exit->src, 1);
 105
 106   /* Note that we do not need to worry about overflows, since
 107      type of niter is always unsigned and all comparisons are
 108      just for equality/nonequality -- i.e. everything works
 109      with a modulo arithmetics.  */
 110
 111   type = TREE_TYPE (niter);
 112   niter = fold_build2 (PLUS_EXPR, type,
 113                        niter,
 114                        build_int_cst (type, 1));
 115   incr_at = gsi_last_bb (in->src);
 116   create_iv (niter,
 117              build_int_cst (type, -1),
 118              NULL_TREE, loop,
 119              &incr_at, false, var_before, &var);
 120   if (var_after)
 121     *var_after = var;
 122
 123   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 124   gimple_cond_set_code (cond, cmp);
 125   gimple_cond_set_lhs (cond, var);
 126   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 127   update_stmt (cond);
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145
 146   /* If some IV computation will become constant.  */
 147   bool constant_iv;
 148
 149   /* Number of call stmts that are not a builtin and are pure or const
 150      present on the hot path.  */
 151   int num_pure_calls_on_hot_path;
 152   /* Number of call stmts that are not a builtin and are not pure nor const
 153      present on the hot path.  */
 154   int num_non_pure_calls_on_hot_path;
 155   /* Number of statements other than calls in the loop.  */
 156   int non_call_stmts_on_hot_path;
 157   /* Number of branches seen on the hot path.  */
 158   int num_branches_on_hot_path;
 159 };
 160
 161 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 162
 163 static bool
 164 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 165 {
 166   if (is_gimple_min_invariant (op))
 167     return true;
 168
 169   /* We can still fold accesses to constant arrays when index is known.  */
 170   if (TREE_CODE (op) != SSA_NAME)
 171     {
 172       tree base = op;
 173
 174       /* First make fast look if we see constant array inside.  */
 175       while (handled_component_p (base))
 176         base = TREE_OPERAND (base, 0);
 177       if ((DECL_P (base)
 178            && ctor_for_folding (base) != error_mark_node)
 179           || CONSTANT_CLASS_P (base))
 180         {
 181           /* If so, see if we understand all the indices.  */
 182           base = op;
 183           while (handled_component_p (base))
 184             {
 185               if (TREE_CODE (base) == ARRAY_REF
 186                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 187                 return false;
 188               base = TREE_OPERAND (base, 0);
 189             }
 190           return true;
 191         }
 192       return false;
 193     }
 194
 195   /* Induction variables are constants when defined in loop.  */
 196   if (loop_containing_stmt (stmt) != loop)
 197     return false;
 198   tree ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, op));
 199   if (chrec_contains_undetermined (ev))
 200     return false;
 201   return true;
 202 }
 203
 204 /* Computes an estimated number of insns in LOOP.
 205    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 206    iteration of the loop.
 207    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 208    of loop.
 209    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 210    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 211
 212 static bool
 213 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 214                          struct loop_size *size, int upper_bound)
 215 {
 216   basic_block *body = get_loop_body (loop);
 217   gimple_stmt_iterator gsi;
 218   unsigned int i;
 219   bool after_exit;
 220   vec<basic_block> path = get_loop_hot_path (loop);
 221
 222   size->overall = 0;
 223   size->eliminated_by_peeling = 0;
 224   size->last_iteration = 0;
 225   size->last_iteration_eliminated_by_peeling = 0;
 226   size->num_pure_calls_on_hot_path = 0;
 227   size->num_non_pure_calls_on_hot_path = 0;
 228   size->non_call_stmts_on_hot_path = 0;
 229   size->num_branches_on_hot_path = 0;
 230   size->constant_iv = 0;
 231
 232   if (dump_file && (dump_flags & TDF_DETAILS))
 233     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 234   for (i = 0; i < loop->num_nodes; i++)
 235     {
 236       if (edge_to_cancel && body[i] != edge_to_cancel->src
 237           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 238         after_exit = true;
 239       else
 240         after_exit = false;
 241       if (dump_file && (dump_flags & TDF_DETAILS))
 242         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 243                  after_exit);
 244
 245       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 246         {
 247           gimple *stmt = gsi_stmt (gsi);
 248           int num = estimate_num_insns (stmt, &eni_size_weights);
 249           bool likely_eliminated = false;
 250           bool likely_eliminated_last = false;
 251           bool likely_eliminated_peeled = false;
 252
 253           if (dump_file && (dump_flags & TDF_DETAILS))
 254             {
 255               fprintf (dump_file, "  size: %3i ", num);
 256               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 257             }
 258
 259           /* Look for reasons why we might optimize this stmt away. */
 260
 261           if (!gimple_has_side_effects (stmt))
 262             {
 263               /* Exit conditional.  */
 264               if (exit && body[i] == exit->src
 265                   && stmt == last_stmt (exit->src))
 266                 {
 267                   if (dump_file && (dump_flags & TDF_DETAILS))
 268                     fprintf (dump_file, "   Exit condition will be eliminated "
 269                              "in peeled copies.\n");
 270                   likely_eliminated_peeled = true;
 271                 }
 272               if (edge_to_cancel && body[i] == edge_to_cancel->src
 273                   && stmt == last_stmt (edge_to_cancel->src))
 274                 {
 275                   if (dump_file && (dump_flags & TDF_DETAILS))
 276                     fprintf (dump_file, "   Exit condition will be eliminated "
 277                              "in last copy.\n");
 278                   likely_eliminated_last = true;
 279                 }
 280               /* Sets of IV variables  */
 281               if (gimple_code (stmt) == GIMPLE_ASSIGN
 282                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 283                 {
 284                   if (dump_file && (dump_flags & TDF_DETAILS))
 285                     fprintf (dump_file, "   Induction variable computation will"
 286                              " be folded away.\n");
 287                   likely_eliminated = true;
 288                 }
 289               /* Assignments of IV variables.  */
 290               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 291                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 292                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 293                                                   stmt, loop)
 294                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 295                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 296                                                       stmt, loop)))
 297                 {
 298                   size->constant_iv = true;
 299                   if (dump_file && (dump_flags & TDF_DETAILS))
 300                     fprintf (dump_file,
 301                              "   Constant expression will be folded away.\n");
 302                   likely_eliminated = true;
 303                 }
 304               /* Conditionals.  */
 305               else if ((gimple_code (stmt) == GIMPLE_COND
 306                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 307                                                    loop)
 308                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 309                                                    loop)
 310                         /* We don't simplify all constant compares so make sure
 311                            they are not both constant already.  See PR70288.  */
 312                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 313                             || ! is_gimple_min_invariant
 314                                  (gimple_cond_rhs (stmt))))
 315                        || (gimple_code (stmt) == GIMPLE_SWITCH
 316                            && constant_after_peeling (gimple_switch_index (
 317                                                         as_a <gswitch *>
 318                                                           (stmt)),
 319                                                       stmt, loop)
 320                            && ! is_gimple_min_invariant
 321                                    (gimple_switch_index
 322                                       (as_a <gswitch *> (stmt)))))
 323                 {
 324                   if (dump_file && (dump_flags & TDF_DETAILS))
 325                     fprintf (dump_file, "   Constant conditional.\n");
 326                   likely_eliminated = true;
 327                 }
 328             }
 329
 330           size->overall += num;
 331           if (likely_eliminated || likely_eliminated_peeled)
 332             size->eliminated_by_peeling += num;
 333           if (!after_exit)
 334             {
 335               size->last_iteration += num;
 336               if (likely_eliminated || likely_eliminated_last)
 337                 size->last_iteration_eliminated_by_peeling += num;
 338             }
 339           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 340               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 341             {
 342               free (body);
 343               path.release ();
 344               return true;
 345             }
 346         }
 347     }
 348   while (path.length ())
 349     {
 350       basic_block bb = path.pop ();
 351       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 352         {
 353           gimple *stmt = gsi_stmt (gsi);
 354           if (gimple_code (stmt) == GIMPLE_CALL
 355               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 356             {
 357               int flags = gimple_call_flags (stmt);
 358               if (flags & (ECF_PURE | ECF_CONST))
 359                 size->num_pure_calls_on_hot_path++;
 360               else
 361                 size->num_non_pure_calls_on_hot_path++;
 362               size->num_branches_on_hot_path ++;
 363             }
 364           /* Count inexpensive calls as non-calls, because they will likely
 365              expand inline.  */
 366           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 367             size->non_call_stmts_on_hot_path++;
 368           if (((gimple_code (stmt) == GIMPLE_COND
 369                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 370                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 371                                                 loop)))
 372                || (gimple_code (stmt) == GIMPLE_SWITCH
 373                    && !constant_after_peeling (gimple_switch_index (
 374                                                  as_a <gswitch *> (stmt)),
 375                                                stmt, loop)))
 376               && (!exit || bb != exit->src))
 377             size->num_branches_on_hot_path++;
 378         }
 379     }
 380   path.release ();
 381   if (dump_file && (dump_flags & TDF_DETAILS))
 382     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 383              size->eliminated_by_peeling, size->last_iteration,
 384              size->last_iteration_eliminated_by_peeling);
 385
 386   free (body);
 387   return false;
 388 }
 389
 390 /* Estimate number of insns of completely unrolled loop.
 391    It is (NUNROLL + 1) * size of loop body with taking into account
 392    the fact that in last copy everything after exit conditional
 393    is dead and that some instructions will be eliminated after
 394    peeling.
 395
 396    Loop body is likely going to simplify further, this is difficult
 397    to guess, we just decrease the result by 1/3.  */
 398
 399 static unsigned HOST_WIDE_INT
 400 estimated_unrolled_size (struct loop_size *size,
 401                          unsigned HOST_WIDE_INT nunroll)
 402 {
 403   HOST_WIDE_INT unr_insns = ((nunroll)
 404                              * (HOST_WIDE_INT) (size->overall
 405                                                 - size->eliminated_by_peeling));
 406   if (!nunroll)
 407     unr_insns = 0;
 408   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 409
 410   unr_insns = unr_insns * 2 / 3;
 411   if (unr_insns <= 0)
 412     unr_insns = 1;
 413
 414   return unr_insns;
 415 }
 416
 417 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 418    body that can be remove to make the loop to always exit and at
 419    the same time it does not make any code potentially executed
 420    during the last iteration dead.
 421
 422    After complete unrolling we still may get rid of the conditional
 423    on the exit in the last copy even if we have no idea what it does.
 424    This is quite common case for loops of form
 425
 426      int a[5];
 427      for (i=0;i<b;i++)
 428        a[i]=0;
 429
 430    Here we prove the loop to iterate 5 times but we do not know
 431    it from induction variable.
 432
 433    For now we handle only simple case where there is exit condition
 434    just before the latch block and the latch block contains no statements
 435    with side effect that may otherwise terminate the execution of loop
 436    (such as by EH or by terminating the program or longjmp).
 437
 438    In the general case we may want to cancel the paths leading to statements
 439    loop-niter identified as having undefined effect in the last iteration.
 440    The other cases are hopefully rare and will be cleaned up later.  */
 441
 442 static edge
 443 loop_edge_to_cancel (class loop *loop)
 444 {
 445   vec<edge> exits;
 446   unsigned i;
 447   edge edge_to_cancel;
 448   gimple_stmt_iterator gsi;
 449
 450   /* We want only one predecestor of the loop.  */
 451   if (EDGE_COUNT (loop->latch->preds) > 1)
 452     return NULL;
 453
 454   exits = get_loop_exit_edges (loop);
 455
 456   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 457     {
 458        /* Find the other edge than the loop exit
 459           leaving the conditoinal.  */
 460        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 461          continue;
 462        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 463          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 464        else
 465          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 466
 467       /* We only can handle conditionals.  */
 468       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 469         continue;
 470
 471       /* We should never have conditionals in the loop latch. */
 472       gcc_assert (edge_to_cancel->dest != loop->header);
 473
 474       /* Check that it leads to loop latch.  */
 475       if (edge_to_cancel->dest != loop->latch)
 476         continue;
 477
 478       exits.release ();
 479
 480       /* Verify that the code in loop latch does nothing that may end program
 481          execution without really reaching the exit.  This may include
 482          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 483       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 484         if (gimple_has_side_effects (gsi_stmt (gsi)))
 485            return NULL;
 486       return edge_to_cancel;
 487     }
 488   exits.release ();
 489   return NULL;
 490 }
 491
 492 /* Remove all tests for exits that are known to be taken after LOOP was
 493    peeled NPEELED times. Put gcc_unreachable before every statement
 494    known to not be executed.  */
 495
 496 static bool
 497 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 498 {
 499   class nb_iter_bound *elt;
 500   bool changed = false;
 501
 502   for (elt = loop->bounds; elt; elt = elt->next)
 503     {
 504       /* If statement is known to be undefined after peeling, turn it
 505          into unreachable (or trap when debugging experience is supposed
 506          to be good).  */
 507       if (!elt->is_exit
 508           && wi::ltu_p (elt->bound, npeeled))
 509         {
 510           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 511           gcall *stmt = gimple_build_call
 512               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 513           gimple_set_location (stmt, gimple_location (elt->stmt));
 514           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 515           split_block (gimple_bb (stmt), stmt);
 516           changed = true;
 517           if (dump_file && (dump_flags & TDF_DETAILS))
 518             {
 519               fprintf (dump_file, "Forced statement unreachable: ");
 520               print_gimple_stmt (dump_file, elt->stmt, 0);
 521             }
 522         }
 523       /* If we know the exit will be taken after peeling, update.  */
 524       else if (elt->is_exit
 525                && wi::leu_p (elt->bound, npeeled))
 526         {
 527           basic_block bb = gimple_bb (elt->stmt);
 528           edge exit_edge = EDGE_SUCC (bb, 0);
 529
 530           if (dump_file && (dump_flags & TDF_DETAILS))
 531             {
 532               fprintf (dump_file, "Forced exit to be taken: ");
 533               print_gimple_stmt (dump_file, elt->stmt, 0);
 534             }
 535           if (!loop_exit_edge_p (loop, exit_edge))
 536             exit_edge = EDGE_SUCC (bb, 1);
 537           exit_edge->probability = profile_probability::always ();
 538           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 539           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 540           if (exit_edge->flags & EDGE_TRUE_VALUE)
 541             gimple_cond_make_true (cond_stmt);
 542           else
 543             gimple_cond_make_false (cond_stmt);
 544           update_stmt (cond_stmt);
 545           changed = true;
 546         }
 547     }
 548   return changed;
 549 }
 550
 551 /* Remove all exits that are known to be never taken because of the loop bound
 552    discovered.  */
 553
 554 static bool
 555 remove_redundant_iv_tests (class loop *loop)
 556 {
 557   class nb_iter_bound *elt;
 558   bool changed = false;
 559
 560   if (!loop->any_upper_bound)
 561     return false;
 562   for (elt = loop->bounds; elt; elt = elt->next)
 563     {
 564       /* Exit is pointless if it won't be taken before loop reaches
 565          upper bound.  */
 566       if (elt->is_exit && loop->any_upper_bound
 567           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 568         {
 569           basic_block bb = gimple_bb (elt->stmt);
 570           edge exit_edge = EDGE_SUCC (bb, 0);
 571           class tree_niter_desc niter;
 572
 573           if (!loop_exit_edge_p (loop, exit_edge))
 574             exit_edge = EDGE_SUCC (bb, 1);
 575
 576           /* Only when we know the actual number of iterations, not
 577              just a bound, we can remove the exit.  */
 578           if (!number_of_iterations_exit (loop, exit_edge,
 579                                           &niter, false, false)
 580               || !integer_onep (niter.assumptions)
 581               || !integer_zerop (niter.may_be_zero)
 582               || !niter.niter
 583               || TREE_CODE (niter.niter) != INTEGER_CST
 584               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 585                              wi::to_widest (niter.niter)))
 586             continue;
 587
 588           if (dump_file && (dump_flags & TDF_DETAILS))
 589             {
 590               fprintf (dump_file, "Removed pointless exit: ");
 591               print_gimple_stmt (dump_file, elt->stmt, 0);
 592             }
 593           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 594           if (exit_edge->flags & EDGE_TRUE_VALUE)
 595             gimple_cond_make_false (cond_stmt);
 596           else
 597             gimple_cond_make_true (cond_stmt);
 598           update_stmt (cond_stmt);
 599           changed = true;
 600         }
 601     }
 602   return changed;
 603 }
 604
 605 /* Stores loops that will be unlooped and edges that will be removed
 606    after we process whole loop tree. */
 607 static vec<loop_p> loops_to_unloop;
 608 static vec<int> loops_to_unloop_nunroll;
 609 static vec<edge> edges_to_remove;
 610 /* Stores loops that has been peeled.  */
 611 static bitmap peeled_loops;
 612
 613 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 614    on the latch edge.
 615    We do it after all unrolling since unlooping moves basic blocks
 616    across loop boundaries trashing loop closed SSA form as well
 617    as SCEV info needed to be intact during unrolling.
 618
 619    IRRED_INVALIDATED is used to bookkeep if information about
 620    irreducible regions may become invalid as a result
 621    of the transformation.
 622    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 623    when we need to go into loop closed SSA form.  */
 624
 625 static void
 626 unloop_loops (bitmap loop_closed_ssa_invalidated,
 627               bool *irred_invalidated)
 628 {
 629   while (loops_to_unloop.length ())
 630     {
 631       class loop *loop = loops_to_unloop.pop ();
 632       int n_unroll = loops_to_unloop_nunroll.pop ();
 633       basic_block latch = loop->latch;
 634       edge latch_edge = loop_latch_edge (loop);
 635       int flags = latch_edge->flags;
 636       location_t locus = latch_edge->goto_locus;
 637       gcall *stmt;
 638       gimple_stmt_iterator gsi;
 639
 640       remove_exits_and_undefined_stmts (loop, n_unroll);
 641
 642       /* Unloop destroys the latch edge.  */
 643       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 644
 645       /* Create new basic block for the latch edge destination and wire
 646          it in.  */
 647       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 648       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 649       latch_edge->probability = profile_probability::never ();
 650       latch_edge->flags |= flags;
 651       latch_edge->goto_locus = locus;
 652
 653       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 654       latch_edge->dest->count = profile_count::zero ();
 655       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 656
 657       gsi = gsi_start_bb (latch_edge->dest);
 658       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 659     }
 660   loops_to_unloop.release ();
 661   loops_to_unloop_nunroll.release ();
 662
 663   /* Remove edges in peeled copies.  Given remove_path removes dominated
 664      regions we need to cope with removal of already removed paths.  */
 665   unsigned i;
 666   edge e;
 667   auto_vec<int, 20> src_bbs;
 668   src_bbs.reserve_exact (edges_to_remove.length ());
 669   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 670     src_bbs.quick_push (e->src->index);
 671   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 672     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 673       {
 674         bool ok = remove_path (e, irred_invalidated,
 675                                loop_closed_ssa_invalidated);
 676         gcc_assert (ok);
 677       }
 678   edges_to_remove.release ();
 679 }
 680
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */
 688
 689 static bool
 690 try_unroll_loop_completely (class loop *loop,
 691                             edge exit, tree niter, bool may_be_zero,
 692                             enum unroll_level ul,
 693                             HOST_WIDE_INT maxiter,
 694                             dump_user_location_t locus, bool allow_peel)
 695 {
 696   unsigned HOST_WIDE_INT n_unroll = 0;
 697   bool n_unroll_found = false;
 698   edge edge_to_cancel = NULL;
 699
 700   /* See if we proved number of iterations to be low constant.
 701
 702      EXIT is an edge that will be removed in all but last iteration of
 703      the loop.
 704
 705      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 706      of the unrolled sequence and is expected to make the final loop not
 707      rolling.
 708
 709      If the number of execution of loop is determined by standard induction
 710      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 711      from the iv test.  */
 712   if (tree_fits_uhwi_p (niter))
 713     {
 714       n_unroll = tree_to_uhwi (niter);
 715       n_unroll_found = true;
 716       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 717       if (edge_to_cancel == exit)
 718         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 719     }
 720   /* We do not know the number of iterations and thus we cannot eliminate
 721      the EXIT edge.  */
 722   else
 723     exit = NULL;
 724
 725   /* See if we can improve our estimate by using recorded loop bounds.  */
 726   if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
 727       && maxiter >= 0
 728       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 729     {
 730       n_unroll = maxiter;
 731       n_unroll_found = true;
 732       /* Loop terminates before the IV variable test, so we cannot
 733          remove it in the last iteration.  */
 734       edge_to_cancel = NULL;
 735     }
 736
 737   if (!n_unroll_found)
 738     return false;
 739
 740   if (!loop->unroll
 741       && n_unroll > (unsigned) param_max_completely_peel_times)
 742     {
 743       if (dump_file && (dump_flags & TDF_DETAILS))
 744         fprintf (dump_file, "Not unrolling loop %d "
 745                  "(--param max-completely-peel-times limit reached).\n",
 746                  loop->num);
 747       return false;
 748     }
 749
 750   if (!edge_to_cancel)
 751     edge_to_cancel = loop_edge_to_cancel (loop);
 752
 753   if (n_unroll)
 754     {
 755       if (ul == UL_SINGLE_ITER)
 756         return false;
 757
 758       if (loop->unroll)
 759         {
 760           /* If the unrolling factor is too large, bail out.  */
 761           if (n_unroll > (unsigned)loop->unroll)
 762             {
 763               if (dump_file && (dump_flags & TDF_DETAILS))
 764                 fprintf (dump_file,
 765                          "Not unrolling loop %d: "
 766                          "user didn't want it unrolled completely.\n",
 767                          loop->num);
 768               return false;
 769             }
 770         }
 771       else
 772         {
 773           struct loop_size size;
 774           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 775              iterations.  */
 776           bool remove_exit = (exit && niter
 777                               && TREE_CODE (niter) == INTEGER_CST
 778                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 779           bool large
 780             = tree_estimate_loop_size
 781                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 782                  param_max_completely_peeled_insns);
 783           if (large)
 784             {
 785               if (dump_file && (dump_flags & TDF_DETAILS))
 786                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 787                          loop->num);
 788               return false;
 789             }
 790
 791           unsigned HOST_WIDE_INT ninsns = size.overall;
 792           unsigned HOST_WIDE_INT unr_insns
 793             = estimated_unrolled_size (&size, n_unroll);
 794           if (dump_file && (dump_flags & TDF_DETAILS))
 795             {
 796               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 797               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 798                        (int) unr_insns);
 799             }
 800
 801           /* If the code is going to shrink, we don't need to be extra
 802              cautious on guessing if the unrolling is going to be
 803              profitable.  */
 804           if (unr_insns
 805               /* If there is IV variable that will become constant, we
 806                  save one instruction in the loop prologue we do not
 807                  account otherwise.  */
 808               <= ninsns + (size.constant_iv != false))
 809             ;
 810           /* We unroll only inner loops, because we do not consider it
 811              profitable otheriwse.  We still can cancel loopback edge
 812              of not rolling loop; this is always a good idea.  */
 813           else if (ul == UL_NO_GROWTH)
 814             {
 815               if (dump_file && (dump_flags & TDF_DETAILS))
 816                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 817                          loop->num);
 818               return false;
 819             }
 820           /* Outer loops tend to be less interesting candidates for
 821              complete unrolling unless we can do a lot of propagation
 822              into the inner loop body.  For now we disable outer loop
 823              unrolling when the code would grow.  */
 824           else if (loop->inner)
 825             {
 826               if (dump_file && (dump_flags & TDF_DETAILS))
 827                 fprintf (dump_file, "Not unrolling loop %d: "
 828                          "it is not innermost and code would grow.\n",
 829                          loop->num);
 830               return false;
 831             }
 832           /* If there is call on a hot path through the loop, then
 833              there is most probably not much to optimize.  */
 834           else if (size.num_non_pure_calls_on_hot_path)
 835             {
 836               if (dump_file && (dump_flags & TDF_DETAILS))
 837                 fprintf (dump_file, "Not unrolling loop %d: "
 838                          "contains call and code would grow.\n",
 839                          loop->num);
 840               return false;
 841             }
 842           /* If there is pure/const call in the function, then we can
 843              still optimize the unrolled loop body if it contains some
 844              other interesting code than the calls and code storing or
 845              cumulating the return value.  */
 846           else if (size.num_pure_calls_on_hot_path
 847                    /* One IV increment, one test, one ivtmp store and
 848                       one useful stmt.  That is about minimal loop
 849                       doing pure call.  */
 850                    && (size.non_call_stmts_on_hot_path
 851                        <= 3 + size.num_pure_calls_on_hot_path))
 852             {
 853               if (dump_file && (dump_flags & TDF_DETAILS))
 854                 fprintf (dump_file, "Not unrolling loop %d: "
 855                          "contains just pure calls and code would grow.\n",
 856                          loop->num);
 857               return false;
 858             }
 859           /* Complete unrolling is major win when control flow is
 860              removed and one big basic block is created.  If the loop
 861              contains control flow the optimization may still be a win
 862              because of eliminating the loop overhead but it also may
 863              blow the branch predictor tables.  Limit number of
 864              branches on the hot path through the peeled sequence.  */
 865           else if (size.num_branches_on_hot_path * (int)n_unroll
 866                    > param_max_peel_branches)
 867             {
 868               if (dump_file && (dump_flags & TDF_DETAILS))
 869                 fprintf (dump_file, "Not unrolling loop %d: "
 870                          "number of branches on hot path in the unrolled "
 871                          "sequence reaches --param max-peel-branches limit.\n",
 872                          loop->num);
 873               return false;
 874             }
 875           else if (unr_insns
 876                    > (unsigned) param_max_completely_peeled_insns)
 877             {
 878               if (dump_file && (dump_flags & TDF_DETAILS))
 879                 fprintf (dump_file, "Not unrolling loop %d: "
 880                          "number of insns in the unrolled sequence reaches "
 881                          "--param max-completely-peeled-insns limit.\n",
 882                          loop->num);
 883               return false;
 884             }
 885         }
 886
 887       if (!dbg_cnt (gimple_unroll))
 888         return false;
 889
 890       initialize_original_copy_tables ();
 891       auto_sbitmap wont_exit (n_unroll + 1);
 892       if (exit && niter
 893           && TREE_CODE (niter) == INTEGER_CST
 894           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 895         {
 896           bitmap_ones (wont_exit);
 897           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 898               || edge_to_cancel)
 899             bitmap_clear_bit (wont_exit, 0);
 900         }
 901       else
 902         {
 903           exit = NULL;
 904           bitmap_clear (wont_exit);
 905         }
 906       if (may_be_zero)
 907         bitmap_clear_bit (wont_exit, 1);
 908
 909       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 910                                                  n_unroll, wont_exit,
 911                                                  exit, &edges_to_remove,
 912                                                  DLTHE_FLAG_UPDATE_FREQ
 913                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 914         {
 915           free_original_copy_tables ();
 916           if (dump_file && (dump_flags & TDF_DETAILS))
 917             fprintf (dump_file, "Failed to duplicate the loop\n");
 918           return false;
 919         }
 920
 921       free_original_copy_tables ();
 922     }
 923
 924   /* Remove the conditional from the last copy of the loop.  */
 925   if (edge_to_cancel)
 926     {
 927       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 928       force_edge_cold (edge_to_cancel, true);
 929       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 930         gimple_cond_make_false (cond);
 931       else
 932         gimple_cond_make_true (cond);
 933       update_stmt (cond);
 934       /* Do not remove the path, as doing so may remove outer loop and
 935          confuse bookkeeping code in tree_unroll_loops_completely.  */
 936     }
 937
 938   /* Store the loop for later unlooping and exit removal.  */
 939   loops_to_unloop.safe_push (loop);
 940   loops_to_unloop_nunroll.safe_push (n_unroll);
 941
 942   if (dump_enabled_p ())
 943     {
 944       if (!n_unroll)
 945         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 946                          "loop turned into non-loop; it never loops\n");
 947       else
 948         {
 949           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 950                            "loop with %d iterations completely unrolled",
 951                            (int) n_unroll);
 952           if (loop->header->count.initialized_p ())
 953             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 954                          " (header execution count %d)",
 955                          (int)loop->header->count.to_gcov_type ());
 956           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 957         }
 958     }
 959
 960   if (dump_file && (dump_flags & TDF_DETAILS))
 961     {
 962       if (exit)
 963         fprintf (dump_file, "Exit condition of peeled iterations was "
 964                  "eliminated.\n");
 965       if (edge_to_cancel)
 966         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 967       else
 968         fprintf (dump_file, "Latch of last iteration was marked by "
 969                  "__builtin_unreachable ().\n");
 970     }
 971
 972   return true;
 973 }
 974
 975 /* Return number of instructions after peeling.  */
 976 static unsigned HOST_WIDE_INT
 977 estimated_peeled_sequence_size (struct loop_size *size,
 978                                 unsigned HOST_WIDE_INT npeel)
 979 {
 980   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 981                                        - size->eliminated_by_peeling), 1);
 982 }
 983
 984 /* If the loop is expected to iterate N times and is
 985    small enough, duplicate the loop body N+1 times before
 986    the loop itself.  This way the hot path will never
 987    enter the loop.
 988    Parameters are the same as for try_unroll_loops_completely */
 989
 990 static bool
 991 try_peel_loop (class loop *loop,
 992                edge exit, tree niter, bool may_be_zero,
 993                HOST_WIDE_INT maxiter)
 994 {
 995   HOST_WIDE_INT npeel;
 996   struct loop_size size;
 997   int peeled_size;
 998
 999   if (!flag_peel_loops
1000       || param_max_peel_times <= 0
1001       || !peeled_loops)
1002     return false;
1003
1004   if (bitmap_bit_p (peeled_loops, loop->num))
1005     {
1006       if (dump_file)
1007         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1008       return false;
1009     }
1010
1011   /* We don't peel loops that will be unrolled as this can duplicate a
1012      loop more times than the user requested.  */
1013   if (loop->unroll)
1014     {
1015       if (dump_file)
1016         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1017       return false;
1018     }
1019
1020   /* Peel only innermost loops.
1021      While the code is perfectly capable of peeling non-innermost loops,
1022      the heuristics would probably need some improvements. */
1023   if (loop->inner)
1024     {
1025       if (dump_file)
1026         fprintf (dump_file, "Not peeling: outer loop\n");
1027       return false;
1028     }
1029
1030   if (!optimize_loop_for_speed_p (loop))
1031     {
1032       if (dump_file)
1033         fprintf (dump_file, "Not peeling: cold loop\n");
1034       return false;
1035     }
1036
1037   /* Check if there is an estimate on the number of iterations.  */
1038   npeel = estimated_loop_iterations_int (loop);
1039   if (npeel < 0)
1040     npeel = likely_max_loop_iterations_int (loop);
1041   if (npeel < 0)
1042     {
1043       if (dump_file)
1044         fprintf (dump_file, "Not peeling: number of iterations is not "
1045                  "estimated\n");
1046       return false;
1047     }
1048   if (maxiter >= 0 && maxiter <= npeel)
1049     {
1050       if (dump_file)
1051         fprintf (dump_file, "Not peeling: upper bound is known so can "
1052                  "unroll completely\n");
1053       return false;
1054     }
1055
1056   /* We want to peel estimated number of iterations + 1 (so we never
1057      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1058      and be sure to avoid overflows.  */
1059   if (npeel > param_max_peel_times - 1)
1060     {
1061       if (dump_file)
1062         fprintf (dump_file, "Not peeling: rolls too much "
1063                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1064       return false;
1065     }
1066   npeel++;
1067
1068   /* Check peeled loops size.  */
1069   tree_estimate_loop_size (loop, exit, NULL, &size,
1070                            param_max_peeled_insns);
1071   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1072       > param_max_peeled_insns)
1073     {
1074       if (dump_file)
1075         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1076                  "(%i insns > --param max-peel-insns)", peeled_size);
1077       return false;
1078     }
1079
1080   if (!dbg_cnt (gimple_unroll))
1081     return false;
1082
1083   /* Duplicate possibly eliminating the exits.  */
1084   initialize_original_copy_tables ();
1085   auto_sbitmap wont_exit (npeel + 1);
1086   if (exit && niter
1087       && TREE_CODE (niter) == INTEGER_CST
1088       && wi::leu_p (npeel, wi::to_widest (niter)))
1089     {
1090       bitmap_ones (wont_exit);
1091       bitmap_clear_bit (wont_exit, 0);
1092     }
1093   else
1094     {
1095       exit = NULL;
1096       bitmap_clear (wont_exit);
1097     }
1098   if (may_be_zero)
1099     bitmap_clear_bit (wont_exit, 1);
1100   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1101                                              npeel, wont_exit,
1102                                              exit, &edges_to_remove,
1103                                              DLTHE_FLAG_UPDATE_FREQ))
1104     {
1105       free_original_copy_tables ();
1106       return false;
1107     }
1108   free_original_copy_tables ();
1109   if (dump_file && (dump_flags & TDF_DETAILS))
1110     {
1111       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1112                loop->num, (int) npeel);
1113     }
1114   if (loop->any_estimate)
1115     {
1116       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1117         loop->nb_iterations_estimate -= npeel;
1118       else
1119         loop->nb_iterations_estimate = 0;
1120     }
1121   if (loop->any_upper_bound)
1122     {
1123       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1124         loop->nb_iterations_upper_bound -= npeel;
1125       else
1126         loop->nb_iterations_upper_bound = 0;
1127     }
1128   if (loop->any_likely_upper_bound)
1129     {
1130       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1131         loop->nb_iterations_likely_upper_bound -= npeel;
1132       else
1133         {
1134           loop->any_estimate = true;
1135           loop->nb_iterations_estimate = 0;
1136           loop->nb_iterations_likely_upper_bound = 0;
1137         }
1138     }
1139   profile_count entry_count = profile_count::zero ();
1140
1141   edge e;
1142   edge_iterator ei;
1143   FOR_EACH_EDGE (e, ei, loop->header->preds)
1144     if (e->src != loop->latch)
1145       {
1146         if (e->src->count.initialized_p ())
1147           entry_count += e->src->count;
1148         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1149       }
1150   profile_probability p;
1151   p = entry_count.probability_in (loop->header->count);
1152   scale_loop_profile (loop, p, 0);
1153   bitmap_set_bit (peeled_loops, loop->num);
1154   return true;
1155 }
1156 /* Adds a canonical induction variable to LOOP if suitable.
1157    CREATE_IV is true if we may create a new iv.  UL determines
1158    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1159    to determine the number of iterations of a loop by direct evaluation.
1160    Returns true if cfg is changed.   */
1161
1162 static bool
1163 canonicalize_loop_induction_variables (class loop *loop,
1164                                        bool create_iv, enum unroll_level ul,
1165                                        bool try_eval, bool allow_peel)
1166 {
1167   edge exit = NULL;
1168   tree niter;
1169   HOST_WIDE_INT maxiter;
1170   bool modified = false;
1171   dump_user_location_t locus;
1172   class tree_niter_desc niter_desc;
1173   bool may_be_zero = false;
1174
1175   /* For unrolling allow conditional constant or zero iterations, thus
1176      perform loop-header copying on-the-fly.  */
1177   exit = single_exit (loop);
1178   niter = chrec_dont_know;
1179   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1180     {
1181       niter = niter_desc.niter;
1182       may_be_zero
1183         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1184     }
1185   if (TREE_CODE (niter) == INTEGER_CST)
1186     locus = last_stmt (exit->src);
1187   else
1188     {
1189       /* For non-constant niter fold may_be_zero into niter again.  */
1190       if (may_be_zero)
1191         {
1192           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1193             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1194                                  niter_desc.may_be_zero,
1195                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1196           else
1197             niter = chrec_dont_know;
1198           may_be_zero = false;
1199         }
1200
1201       /* If the loop has more than one exit, try checking all of them
1202          for # of iterations determinable through scev.  */
1203       if (!exit)
1204         niter = find_loop_niter (loop, &exit);
1205
1206       /* Finally if everything else fails, try brute force evaluation.  */
1207       if (try_eval
1208           && (chrec_contains_undetermined (niter)
1209               || TREE_CODE (niter) != INTEGER_CST))
1210         niter = find_loop_niter_by_eval (loop, &exit);
1211
1212       if (exit)
1213         locus = last_stmt (exit->src);
1214
1215       if (TREE_CODE (niter) != INTEGER_CST)
1216         exit = NULL;
1217     }
1218
1219   /* We work exceptionally hard here to estimate the bound
1220      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1221   if (niter && TREE_CODE (niter) == INTEGER_CST)
1222     {
1223       record_niter_bound (loop, wi::to_widest (niter),
1224                           exit == single_likely_exit (loop), true);
1225     }
1226
1227   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1228   maxiter = max_loop_iterations_int (loop);
1229
1230   if (dump_file && (dump_flags & TDF_DETAILS)
1231       && TREE_CODE (niter) == INTEGER_CST)
1232     {
1233       fprintf (dump_file, "Loop %d iterates ", loop->num);
1234       print_generic_expr (dump_file, niter, TDF_SLIM);
1235       fprintf (dump_file, " times.\n");
1236     }
1237   if (dump_file && (dump_flags & TDF_DETAILS)
1238       && maxiter >= 0)
1239     {
1240       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1241                (int)maxiter);
1242     }
1243   if (dump_file && (dump_flags & TDF_DETAILS)
1244       && likely_max_loop_iterations_int (loop) >= 0)
1245     {
1246       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1247                loop->num, (int)likely_max_loop_iterations_int (loop));
1248     }
1249
1250   /* Remove exits that are known to be never taken based on loop bound.
1251      Needs to be called after compilation of max_loop_iterations_int that
1252      populates the loop bounds.  */
1253   modified |= remove_redundant_iv_tests (loop);
1254
1255   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1256                                   maxiter, locus, allow_peel))
1257     return true;
1258
1259   if (create_iv
1260       && niter && !chrec_contains_undetermined (niter)
1261       && exit && just_once_each_iteration_p (loop, exit->src))
1262     {
1263       tree iv_niter = niter;
1264       if (may_be_zero)
1265         {
1266           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1267             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1268                                     niter_desc.may_be_zero,
1269                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1270                                     iv_niter);
1271           else
1272             iv_niter = NULL_TREE;
1273         }
1274       if (iv_niter)
1275         create_canonical_iv (loop, exit, iv_niter);
1276     }
1277
1278   if (ul == UL_ALL)
1279     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1280
1281   return modified;
1282 }
1283
1284 /* The main entry point of the pass.  Adds canonical induction variables
1285    to the suitable loops.  */
1286
1287 unsigned int
1288 canonicalize_induction_variables (void)
1289 {
1290   class loop *loop;
1291   bool changed = false;
1292   bool irred_invalidated = false;
1293   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1294
1295   estimate_numbers_of_iterations (cfun);
1296
1297   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1298     {
1299       changed |= canonicalize_loop_induction_variables (loop,
1300                                                         true, UL_SINGLE_ITER,
1301                                                         true, false);
1302     }
1303   gcc_assert (!need_ssa_update_p (cfun));
1304
1305   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1306   if (irred_invalidated
1307       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1308     mark_irreducible_loops ();
1309
1310   /* Clean up the information about numbers of iterations, since brute force
1311      evaluation could reveal new information.  */
1312   free_numbers_of_iterations_estimates (cfun);
1313   scev_reset ();
1314
1315   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1316     {
1317       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1318       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1319     }
1320   BITMAP_FREE (loop_closed_ssa_invalidated);
1321
1322   if (changed)
1323     return TODO_cleanup_cfg;
1324   return 0;
1325 }
1326
1327 /* Process loops from innermost to outer, stopping at the innermost
1328    loop we unrolled.  */
1329
1330 static bool
1331 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1332                                 bitmap father_bbs, class loop *loop)
1333 {
1334   class loop *loop_father;
1335   bool changed = false;
1336   class loop *inner;
1337   enum unroll_level ul;
1338   unsigned num = number_of_loops (cfun);
1339
1340   /* Process inner loops first.  Don't walk loops added by the recursive
1341      calls because SSA form is not up-to-date.  They can be handled in the
1342      next iteration.  */
1343   bitmap child_father_bbs = NULL;
1344   for (inner = loop->inner; inner != NULL; inner = inner->next)
1345     if ((unsigned) inner->num < num)
1346       {
1347         if (!child_father_bbs)
1348           child_father_bbs = BITMAP_ALLOC (NULL);
1349         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1350                                             child_father_bbs, inner))
1351           {
1352             bitmap_ior_into (father_bbs, child_father_bbs);
1353             bitmap_clear (child_father_bbs);
1354             changed = true;
1355           }
1356       }
1357   if (child_father_bbs)
1358     BITMAP_FREE (child_father_bbs);
1359
1360   /* If we changed an inner loop we cannot process outer loops in this
1361      iteration because SSA form is not up-to-date.  Continue with
1362      siblings of outer loops instead.  */
1363   if (changed)
1364     {
1365       /* If we are recorded as father clear all other fathers that
1366          are necessarily covered already to avoid redundant work.  */
1367       if (bitmap_bit_p (father_bbs, loop->header->index))
1368         {
1369           bitmap_clear (father_bbs);
1370           bitmap_set_bit (father_bbs, loop->header->index);
1371         }
1372       return true;
1373     }
1374
1375   /* Don't unroll #pragma omp simd loops until the vectorizer
1376      attempts to vectorize those.  */
1377   if (loop->force_vectorize)
1378     return false;
1379
1380   /* Try to unroll this loop.  */
1381   loop_father = loop_outer (loop);
1382   if (!loop_father)
1383     return false;
1384
1385   if (loop->unroll > 1)
1386     ul = UL_ALL;
1387   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1388       /* Unroll outermost loops only if asked to do so or they do
1389          not cause code growth.  */
1390       && (unroll_outer || loop_outer (loop_father)))
1391     ul = UL_ALL;
1392   else
1393     ul = UL_NO_GROWTH;
1394
1395   if (canonicalize_loop_induction_variables
1396         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1397     {
1398       /* If we'll continue unrolling, we need to propagate constants
1399          within the new basic blocks to fold away induction variable
1400          computations; otherwise, the size might blow up before the
1401          iteration is complete and the IR eventually cleaned up.  */
1402       if (loop_outer (loop_father))
1403         {
1404           /* Once we process our father we will have processed
1405              the fathers of our children as well, so avoid doing
1406              redundant work and clear fathers we've gathered sofar.  */
1407           bitmap_clear (father_bbs);
1408           bitmap_set_bit (father_bbs, loop_father->header->index);
1409         }
1410
1411       return true;
1412     }
1413
1414   return false;
1415 }
1416
1417 /* Unroll LOOPS completely if they iterate just few times.  Unless
1418    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1419    size of the code does not increase.  */
1420
1421 static unsigned int
1422 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1423 {
1424   bitmap father_bbs = BITMAP_ALLOC (NULL);
1425   bool changed;
1426   int iteration = 0;
1427   bool irred_invalidated = false;
1428
1429   estimate_numbers_of_iterations (cfun);
1430
1431   do
1432     {
1433       changed = false;
1434       bitmap loop_closed_ssa_invalidated = NULL;
1435
1436       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1437         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1438
1439       free_numbers_of_iterations_estimates (cfun);
1440       estimate_numbers_of_iterations (cfun);
1441
1442       changed = tree_unroll_loops_completely_1 (may_increase_size,
1443                                                 unroll_outer, father_bbs,
1444                                                 current_loops->tree_root);
1445       if (changed)
1446         {
1447           unsigned i;
1448
1449           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1450
1451           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1452           if (loop_closed_ssa_invalidated
1453               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1454             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1455                                           TODO_update_ssa);
1456           else
1457             update_ssa (TODO_update_ssa);
1458
1459           /* father_bbs is a bitmap of loop father header BB indices.
1460              Translate that to what non-root loops these BBs belong to now.  */
1461           bitmap_iterator bi;
1462           bitmap fathers = BITMAP_ALLOC (NULL);
1463           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1464             {
1465               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1466               if (! unrolled_loop_bb)
1467                 continue;
1468               if (loop_outer (unrolled_loop_bb->loop_father))
1469                 bitmap_set_bit (fathers,
1470                                 unrolled_loop_bb->loop_father->num);
1471             }
1472           bitmap_clear (father_bbs);
1473           /* Propagate the constants within the new basic blocks.  */
1474           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1475             {
1476               loop_p father = get_loop (cfun, i);
1477               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1478               loop_exit *exit = father->exits->next;
1479               while (exit->e)
1480                 {
1481                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1482                   exit = exit->next;
1483                 }
1484               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1485             }
1486           BITMAP_FREE (fathers);
1487
1488           /* This will take care of removing completely unrolled loops
1489              from the loop structures so we can continue unrolling now
1490              innermost loops.  */
1491           if (cleanup_tree_cfg ())
1492             update_ssa (TODO_update_ssa_only_virtuals);
1493
1494           /* Clean up the information about numbers of iterations, since
1495              complete unrolling might have invalidated it.  */
1496           scev_reset ();
1497           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1498             verify_loop_closed_ssa (true);
1499         }
1500       if (loop_closed_ssa_invalidated)
1501         BITMAP_FREE (loop_closed_ssa_invalidated);
1502     }
1503   while (changed
1504          && ++iteration <= param_max_unroll_iterations);
1505
1506   BITMAP_FREE (father_bbs);
1507
1508   if (irred_invalidated
1509       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1510     mark_irreducible_loops ();
1511
1512   return 0;
1513 }
1514
1515 /* Canonical induction variable creation pass.  */
1516
1517 namespace {
1518
1519 const pass_data pass_data_iv_canon =
1520 {
1521   GIMPLE_PASS, /* type */
1522   "ivcanon", /* name */
1523   OPTGROUP_LOOP, /* optinfo_flags */
1524   TV_TREE_LOOP_IVCANON, /* tv_id */
1525   ( PROP_cfg | PROP_ssa ), /* properties_required */
1526   0, /* properties_provided */
1527   0, /* properties_destroyed */
1528   0, /* todo_flags_start */
1529   0, /* todo_flags_finish */
1530 };
1531
1532 class pass_iv_canon : public gimple_opt_pass
1533 {
1534 public:
1535   pass_iv_canon (gcc::context *ctxt)
1536     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1537   {}
1538
1539   /* opt_pass methods: */
1540   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1541   virtual unsigned int execute (function *fun);
1542
1543 }; // class pass_iv_canon
1544
1545 unsigned int
1546 pass_iv_canon::execute (function *fun)
1547 {
1548   if (number_of_loops (fun) <= 1)
1549     return 0;
1550
1551   return canonicalize_induction_variables ();
1552 }
1553
1554 } // anon namespace
1555
1556 gimple_opt_pass *
1557 make_pass_iv_canon (gcc::context *ctxt)
1558 {
1559   return new pass_iv_canon (ctxt);
1560 }
1561
1562 /* Complete unrolling of loops.  */
1563
1564 namespace {
1565
1566 const pass_data pass_data_complete_unroll =
1567 {
1568   GIMPLE_PASS, /* type */
1569   "cunroll", /* name */
1570   OPTGROUP_LOOP, /* optinfo_flags */
1571   TV_COMPLETE_UNROLL, /* tv_id */
1572   ( PROP_cfg | PROP_ssa ), /* properties_required */
1573   0, /* properties_provided */
1574   0, /* properties_destroyed */
1575   0, /* todo_flags_start */
1576   0, /* todo_flags_finish */
1577 };
1578
1579 class pass_complete_unroll : public gimple_opt_pass
1580 {
1581 public:
1582   pass_complete_unroll (gcc::context *ctxt)
1583     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1584   {}
1585
1586   /* opt_pass methods: */
1587   virtual unsigned int execute (function *);
1588
1589 }; // class pass_complete_unroll
1590
1591 unsigned int
1592 pass_complete_unroll::execute (function *fun)
1593 {
1594   if (number_of_loops (fun) <= 1)
1595     return 0;
1596
1597   /* If we ever decide to run loop peeling more than once, we will need to
1598      track loops already peeled in loop structures themselves to avoid
1599      re-peeling the same loop multiple times.  */
1600   if (flag_peel_loops)
1601     peeled_loops = BITMAP_ALLOC (NULL);
1602   unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1603                                                    || flag_peel_loops
1604                                                    || optimize >= 3, true);
1605   if (peeled_loops)
1606     {
1607       BITMAP_FREE (peeled_loops);
1608       peeled_loops = NULL;
1609     }
1610   return val;
1611 }
1612
1613 } // anon namespace
1614
1615 gimple_opt_pass *
1616 make_pass_complete_unroll (gcc::context *ctxt)
1617 {
1618   return new pass_complete_unroll (ctxt);
1619 }
1620
1621 /* Complete unrolling of inner loops.  */
1622
1623 namespace {
1624
1625 const pass_data pass_data_complete_unrolli =
1626 {
1627   GIMPLE_PASS, /* type */
1628   "cunrolli", /* name */
1629   OPTGROUP_LOOP, /* optinfo_flags */
1630   TV_COMPLETE_UNROLL, /* tv_id */
1631   ( PROP_cfg | PROP_ssa ), /* properties_required */
1632   0, /* properties_provided */
1633   0, /* properties_destroyed */
1634   0, /* todo_flags_start */
1635   0, /* todo_flags_finish */
1636 };
1637
1638 class pass_complete_unrolli : public gimple_opt_pass
1639 {
1640 public:
1641   pass_complete_unrolli (gcc::context *ctxt)
1642     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1643   {}
1644
1645   /* opt_pass methods: */
1646   virtual bool gate (function *) { return optimize >= 2; }
1647   virtual unsigned int execute (function *);
1648
1649 }; // class pass_complete_unrolli
1650
1651 unsigned int
1652 pass_complete_unrolli::execute (function *fun)
1653 {
1654   unsigned ret = 0;
1655
1656   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1657   if (number_of_loops (fun) > 1)
1658     {
1659       scev_initialize ();
1660       ret = tree_unroll_loops_completely (optimize >= 3, false);
1661       scev_finalize ();
1662     }
1663   loop_optimizer_finalize ();
1664
1665   return ret;
1666 }
1667
1668 } // anon namespace
1669
1670 gimple_opt_pass *
1671 make_pass_complete_unrolli (gcc::context *ctxt)
1672 {
1673   return new pass_complete_unrolli (ctxt);
1674 }
1675
1676