decl.c (value_annotation_hasher::handle_cache_entry): Delete.
[gcc.git] / gcc / loop-doloop.c
1 /* Perform doloop optimizations
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "symtab.h"
28 #include "hard-reg-set.h"
29 #include "function.h"
30 #include "alias.h"
31 #include "tree.h"
32 #include "insn-config.h"
33 #include "expmed.h"
34 #include "dojump.h"
35 #include "explow.h"
36 #include "calls.h"
37 #include "emit-rtl.h"
38 #include "varasm.h"
39 #include "stmt.h"
40 #include "expr.h"
41 #include "diagnostic-core.h"
42 #include "tm_p.h"
43 #include "predict.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgloop.h"
47 #include "cfgrtl.h"
48 #include "basic-block.h"
49 #include "params.h"
50 #include "target.h"
51 #include "dumpfile.h"
52 #include "loop-unroll.h"
53
54 /* This module is used to modify loops with a determinable number of
55 iterations to use special low-overhead looping instructions.
56
57 It first validates whether the loop is well behaved and has a
58 determinable number of iterations (either at compile or run-time).
59 It then modifies the loop to use a low-overhead looping pattern as
60 follows:
61
62 1. A pseudo register is allocated as the loop iteration counter.
63
64 2. The number of loop iterations is calculated and is stored
65 in the loop counter.
66
67 3. At the end of the loop, the jump insn is replaced by the
68 doloop_end pattern. The compare must remain because it might be
69 used elsewhere. If the loop-variable or condition register are
70 used elsewhere, they will be eliminated by flow.
71
72 4. An optional doloop_begin pattern is inserted at the top of the
73 loop.
74
75     TODO The optimization should only be performed when either the biv used for exit
76 condition is unused at all except for the exit test, or if we do not have to
77 change its value, since otherwise we have to add a new induction variable,
78 which usually will not pay up (unless the cost of the doloop pattern is
79 somehow extremely lower than the cost of compare & jump, or unless the bct
80 register cannot be used for anything else but doloop -- ??? detect these
81 cases). */
82
83 #ifdef HAVE_doloop_end
84
85 /* Return the loop termination condition for PATTERN or zero
86 if it is not a decrement and branch jump insn. */
87
88 rtx
89 doloop_condition_get (rtx doloop_pat)
90 {
91 rtx cmp;
92 rtx inc;
93 rtx reg;
94 rtx inc_src;
95 rtx condition;
96 rtx pattern;
97 rtx cc_reg = NULL_RTX;
98 rtx reg_orig = NULL_RTX;
99
100 /* The canonical doloop pattern we expect has one of the following
101 forms:
102
103 1) (parallel [(set (pc) (if_then_else (condition)
104 (label_ref (label))
105 (pc)))
106 (set (reg) (plus (reg) (const_int -1)))
107 (additional clobbers and uses)])
108
109 The branch must be the first entry of the parallel (also required
110 by jump.c), and the second entry of the parallel must be a set of
111 the loop counter register. Some targets (IA-64) wrap the set of
112 the loop counter in an if_then_else too.
113
114 2) (set (reg) (plus (reg) (const_int -1))
115 (set (pc) (if_then_else (reg != 0)
116 (label_ref (label))
117 (pc))).
118
119 Some targets (ARM) do the comparison before the branch, as in the
120 following form:
121
122 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
123 (set (reg) (plus (reg) (const_int -1)))])
124 (set (pc) (if_then_else (cc == NE)
125 (label_ref (label))
126 (pc))) */
127
128 pattern = PATTERN (doloop_pat);
129
130 if (GET_CODE (pattern) != PARALLEL)
131 {
132 rtx cond;
133 rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
134 rtx cmp_arg1, cmp_arg2;
135 rtx cmp_orig;
136
137 /* In case the pattern is not PARALLEL we expect two forms
138 of doloop which are cases 2) and 3) above: in case 2) the
139 decrement immediately precedes the branch, while in case 3)
140 the compare and decrement instructions immediately precede
141 the branch. */
142
143 if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
144 return 0;
145
146 cmp = pattern;
147 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
148 {
149 /* The third case: the compare and decrement instructions
150 immediately precede the branch. */
151 cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
152 if (GET_CODE (cmp_orig) != SET)
153 return 0;
154 if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
155 return 0;
156 cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
157 cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
158 if (cmp_arg2 != const0_rtx
159 || GET_CODE (cmp_arg1) != PLUS)
160 return 0;
161 reg_orig = XEXP (cmp_arg1, 0);
162 if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
163 || !REG_P (reg_orig))
164 return 0;
165 cc_reg = SET_DEST (cmp_orig);
166
167 inc = XVECEXP (PATTERN (prev_insn), 0, 1);
168 }
169 else
170 inc = PATTERN (prev_insn);
171 /* We expect the condition to be of the form (reg != 0) */
172 cond = XEXP (SET_SRC (cmp), 0);
173 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
174 return 0;
175 }
176 else
177 {
178 cmp = XVECEXP (pattern, 0, 0);
179 inc = XVECEXP (pattern, 0, 1);
180 }
181
182 /* Check for (set (reg) (something)). */
183 if (GET_CODE (inc) != SET)
184 return 0;
185 reg = SET_DEST (inc);
186 if (! REG_P (reg))
187 return 0;
188
189 /* Check if something = (plus (reg) (const_int -1)).
190 On IA-64, this decrement is wrapped in an if_then_else. */
191 inc_src = SET_SRC (inc);
192 if (GET_CODE (inc_src) == IF_THEN_ELSE)
193 inc_src = XEXP (inc_src, 1);
194 if (GET_CODE (inc_src) != PLUS
195 || XEXP (inc_src, 0) != reg
196 || XEXP (inc_src, 1) != constm1_rtx)
197 return 0;
198
199 /* Check for (set (pc) (if_then_else (condition)
200 (label_ref (label))
201 (pc))). */
202 if (GET_CODE (cmp) != SET
203 || SET_DEST (cmp) != pc_rtx
204 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
205 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
206 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
207 return 0;
208
209 /* Extract loop termination condition. */
210 condition = XEXP (SET_SRC (cmp), 0);
211
212 /* We expect a GE or NE comparison with 0 or 1. */
213 if ((GET_CODE (condition) != GE
214 && GET_CODE (condition) != NE)
215 || (XEXP (condition, 1) != const0_rtx
216 && XEXP (condition, 1) != const1_rtx))
217 return 0;
218
219 if ((XEXP (condition, 0) == reg)
220 /* For the third case: */
221 || ((cc_reg != NULL_RTX)
222 && (XEXP (condition, 0) == cc_reg)
223 && (reg_orig == reg))
224 || (GET_CODE (XEXP (condition, 0)) == PLUS
225 && XEXP (XEXP (condition, 0), 0) == reg))
226 {
227 if (GET_CODE (pattern) != PARALLEL)
228 /* For the second form we expect:
229
230 (set (reg) (plus (reg) (const_int -1))
231 (set (pc) (if_then_else (reg != 0)
232 (label_ref (label))
233 (pc))).
234
235 is equivalent to the following:
236
237 (parallel [(set (pc) (if_then_else (reg != 1)
238 (label_ref (label))
239 (pc)))
240 (set (reg) (plus (reg) (const_int -1)))
241 (additional clobbers and uses)])
242
243 For the third form we expect:
244
245 (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
246 (set (reg) (plus (reg) (const_int -1)))])
247 (set (pc) (if_then_else (cc == NE)
248 (label_ref (label))
249 (pc)))
250
251 which is equivalent to the following:
252
253 (parallel [(set (cc) (compare (reg, 1))
254 (set (reg) (plus (reg) (const_int -1)))
255 (set (pc) (if_then_else (NE == cc)
256 (label_ref (label))
257 (pc))))])
258
259 So we return the second form instead for the two cases.
260
261 */
262 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
263
264 return condition;
265 }
266
267 /* ??? If a machine uses a funny comparison, we could return a
268 canonicalized form here. */
269
270 return 0;
271 }
272
273 /* Return nonzero if the loop specified by LOOP is suitable for
274 the use of special low-overhead looping instructions. DESC
275 describes the number of iterations of the loop. */
276
277 static bool
278 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
279 {
280 basic_block *body = get_loop_body (loop), bb;
281 rtx_insn *insn;
282 unsigned i;
283 bool result = true;
284
285 /* Check for loops that may not terminate under special conditions. */
286 if (!desc->simple_p
287 || desc->assumptions
288 || desc->infinite)
289 {
290 /* There are some cases that would require a special attention.
291 For example if the comparison is LEU and the comparison value
292 is UINT_MAX then the loop will not terminate. Similarly, if the
293 comparison code is GEU and the comparison value is 0, the
294 loop will not terminate.
295
296 If the absolute increment is not 1, the loop can be infinite
297 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
298
299 ??? We could compute these conditions at run-time and have a
300 additional jump around the loop to ensure an infinite loop.
301 However, it is very unlikely that this is the intended
302 behavior of the loop and checking for these rare boundary
303 conditions would pessimize all other code.
304
305 If the loop is executed only a few times an extra check to
306 restart the loop could use up most of the benefits of using a
307 count register loop. Note however, that normally, this
308 restart branch would never execute, so it could be predicted
309 well by the CPU. We should generate the pessimistic code by
310 default, and have an option, e.g. -funsafe-loops that would
311 enable count-register loops in this case. */
312 if (dump_file)
313 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
314 result = false;
315 goto cleanup;
316 }
317
318 for (i = 0; i < loop->num_nodes; i++)
319 {
320 bb = body[i];
321
322 for (insn = BB_HEAD (bb);
323 insn != NEXT_INSN (BB_END (bb));
324 insn = NEXT_INSN (insn))
325 {
326 /* Different targets have different necessities for low-overhead
327 looping. Call the back end for each instruction within the loop
328 to let it decide whether the insn prohibits a low-overhead loop.
329 It will then return the cause for it to emit to the dump file. */
330 const char * invalid = targetm.invalid_within_doloop (insn);
331 if (invalid)
332 {
333 if (dump_file)
334 fprintf (dump_file, "Doloop: %s\n", invalid);
335 result = false;
336 goto cleanup;
337 }
338 }
339 }
340 result = true;
341
342 cleanup:
343 free (body);
344
345 return result;
346 }
347
/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
   edge.  If the condition is always false, do not do anything.  If it is always
   true, redirect E to DEST and return false.  In all other cases, true is
   returned.  */

static bool
add_test (rtx cond, edge *e, basic_block dest)
{
  rtx_insn *seq, *jump;
  rtx_code_label *label;
  machine_mode mode;
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
  enum rtx_code code = GET_CODE (cond);
  basic_block bb;

  /* Take the compare mode from the first operand; if that is a
     VOIDmode constant, fall back to the second operand's mode.  */
  mode = GET_MODE (XEXP (cond, 0));
  if (mode == VOIDmode)
    mode = GET_MODE (XEXP (cond, 1));

  /* Emit the compare-and-jump into a detached sequence so it can be
     inserted on the edge as a unit.  */
  start_sequence ();
  op0 = force_operand (op0, NULL_RTX);
  op1 = force_operand (op1, NULL_RTX);
  label = block_label (dest);
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, -1);

  jump = get_last_insn ();
  if (!jump || !JUMP_P (jump))
    {
      /* The condition is always false and the jump was optimized out.  */
      end_sequence ();
      return true;
    }

  seq = get_insns ();
  end_sequence ();

  /* There always is at least the jump insn in the sequence.  */
  gcc_assert (seq != NULL_RTX);

  bb = split_edge_and_insert (*e, seq);
  *e = single_succ_edge (bb);

  if (any_uncondjump_p (jump))
    {
      /* The condition is always true.  */
      delete_insn (jump);
      redirect_edge_and_branch_force (*e, dest);
      return false;
    }

  JUMP_LABEL (jump) = label;

  /* The jump is supposed to handle an unlikely special case.  */
  add_int_reg_note (jump, REG_BR_PROB, 0);

  LABEL_NUSES (label)++;

  /* Add the branch-taken edge to DEST; the fallthru flag stays on *e.  */
  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
  return true;
}
408
/* Modify the loop to use the low-overhead looping insn where LOOP
   describes the loop, DESC describes the number of iterations of the
   loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
   end of the loop.  CONDITION is the condition separated from the
   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */

static void
doloop_modify (struct loop *loop, struct niter_desc *desc,
	       rtx doloop_seq, rtx condition, rtx count)
{
  rtx counter_reg;
  rtx tmp, noloop = NULL_RTX;
  rtx_insn *sequence;
  rtx_insn *jump_insn;
  rtx_code_label *jump_label;
  int nonneg = 0;
  bool increment_count;
  basic_block loop_end = desc->out_edge->src;
  machine_mode mode;
  rtx true_prob_val;
  widest_int iterations;

  jump_insn = BB_END (loop_end);

  if (dump_file)
    {
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
      if (desc->const_iter)
	fprintf (dump_file, "%" PRId64, desc->niter);
      else
	fputs ("runtime", dump_file);
      fputs (" iterations).\n", dump_file);
    }

  /* Get the probability of the original branch.  If it exists we would
     need to update REG_BR_PROB of the new jump_insn.  */
  true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);

  /* Discard original jump to continue loop.  The original compare
     result may still be live, so it cannot be discarded explicitly.  */
  delete_insn (jump_insn);

  /* The counter register is the first operand of the condition; for the
     compare-before-branch (cc) form it is wrapped in a PLUS.  */
  counter_reg = XEXP (condition, 0);
  if (GET_CODE (counter_reg) == PLUS)
    counter_reg = XEXP (counter_reg, 0);
  mode = GET_MODE (counter_reg);

  increment_count = false;
  switch (GET_CODE (condition))
    {
    case NE:
      /* Currently only NE tests against zero and one are supported.  */
      noloop = XEXP (condition, 1);
      if (noloop != const0_rtx)
	{
	  gcc_assert (noloop == const1_rtx);
	  increment_count = true;
	}
      break;

    case GE:
      /* Currently only GE tests against zero are supported.  */
      gcc_assert (XEXP (condition, 1) == const0_rtx);

      noloop = constm1_rtx;

      /* The iteration count does not need incrementing for a GE test.  */
      increment_count = false;

      /* Determine if the iteration counter will be non-negative.
	 Note that the maximum value loaded is iterations_max - 1.  */
      if (get_max_loop_iterations (loop, &iterations)
	  && wi::leu_p (iterations,
			wi::set_bit_in_zero <widest_int>
			(GET_MODE_PRECISION (mode) - 1)))
	nonneg = 1;
      break;

      /* Abort if an invalid doloop pattern has been generated.  */
    default:
      gcc_unreachable ();
    }

  if (increment_count)
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);

  /* Insert initialization of the count register into the loop header.  */
  start_sequence ();
  tmp = force_operand (count, counter_reg);
  convert_move (counter_reg, tmp, 1);
  sequence = get_insns ();
  end_sequence ();
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

  if (desc->noloop_assumptions)
    {
      /* The loop may be skipped entirely under some run-time conditions;
	 guard the preheader so the counter is reset to the "no iterations"
	 value when any such condition holds.  */
      rtx ass = copy_rtx (desc->noloop_assumptions);
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block set_zero
	      = split_edge (loop_preheader_edge (loop));
      basic_block new_preheader
	      = split_edge (loop_preheader_edge (loop));
      edge te;

      /* Expand the condition testing the assumptions and if it does not pass,
	 reset the count register to 0.  */
      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

      set_zero->count = 0;
      set_zero->frequency = 0;

      te = single_succ_edge (preheader);
      for (; ass; ass = XEXP (ass, 1))
	if (!add_test (XEXP (ass, 0), &te, set_zero))
	  break;

      if (ass)
	{
	  /* We reached a condition that is always true.  This is very hard to
	     reproduce (such a loop does not roll, and thus it would most
	     likely get optimized out by some of the preceding optimizations).
	     In fact, I do not have any testcase for it.  However, it would
	     also be very hard to show that it is impossible, so we must
	     handle this case.  */
	  set_zero->count = preheader->count;
	  set_zero->frequency = preheader->frequency;
	}
 
      if (EDGE_COUNT (set_zero->preds) == 0)
	{
	  /* All the conditions were simplified to false, remove the
	     unreachable set_zero block.  */
	  delete_basic_block (set_zero);
	}
      else
	{
	  /* Reset the counter to zero in the set_zero block.  */
	  start_sequence ();
	  convert_move (counter_reg, noloop, 0);
	  sequence = get_insns ();
	  end_sequence ();
	  emit_insn_after (sequence, BB_END (set_zero));
      
	  set_immediate_dominator (CDI_DOMINATORS, set_zero,
				   recompute_dominator (CDI_DOMINATORS,
							set_zero));
	}

      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
			       recompute_dominator (CDI_DOMINATORS,
						    new_preheader));
    }

  /* Some targets (eg, C4x) need to initialize special looping
     registers.  */
#ifdef HAVE_doloop_begin
  {
    rtx init;

    init = gen_doloop_begin (counter_reg, doloop_seq);
    if (init)
      {
	start_sequence ();
	emit_insn (init);
	sequence = get_insns ();
	end_sequence ();
	emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
      }
  }
#endif

  /* Insert the new low-overhead looping insn.  */
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
  jump_insn = BB_END (loop_end);
  jump_label = block_label (desc->in_edge->dest);
  JUMP_LABEL (jump_insn) = jump_label;
  LABEL_NUSES (jump_label)++;

  /* Ensure the right fallthru edge is marked, for case we have reversed
     the condition.  */
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
  desc->out_edge->flags |= EDGE_FALLTHRU;

  /* Add a REG_NONNEG note if the actual or estimated maximum number
     of iterations is non-negative.  */
  if (nonneg)
    add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);

  /* Update the REG_BR_PROB note.  */
  if (true_prob_val)
    {
      /* Seems safer to use the branch probability.  */
      add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
    }
}
605
/* Process loop described by LOOP validating that the loop is suitable for
   conversion to use a low overhead looping instruction, replacing the jump
   insn where suitable.  Returns true if the loop was successfully
   modified.  */

static bool
doloop_optimize (struct loop *loop)
{
  machine_mode mode;
  rtx doloop_seq, doloop_pat, doloop_reg;
  rtx count;
  widest_int iterations, iterations_max;
  rtx_code_label *start_label;
  rtx condition;
  unsigned level, est_niter;
  int max_cost;
  struct niter_desc *desc;
  unsigned word_mode_size;
  unsigned HOST_WIDE_INT word_mode_max;
  int entered_at_top;

  if (dump_file)
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);

  iv_analysis_loop_init (loop);

  /* Find the simple exit of a LOOP.  */
  desc = get_simple_loop_desc (loop);

  /* Check that loop is a candidate for a low-overhead looping insn.  */
  if (!doloop_valid_p (loop, desc))
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: The loop is not suitable.\n");
      return false;
    }
  mode = desc->mode;

  /* Default the iteration estimate to just above the profitability
     threshold, so loops with unknown counts are not rejected outright.  */
  est_niter = 3;
  if (desc->const_iter)
    est_niter = desc->niter;
  /* If the estimate on number of iterations is reliable (comes from profile
     feedback), use it.  Do not use it normally, since the expected number
     of iterations of an unrolled loop is 2.  */
  if (loop->header->count)
    est_niter = expected_loop_iterations (loop);

  if (est_niter < 3)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Too few iterations (%u) to be profitable.\n",
		 est_niter);
      return false;
    }

  /* Bound the RTX cost of materializing the iteration count.  */
  max_cost
    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
  if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
      > max_cost)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: number of iterations too costly to compute.\n");
      return false;
    }

  if (desc->const_iter)
    iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
				   UNSIGNED);
  else
    iterations = 0;
  if (!get_max_loop_iterations (loop, &iterations_max))
    iterations_max = 0;
  level = get_loop_level (loop) + 1;
  entered_at_top = (loop->latch == desc->in_edge->dest
		    && contains_no_active_insn_p (loop->latch));
  if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
				 entered_at_top))
    {
      if (dump_file)
	fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
      return false;
    }

  /* Generate looping insn.  If the pattern FAILs then give up trying
     to modify the loop since there is some aspect the back-end does
     not like.  */
  count = copy_rtx (desc->niter_expr);
  start_label = block_label (desc->in_edge->dest);
  doloop_reg = gen_reg_rtx (mode);
  doloop_seq = gen_doloop_end (doloop_reg, start_label);

  /* word_mode_max is the all-ones value of word_mode, computed without
     overflowing the shift when word_mode is as wide as HOST_WIDE_INT.  */
  word_mode_size = GET_MODE_PRECISION (word_mode);
  word_mode_max
    = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
  if (! doloop_seq
      && mode != word_mode
      /* Before trying mode different from the one in that # of iterations is
	 computed, we must be sure that the number of iterations fits into
	 the new mode.  */
      && (word_mode_size >= GET_MODE_PRECISION (mode)
	  || wi::leu_p (iterations_max, word_mode_max)))
    {
      /* Retry with the count widened (or narrowed) to word_mode.  */
      if (word_mode_size > GET_MODE_PRECISION (mode))
	count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
      else
	count = lowpart_subreg (word_mode, count, mode);
      PUT_MODE (doloop_reg, word_mode);
      doloop_seq = gen_doloop_end (doloop_reg, start_label);
    }
  if (! doloop_seq)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Target unwilling to use doloop pattern!\n");
      return false;
    }

  /* If multiple instructions were created, the last must be the
     jump instruction.  Also, a raw define_insn may yield a plain
     pattern.  */
  doloop_pat = doloop_seq;
  if (INSN_P (doloop_pat))
    {
      rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
      while (NEXT_INSN (doloop_insn) != NULL_RTX)
	doloop_insn = NEXT_INSN (doloop_insn);
      if (!JUMP_P (doloop_insn))
	doloop_insn = NULL;
      doloop_pat = doloop_insn;
    }

  if (! doloop_pat
      || ! (condition = doloop_condition_get (doloop_pat)))
    {
      if (dump_file)
	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
      return false;
    }

  doloop_modify (loop, desc, doloop_seq, condition, count);
  return true;
}
751
752 /* This is the main entry point. Process all loops using doloop_optimize. */
753
754 void
755 doloop_optimize_loops (void)
756 {
757 struct loop *loop;
758
759 FOR_EACH_LOOP (loop, 0)
760 {
761 doloop_optimize (loop);
762 }
763
764 iv_analysis_done ();
765
766 #ifdef ENABLE_CHECKING
767 verify_loop_structure ();
768 #endif
769 }
770 #endif /* HAVE_doloop_end */
771