/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  A small example of the transformation follows
   this comment.  */
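
/* As an editorial illustration (a sketch, not output of the pass; the
   names below are made up), consider

     for (i = 0; i < n; i++)
       a[i] = 0;

   The address &a[i] is an induction variable with base &a[0] and step
   sizeof (a[0]).  If the pass decides that this address iv is the only
   one worth keeping, strength reduction and iv elimination can rewrite
   the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so the counter i disappears and the exit test compares the pointer
   against a loop-invariant bound.  */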

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
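
/* (Editorial note: the two fields are combined by adding them
   componentwise, and when two comp_cost values are compared later in
   this file, the runtime COST is the primary key with COMPLEXITY as the
   tie-breaker, so e.g. {4, 1} is preferred over {4, 2}.)  */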

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
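
/* A rough sketch of where these positions land in a typical loop
   (illustrative only, with made-up SSA names):

     header:  i_1 = PHI <0 (preheader), i_2 (latch)>
              ...
              i_2 = i_1 + 1;               <- the IP_ORIGINAL increment
              if (i_2 < n_3) goto header;  <- IP_NORMAL inserts just before
                                              this exit test
     latch:                                <- IP_END inserts at the end of
                                              this block

   IP_BEFORE_USE and IP_AFTER_USE instead place the increment immediately
   around the single use recorded in ainc_use.  */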

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
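
/* (Editorial note: the three bounds above should correspond to the
   command-line knobs --param iv-consider-all-candidates-bound,
   --param iv-max-considered-uses and
   --param iv-always-prune-cand-set-bound respectively, so they can be
   tuned without recompiling.)  */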

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
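
/* For reference, the output of dump_iv looks roughly like this for a
   simple counter (made-up name and values):

     ssa name i_1
       type int
       base 0
       step 1
       is a biv

   with a single "invariant" line replacing base/step when the step is
   zero.  */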

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
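
/* (Editorial aside: SSA names occur in abnormal phi nodes e.g. for
   values live across a setjmp call or an abnormal edge; such names must
   stay coalescible with the phi result, so the checks above
   conservatively reject any expression that mentions them.)  */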

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so as not to
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
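
/* A few illustrative results of determine_base_object (hypothetical
   inputs, not taken from a dump):

     &a[i_1]     -> &a          (ADDR_EXPR whose base address is the decl a)
     p_2 + 4     -> base object of p_2  (POINTER_PLUS_EXPR recurses on p_2)
     (int *) 0   -> NULL_TREE   (integer constants point to no object)  */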

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except those whose operand is
     a DECL_P.  By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
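
/* For instance (a hypothetical tree, for illustration only), a base of
   &a[3] with 4-byte elements is lowered through tree_to_aff_combination
   to the affine form &a + 12, so that &a[3] here and &a + 12 elsewhere
   yield a single candidate instead of two, and the addressing cost is
   computed on the flat form.  */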

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
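
/* As an illustration (made-up SSA names), find_bivs recognizes

     i_1 = PHI <0 (preheader), i_4 (latch)>
     ...
     i_4 = i_1 + 1;

   and records i_1 as an iv with base 0 and step 1; mark_bivs below then
   sets biv_p on both i_1 and the increment result i_4.  */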

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
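
/* For example (a hypothetical statement), with the biv i_1 above,

     a_7 = i_1 * 4;

   defines a giv: simple_iv reports base 0 and step 4 for a_7, and
   find_givs_in_stmt below records it via set_iv just like a biv, only
   with biv_p left false.  */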

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
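
/* E.g. for an exit test if (i_2 < n_3) where i_2 is an iv and n_3 is
   loop-invariant (illustrative names), *CONTROL_VAR ends up pointing to
   i_2, *BOUND to n_3, and true is returned; for a compare of two ivs or
   of two invariants the operands are still exported but the function
   returns false.  */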

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the ARRAY_REF
     path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
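
/* Worked example (hypothetical trees): for TOP = i_1 * 6 and BOT = i_1,
   the MULT_EXPR case recurses on i_1, which matches BOT with multiple 1,
   so *MUL becomes 6.  For TOP = 24 and BOT = 8 the INTEGER_CST case
   divides exactly and stores 3, while TOP = 25 leaves a nonzero
   remainder and the function returns false.  */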

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
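
/* For instance, on a strict-alignment target, an int reference with
   TYPE_ALIGN of 32 bits and a step of 2 bytes gives tree_ctz (step) == 1,
   and (1U << 1) * BITS_PER_UNIT == 16 < 32, so every other iteration
   would be misaligned and the function returns true.  (Illustrative
   numbers, assuming 8-bit units.)  */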

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2077
2078 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2079 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2080 we are at the top-level of the processed address. */
2081
2082 static tree
2083 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2084 HOST_WIDE_INT *offset)
2085 {
2086 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2087 enum tree_code code;
2088 tree type, orig_type = TREE_TYPE (expr);
2089 HOST_WIDE_INT off0, off1, st;
2090 tree orig_expr = expr;
2091
2092 STRIP_NOPS (expr);
2093
2094 type = TREE_TYPE (expr);
2095 code = TREE_CODE (expr);
2096 *offset = 0;
2097
2098 switch (code)
2099 {
2100 case INTEGER_CST:
2101 if (!cst_and_fits_in_hwi (expr)
2102 || integer_zerop (expr))
2103 return orig_expr;
2104
2105 *offset = int_cst_value (expr);
2106 return build_int_cst (orig_type, 0);
2107
2108 case POINTER_PLUS_EXPR:
2109 case PLUS_EXPR:
2110 case MINUS_EXPR:
2111 op0 = TREE_OPERAND (expr, 0);
2112 op1 = TREE_OPERAND (expr, 1);
2113
2114 op0 = strip_offset_1 (op0, false, false, &off0);
2115 op1 = strip_offset_1 (op1, false, false, &off1);
2116
2117 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2118 if (op0 == TREE_OPERAND (expr, 0)
2119 && op1 == TREE_OPERAND (expr, 1))
2120 return orig_expr;
2121
2122 if (integer_zerop (op1))
2123 expr = op0;
2124 else if (integer_zerop (op0))
2125 {
2126 if (code == MINUS_EXPR)
2127 expr = fold_build1 (NEGATE_EXPR, type, op1);
2128 else
2129 expr = op1;
2130 }
2131 else
2132 expr = fold_build2 (code, type, op0, op1);
2133
2134 return fold_convert (orig_type, expr);
2135
2136 case MULT_EXPR:
2137 op1 = TREE_OPERAND (expr, 1);
2138 if (!cst_and_fits_in_hwi (op1))
2139 return orig_expr;
2140
2141 op0 = TREE_OPERAND (expr, 0);
2142 op0 = strip_offset_1 (op0, false, false, &off0);
2143 if (op0 == TREE_OPERAND (expr, 0))
2144 return orig_expr;
2145
2146 *offset = off0 * int_cst_value (op1);
2147 if (integer_zerop (op0))
2148 expr = op0;
2149 else
2150 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2151
2152 return fold_convert (orig_type, expr);
2153
2154 case ARRAY_REF:
2155 case ARRAY_RANGE_REF:
2156 if (!inside_addr)
2157 return orig_expr;
2158
2159 step = array_ref_element_size (expr);
2160 if (!cst_and_fits_in_hwi (step))
2161 break;
2162
2163 st = int_cst_value (step);
2164 op1 = TREE_OPERAND (expr, 1);
2165 op1 = strip_offset_1 (op1, false, false, &off1);
2166 *offset = off1 * st;
2167
2168 if (top_compref
2169 && integer_zerop (op1))
2170 {
2171 /* Strip the component reference completely. */
2172 op0 = TREE_OPERAND (expr, 0);
2173 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2174 *offset += off0;
2175 return op0;
2176 }
2177 break;
2178
2179 case COMPONENT_REF:
2180 {
2181 tree field;
2182
2183 if (!inside_addr)
2184 return orig_expr;
2185
2186 tmp = component_ref_field_offset (expr);
2187 field = TREE_OPERAND (expr, 1);
2188 if (top_compref
2189 && cst_and_fits_in_hwi (tmp)
2190 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2191 {
2192 HOST_WIDE_INT boffset, abs_off;
2193
2194 /* Strip the component reference completely. */
2195 op0 = TREE_OPERAND (expr, 0);
2196 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2197 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2198 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2199 if (boffset < 0)
2200 abs_off = -abs_off;
2201
2202 *offset = off0 + int_cst_value (tmp) + abs_off;
2203 return op0;
2204 }
2205 }
2206 break;
2207
2208 case ADDR_EXPR:
2209 op0 = TREE_OPERAND (expr, 0);
2210 op0 = strip_offset_1 (op0, true, true, &off0);
2211 *offset += off0;
2212
2213 if (op0 == TREE_OPERAND (expr, 0))
2214 return orig_expr;
2215
2216 expr = build_fold_addr_expr (op0);
2217 return fold_convert (orig_type, expr);
2218
2219 case MEM_REF:
2220 /* ??? Offset operand? */
2221 inside_addr = false;
2222 break;
2223
2224 default:
2225 return orig_expr;
2226 }
2227
2228 /* Default handling of expressions for which we want to recurse into
2229 the first operand. */
2230 op0 = TREE_OPERAND (expr, 0);
2231 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2232 *offset += off0;
2233
2234 if (op0 == TREE_OPERAND (expr, 0)
2235 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2236 return orig_expr;
2237
2238 expr = copy_node (expr);
2239 TREE_OPERAND (expr, 0) = op0;
2240 if (op1)
2241 TREE_OPERAND (expr, 1) = op1;
2242
2243 /* Inside an address, we might strip the top-level component references,
2244 thus changing the type of the expression. Handling of ADDR_EXPR
2245 will fix that. */
2246 expr = fold_convert (orig_type, expr);
2247
2248 return expr;
2249 }
2250
2251 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2252
2253 static tree
2254 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2255 {
2256 HOST_WIDE_INT off;
2257 tree core = strip_offset_1 (expr, false, false, &off);
2258 *offset = off;
2259 return core;
2260 }
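
Illustrative sketch, not part of the source file: what the core/offset split computes for a concrete address, in standalone C (the struct and values are hypothetical) -- the core is &a[0] and everything else folds into a compile-time constant offset:

#include <stdio.h>
#include <stddef.h>

struct s { int pad; int f[8]; };

int main (void)
{
  struct s a[4];
  /* strip_offset-style decomposition of &a[2].f[3]: array strides and
     field offsets accumulate into one constant.  */
  size_t off = 2 * sizeof (struct s) + offsetof (struct s, f)
               + 3 * sizeof (int);
  printf ("same address: %d\n",
          (void *) ((char *) &a[0] + off) == (void *) &a[2].f[3]);
  return 0;
}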
2261
2262 /* Returns a variant of TYPE that can be used as a base for different uses.
2263 We return an unsigned type with the same precision, which avoids problems
2264 with overflows. */
2265
2266 static tree
2267 generic_type_for (tree type)
2268 {
2269 if (POINTER_TYPE_P (type))
2270 return unsigned_type_for (type);
2271
2272 if (TYPE_UNSIGNED (type))
2273 return type;
2274
2275 return unsigned_type_for (type);
2276 }
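
Illustrative sketch, not part of the source file: why the unsigned variant is safe -- signed overflow is undefined behavior in C, while unsigned arithmetic wraps with defined semantics:

#include <stdio.h>

int main (void)
{
  /* A signed counter stepping past INT_MAX would be undefined, so
     candidate values are computed in an unsigned type of the same
     precision, where wrapping is well defined.  */
  unsigned int u = 0x7fffffffu;   /* INT_MAX as an unsigned value.  */
  u += 1;                         /* Well-defined wrap to 0x80000000.  */
  printf ("0x%x\n", u);
  return 0;
}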
2277
2278 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2279 the bitmap in which we should store them. */
2280
2281 static struct ivopts_data *fd_ivopts_data;
2282 static tree
2283 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2284 {
2285 bitmap *depends_on = (bitmap *) data;
2286 struct version_info *info;
2287
2288 if (TREE_CODE (*expr_p) != SSA_NAME)
2289 return NULL_TREE;
2290 info = name_info (fd_ivopts_data, *expr_p);
2291
2292 if (!info->inv_id || info->has_nonlin_use)
2293 return NULL_TREE;
2294
2295 if (!*depends_on)
2296 *depends_on = BITMAP_ALLOC (NULL);
2297 bitmap_set_bit (*depends_on, info->inv_id);
2298
2299 return NULL_TREE;
2300 }
2301
2302 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2303 position to POS. If USE is not NULL, the candidate is set as related to
2304 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2305 replacement of the final value of the iv by a direct computation. */
2306
2307 static struct iv_cand *
2308 add_candidate_1 (struct ivopts_data *data,
2309 tree base, tree step, bool important, enum iv_position pos,
2310 struct iv_use *use, gimple incremented_at)
2311 {
2312 unsigned i;
2313 struct iv_cand *cand = NULL;
2314 tree type, orig_type;
2315
2316 /* For non-original variables, make sure their values are computed in a type
2317 that does not invoke undefined behavior on overflows (since in general,
2318 we cannot prove that these induction variables are non-wrapping). */
2319 if (pos != IP_ORIGINAL)
2320 {
2321 orig_type = TREE_TYPE (base);
2322 type = generic_type_for (orig_type);
2323 if (type != orig_type)
2324 {
2325 base = fold_convert (type, base);
2326 step = fold_convert (type, step);
2327 }
2328 }
2329
2330 for (i = 0; i < n_iv_cands (data); i++)
2331 {
2332 cand = iv_cand (data, i);
2333
2334 if (cand->pos != pos)
2335 continue;
2336
2337 if (cand->incremented_at != incremented_at
2338 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2339 && cand->ainc_use != use))
2340 continue;
2341
2342 if (!cand->iv)
2343 {
2344 if (!base && !step)
2345 break;
2346
2347 continue;
2348 }
2349
2350 if (!base && !step)
2351 continue;
2352
2353 if (operand_equal_p (base, cand->iv->base, 0)
2354 && operand_equal_p (step, cand->iv->step, 0)
2355 && (TYPE_PRECISION (TREE_TYPE (base))
2356 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2357 break;
2358 }
2359
2360 if (i == n_iv_cands (data))
2361 {
2362 cand = XCNEW (struct iv_cand);
2363 cand->id = i;
2364
2365 if (!base && !step)
2366 cand->iv = NULL;
2367 else
2368 cand->iv = alloc_iv (base, step);
2369
2370 cand->pos = pos;
2371 if (pos != IP_ORIGINAL && cand->iv)
2372 {
2373 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2374 cand->var_after = cand->var_before;
2375 }
2376 cand->important = important;
2377 cand->incremented_at = incremented_at;
2378 data->iv_candidates.safe_push (cand);
2379
2380 if (step
2381 && TREE_CODE (step) != INTEGER_CST)
2382 {
2383 fd_ivopts_data = data;
2384 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2385 }
2386
2387 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2388 cand->ainc_use = use;
2389 else
2390 cand->ainc_use = NULL;
2391
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 dump_cand (dump_file, cand);
2394 }
2395
2396 if (important && !cand->important)
2397 {
2398 cand->important = true;
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2401 }
2402
2403 if (use)
2404 {
2405 bitmap_set_bit (use->related_cands, i);
2406 if (dump_file && (dump_flags & TDF_DETAILS))
2407 fprintf (dump_file, "Candidate %d is related to use %d\n",
2408 cand->id, use->id);
2409 }
2410
2411 return cand;
2412 }
2413
2414 /* Returns true if incrementing the induction variable at the end of the LOOP
2415 is allowed.
2416
2417 The purpose is to avoid splitting the latch edge with a biv increment, thus
2418 creating a jump, possibly confusing other optimization passes and leaving
2419 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2420 is not available (so we do not have a better alternative), or if the latch
2421 edge is already nonempty. */
2422
2423 static bool
2424 allow_ip_end_pos_p (struct loop *loop)
2425 {
2426 if (!ip_normal_pos (loop))
2427 return true;
2428
2429 if (!empty_block_p (ip_end_pos (loop)))
2430 return true;
2431
2432 return false;
2433 }
2434
2435 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2436 Important field is set to IMPORTANT. */
2437
2438 static void
2439 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2440 bool important, struct iv_use *use)
2441 {
2442 basic_block use_bb = gimple_bb (use->stmt);
2443 machine_mode mem_mode;
2444 unsigned HOST_WIDE_INT cstepi;
2445
2446 /* If we insert the increment in any position other than the standard
2447 ones, we must ensure that it is incremented once per iteration.
2448 It must not be in an inner nested loop, or on one side of an if
2449 statement. */
2450 if (use_bb->loop_father != data->current_loop
2451 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2452 || stmt_could_throw_p (use->stmt)
2453 || !cst_and_fits_in_hwi (step))
2454 return;
2455
2456 cstepi = int_cst_value (step);
2457
2458 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2459 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2460 || USE_STORE_PRE_INCREMENT (mem_mode))
2461 && GET_MODE_SIZE (mem_mode) == cstepi)
2462 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2463 || USE_STORE_PRE_DECREMENT (mem_mode))
2464 && GET_MODE_SIZE (mem_mode) == -cstepi))
2465 {
2466 enum tree_code code = MINUS_EXPR;
2467 tree new_base;
2468 tree new_step = step;
2469
2470 if (POINTER_TYPE_P (TREE_TYPE (base)))
2471 {
2472 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2473 code = POINTER_PLUS_EXPR;
2474 }
2475 else
2476 new_step = fold_convert (TREE_TYPE (base), new_step);
2477 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2478 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2479 use->stmt);
2480 }
2481 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2482 || USE_STORE_POST_INCREMENT (mem_mode))
2483 && GET_MODE_SIZE (mem_mode) == cstepi)
2484 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2485 || USE_STORE_POST_DECREMENT (mem_mode))
2486 && GET_MODE_SIZE (mem_mode) == -cstepi))
2487 {
2488 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2489 use->stmt);
2490 }
2491 }
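
Illustrative sketch, not part of the source file: the kind of loop these candidates target -- on machines that provide post-increment addressing, the pointer step folds into the memory access itself (function name hypothetical):

static void
copy (char *dst, const char *src, int n)
{
  while (n-- > 0)
    *dst++ = *src++;   /* Maps naturally to {POST_INC} addressing, so an
                          IP_AFTER_USE candidate can be essentially free.  */
}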
2492
2493 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2494 position to POS. If USE is not NULL, the candidate is set as related to
2495 it. The candidate computation is scheduled on all available positions. */
2496
2497 static void
2498 add_candidate (struct ivopts_data *data,
2499 tree base, tree step, bool important, struct iv_use *use)
2500 {
2501 if (ip_normal_pos (data->current_loop))
2502 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2503 if (ip_end_pos (data->current_loop)
2504 && allow_ip_end_pos_p (data->current_loop))
2505 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2506
2507 if (use != NULL && use->type == USE_ADDRESS)
2508 add_autoinc_candidates (data, base, step, important, use);
2509 }
2510
2511 /* Adds standard iv candidates. */
2512
2513 static void
2514 add_standard_iv_candidates (struct ivopts_data *data)
2515 {
2516 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2517
2518 /* The same for the `long' type if it is still fast enough. */
2519 if (TYPE_PRECISION
2520 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2521 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2522 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2523 build_int_cst (long_integer_type_node, 1), true, NULL);
2524
2525 /* The same for the `long long' type if it is still fast enough. */
2526 if (TYPE_PRECISION
2527 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2528 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2529 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2530 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2531 }
2532
2533
2534 /* Adds candidates based on the old induction variable IV. */
2535
2536 static void
2537 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2538 {
2539 gimple phi;
2540 tree def;
2541 struct iv_cand *cand;
2542
2543 add_candidate (data, iv->base, iv->step, true, NULL);
2544
2545 /* The same, but with initial value zero. */
2546 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2547 add_candidate (data, size_int (0), iv->step, true, NULL);
2548 else
2549 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2550 iv->step, true, NULL);
2551
2552 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2553 if (gimple_code (phi) == GIMPLE_PHI)
2554 {
2555 /* Additionally record the possibility of leaving the original iv
2556 untouched. */
2557 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2558 /* Don't add candidate if it's from another PHI node because
2559 it's an affine iv appearing in the form of PEELED_CHREC. */
2560 phi = SSA_NAME_DEF_STMT (def);
2561 if (gimple_code (phi) != GIMPLE_PHI)
2562 {
2563 cand = add_candidate_1 (data,
2564 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2565 SSA_NAME_DEF_STMT (def));
2566 cand->var_before = iv->ssa_name;
2567 cand->var_after = def;
2568 }
2569 else
2570 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2571 }
2572 }
2573
2574 /* Adds candidates based on the old induction variables. */
2575
2576 static void
2577 add_old_ivs_candidates (struct ivopts_data *data)
2578 {
2579 unsigned i;
2580 struct iv *iv;
2581 bitmap_iterator bi;
2582
2583 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2584 {
2585 iv = ver_info (data, i)->iv;
2586 if (iv && iv->biv_p && !integer_zerop (iv->step))
2587 add_old_iv_candidates (data, iv);
2588 }
2589 }
2590
2591 /* Adds candidates based on the value of the induction variable IV and USE. */
2592
2593 static void
2594 add_iv_value_candidates (struct ivopts_data *data,
2595 struct iv *iv, struct iv_use *use)
2596 {
2597 unsigned HOST_WIDE_INT offset;
2598 tree base;
2599 tree basetype;
2600
2601 add_candidate (data, iv->base, iv->step, false, use);
2602
2603 /* The same, but with initial value zero. Make such a variable important,
2604 since it is generic enough that possibly many uses may be based
2605 on it. */
2606 basetype = TREE_TYPE (iv->base);
2607 if (POINTER_TYPE_P (basetype))
2608 basetype = sizetype;
2609 add_candidate (data, build_int_cst (basetype, 0),
2610 iv->step, true, use);
2611
2612 /* Third, try removing the constant offset. Make sure to even
2613 add a candidate for &a[0] vs. (T *)&a. */
2614 base = strip_offset (iv->base, &offset);
2615 if (offset
2616 || base != iv->base)
2617 add_candidate (data, base, iv->step, false, use);
2618 }
2619
2620 /* Adds candidates based on the uses. */
2621
2622 static void
2623 add_derived_ivs_candidates (struct ivopts_data *data)
2624 {
2625 unsigned i;
2626
2627 for (i = 0; i < n_iv_uses (data); i++)
2628 {
2629 struct iv_use *use = iv_use (data, i);
2630
2631 if (!use)
2632 continue;
2633
2634 switch (use->type)
2635 {
2636 case USE_NONLINEAR_EXPR:
2637 case USE_COMPARE:
2638 case USE_ADDRESS:
2639 /* Just add the ivs based on the value of the iv used here. */
2640 add_iv_value_candidates (data, use->iv, use);
2641 break;
2642
2643 default:
2644 gcc_unreachable ();
2645 }
2646 }
2647 }
2648
2649 /* Record important candidates and add them to related_cands bitmaps
2650 if needed. */
2651
2652 static void
2653 record_important_candidates (struct ivopts_data *data)
2654 {
2655 unsigned i;
2656 struct iv_use *use;
2657
2658 for (i = 0; i < n_iv_cands (data); i++)
2659 {
2660 struct iv_cand *cand = iv_cand (data, i);
2661
2662 if (cand->important)
2663 bitmap_set_bit (data->important_candidates, i);
2664 }
2665
2666 data->consider_all_candidates = (n_iv_cands (data)
2667 <= CONSIDER_ALL_CANDIDATES_BOUND);
2668
2669 if (data->consider_all_candidates)
2670 {
2671 /* We will not need "related_cands" bitmaps in this case,
2672 so release them to decrease peak memory consumption. */
2673 for (i = 0; i < n_iv_uses (data); i++)
2674 {
2675 use = iv_use (data, i);
2676 BITMAP_FREE (use->related_cands);
2677 }
2678 }
2679 else
2680 {
2681 /* Add important candidates to the related_cands bitmaps. */
2682 for (i = 0; i < n_iv_uses (data); i++)
2683 bitmap_ior_into (iv_use (data, i)->related_cands,
2684 data->important_candidates);
2685 }
2686 }
2687
2688 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2689 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2690 we allocate a simple list for every use. */
2691
2692 static void
2693 alloc_use_cost_map (struct ivopts_data *data)
2694 {
2695 unsigned i, size, s;
2696
2697 for (i = 0; i < n_iv_uses (data); i++)
2698 {
2699 struct iv_use *use = iv_use (data, i);
2700
2701 if (data->consider_all_candidates)
2702 size = n_iv_cands (data);
2703 else
2704 {
2705 s = bitmap_count_bits (use->related_cands);
2706
2707 /* Round up to a power of two, so that taking the modulo by it is fast. */
2708 size = s ? (1 << ceil_log2 (s)) : 1;
2709 }
2710
2711 use->n_map_members = size;
2712 use->cost_map = XCNEWVEC (struct cost_pair, size);
2713 }
2714 }
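
Illustrative sketch, not part of the source file: rounding the map size up to a power of two lets set_use_iv_cost reduce an id to a slot with a single mask. A standalone check of the identity, assuming GCC's __builtin_clz and s >= 2:

#include <stdio.h>

int main (void)
{
  unsigned s = 5;                                      /* related candidates */
  unsigned size = 1u << (32 - __builtin_clz (s - 1));  /* ceil to pow2: 8 */
  for (unsigned id = 0; id < 100; id++)
    if ((id & (size - 1)) != id % size)
      return 1;
  printf ("size = %u; 'id & (size - 1)' matches 'id %% size'\n", size);
  return 0;
}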
2715
2716 /* Returns description of computation cost of expression whose runtime
2717 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2718
2719 static comp_cost
2720 new_cost (unsigned runtime, unsigned complexity)
2721 {
2722 comp_cost cost;
2723
2724 cost.cost = runtime;
2725 cost.complexity = complexity;
2726
2727 return cost;
2728 }
2729
2730 /* Adds costs COST1 and COST2. */
2731
2732 static comp_cost
2733 add_costs (comp_cost cost1, comp_cost cost2)
2734 {
2735 cost1.cost += cost2.cost;
2736 cost1.complexity += cost2.complexity;
2737
2738 return cost1;
2739 }
2740 /* Subtracts COST2 from COST1. */
2741
2742 static comp_cost
2743 sub_costs (comp_cost cost1, comp_cost cost2)
2744 {
2745 cost1.cost -= cost2.cost;
2746 cost1.complexity -= cost2.complexity;
2747
2748 return cost1;
2749 }
2750
2751 /* Returns a negative number if COST1 < COST2, a positive number if
2752 COST1 > COST2, and 0 if COST1 = COST2. */
2753
2754 static int
2755 compare_costs (comp_cost cost1, comp_cost cost2)
2756 {
2757 if (cost1.cost == cost2.cost)
2758 return cost1.complexity - cost2.complexity;
2759
2760 return cost1.cost - cost2.cost;
2761 }
2762
2763 /* Returns true if COST is infinite. */
2764
2765 static bool
2766 infinite_cost_p (comp_cost cost)
2767 {
2768 return cost.cost == INFTY;
2769 }
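
Illustrative sketch, not part of the source file: the comparison above is effectively lexicographic -- runtime cost decides first, complexity breaks ties:

#include <stdio.h>

struct cc { int cost; int complexity; };

static int
cmp (struct cc a, struct cc b)   /* Mirrors compare_costs.  */
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;
  return a.cost - b.cost;
}

int main (void)
{
  struct cc a = { 4, 1 }, b = { 4, 3 }, c = { 3, 9 };
  printf ("%d %d\n", cmp (a, b) < 0, cmp (c, a) < 0);   /* Prints "1 1".  */
  return 0;
}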
2770
2771 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2772 on invariants DEPENDS_ON and that the value used in expressing it
2773 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2774
2775 static void
2776 set_use_iv_cost (struct ivopts_data *data,
2777 struct iv_use *use, struct iv_cand *cand,
2778 comp_cost cost, bitmap depends_on, tree value,
2779 enum tree_code comp, int inv_expr_id)
2780 {
2781 unsigned i, s;
2782
2783 if (infinite_cost_p (cost))
2784 {
2785 BITMAP_FREE (depends_on);
2786 return;
2787 }
2788
2789 if (data->consider_all_candidates)
2790 {
2791 use->cost_map[cand->id].cand = cand;
2792 use->cost_map[cand->id].cost = cost;
2793 use->cost_map[cand->id].depends_on = depends_on;
2794 use->cost_map[cand->id].value = value;
2795 use->cost_map[cand->id].comp = comp;
2796 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2797 return;
2798 }
2799
2800 /* n_map_members is a power of two, so this computes modulo. */
2801 s = cand->id & (use->n_map_members - 1);
2802 for (i = s; i < use->n_map_members; i++)
2803 if (!use->cost_map[i].cand)
2804 goto found;
2805 for (i = 0; i < s; i++)
2806 if (!use->cost_map[i].cand)
2807 goto found;
2808
2809 gcc_unreachable ();
2810
2811 found:
2812 use->cost_map[i].cand = cand;
2813 use->cost_map[i].cost = cost;
2814 use->cost_map[i].depends_on = depends_on;
2815 use->cost_map[i].value = value;
2816 use->cost_map[i].comp = comp;
2817 use->cost_map[i].inv_expr_id = inv_expr_id;
2818 }
2819
2820 /* Gets cost of (USE, CANDIDATE) pair. */
2821
2822 static struct cost_pair *
2823 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2824 struct iv_cand *cand)
2825 {
2826 unsigned i, s;
2827 struct cost_pair *ret;
2828
2829 if (!cand)
2830 return NULL;
2831
2832 if (data->consider_all_candidates)
2833 {
2834 ret = use->cost_map + cand->id;
2835 if (!ret->cand)
2836 return NULL;
2837
2838 return ret;
2839 }
2840
2841 /* n_map_members is a power of two, so this computes modulo. */
2842 s = cand->id & (use->n_map_members - 1);
2843 for (i = s; i < use->n_map_members; i++)
2844 if (use->cost_map[i].cand == cand)
2845 return use->cost_map + i;
2846 else if (use->cost_map[i].cand == NULL)
2847 return NULL;
2848 for (i = 0; i < s; i++)
2849 if (use->cost_map[i].cand == cand)
2850 return use->cost_map + i;
2851 else if (use->cost_map[i].cand == NULL)
2852 return NULL;
2853
2854 return NULL;
2855 }
2856
2857 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2858 static rtx
2859 produce_memory_decl_rtl (tree obj, int *regno)
2860 {
2861 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2862 machine_mode address_mode = targetm.addr_space.address_mode (as);
2863 rtx x;
2864
2865 gcc_assert (obj);
2866 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2867 {
2868 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2869 x = gen_rtx_SYMBOL_REF (address_mode, name);
2870 SET_SYMBOL_REF_DECL (x, obj);
2871 x = gen_rtx_MEM (DECL_MODE (obj), x);
2872 set_mem_addr_space (x, as);
2873 targetm.encode_section_info (obj, x, true);
2874 }
2875 else
2876 {
2877 x = gen_raw_REG (address_mode, (*regno)++);
2878 x = gen_rtx_MEM (DECL_MODE (obj), x);
2879 set_mem_addr_space (x, as);
2880 }
2881
2882 return x;
2883 }
2884
2885 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2886 walk_tree. DATA contains the actual fake register number. */
2887
2888 static tree
2889 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2890 {
2891 tree obj = NULL_TREE;
2892 rtx x = NULL_RTX;
2893 int *regno = (int *) data;
2894
2895 switch (TREE_CODE (*expr_p))
2896 {
2897 case ADDR_EXPR:
2898 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2899 handled_component_p (*expr_p);
2900 expr_p = &TREE_OPERAND (*expr_p, 0))
2901 continue;
2902 obj = *expr_p;
2903 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2904 x = produce_memory_decl_rtl (obj, regno);
2905 break;
2906
2907 case SSA_NAME:
2908 *ws = 0;
2909 obj = SSA_NAME_VAR (*expr_p);
2910 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2911 if (!obj)
2912 return NULL_TREE;
2913 if (!DECL_RTL_SET_P (obj))
2914 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2915 break;
2916
2917 case VAR_DECL:
2918 case PARM_DECL:
2919 case RESULT_DECL:
2920 *ws = 0;
2921 obj = *expr_p;
2922
2923 if (DECL_RTL_SET_P (obj))
2924 break;
2925
2926 if (DECL_MODE (obj) == BLKmode)
2927 x = produce_memory_decl_rtl (obj, regno);
2928 else
2929 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2930
2931 break;
2932
2933 default:
2934 break;
2935 }
2936
2937 if (x)
2938 {
2939 decl_rtl_to_reset.safe_push (obj);
2940 SET_DECL_RTL (obj, x);
2941 }
2942
2943 return NULL_TREE;
2944 }
2945
2946 /* Determines cost of the computation of EXPR. */
2947
2948 static unsigned
2949 computation_cost (tree expr, bool speed)
2950 {
2951 rtx_insn *seq;
2952 rtx rslt;
2953 tree type = TREE_TYPE (expr);
2954 unsigned cost;
2955 /* Avoid using hard regs in ways which may be unsupported. */
2956 int regno = LAST_VIRTUAL_REGISTER + 1;
2957 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2958 enum node_frequency real_frequency = node->frequency;
2959
2960 node->frequency = NODE_FREQUENCY_NORMAL;
2961 crtl->maybe_hot_insn_p = speed;
2962 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2963 start_sequence ();
2964 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2965 seq = get_insns ();
2966 end_sequence ();
2967 default_rtl_profile ();
2968 node->frequency = real_frequency;
2969
2970 cost = seq_cost (seq, speed);
2971 if (MEM_P (rslt))
2972 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2973 TYPE_ADDR_SPACE (type), speed);
2974 else if (!REG_P (rslt))
2975 cost += set_src_cost (rslt, speed);
2976
2977 return cost;
2978 }
2979
2980 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2981
2982 static tree
2983 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2984 {
2985 if (stmt_after_increment (loop, cand, stmt))
2986 return cand->var_after;
2987 else
2988 return cand->var_before;
2989 }
2990
2991 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2992 same precision that is at least as wide as the precision of TYPE, stores
2993 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2994 type of A and B. */
2995
2996 static tree
2997 determine_common_wider_type (tree *a, tree *b)
2998 {
2999 tree wider_type = NULL;
3000 tree suba, subb;
3001 tree atype = TREE_TYPE (*a);
3002
3003 if (CONVERT_EXPR_P (*a))
3004 {
3005 suba = TREE_OPERAND (*a, 0);
3006 wider_type = TREE_TYPE (suba);
3007 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3008 return atype;
3009 }
3010 else
3011 return atype;
3012
3013 if (CONVERT_EXPR_P (*b))
3014 {
3015 subb = TREE_OPERAND (*b, 0);
3016 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3017 return atype;
3018 }
3019 else
3020 return atype;
3021
3022 *a = suba;
3023 *b = subb;
3024 return wider_type;
3025 }
3026
3027 /* Determines the expression by which USE is expressed from induction variable
3028 CAND at statement AT in LOOP. The expression is stored in a decomposed
3029 form into AFF. Returns false if USE cannot be expressed using CAND. */
3030
3031 static bool
3032 get_computation_aff (struct loop *loop,
3033 struct iv_use *use, struct iv_cand *cand, gimple at,
3034 struct aff_tree *aff)
3035 {
3036 tree ubase = use->iv->base;
3037 tree ustep = use->iv->step;
3038 tree cbase = cand->iv->base;
3039 tree cstep = cand->iv->step, cstep_common;
3040 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3041 tree common_type, var;
3042 tree uutype;
3043 aff_tree cbase_aff, var_aff;
3044 widest_int rat;
3045
3046 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3047 {
3048 /* We do not have enough precision to express the values of the use. */
3049 return false;
3050 }
3051
3052 var = var_at_stmt (loop, cand, at);
3053 uutype = unsigned_type_for (utype);
3054
3055 /* If the conversion is not noop, perform it. */
3056 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3057 {
3058 cstep = fold_convert (uutype, cstep);
3059 cbase = fold_convert (uutype, cbase);
3060 var = fold_convert (uutype, var);
3061 }
3062
3063 if (!constant_multiple_of (ustep, cstep, &rat))
3064 return false;
3065
3066 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3067 type, we achieve better folding by computing their difference in this
3068 wider type, and cast the result to UUTYPE. We do not need to worry about
3069 overflows, as all the arithmetic will in the end be performed in UUTYPE
3070 anyway. */
3071 common_type = determine_common_wider_type (&ubase, &cbase);
3072
3073 /* use = ubase - ratio * cbase + ratio * var. */
3074 tree_to_aff_combination (ubase, common_type, aff);
3075 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3076 tree_to_aff_combination (var, uutype, &var_aff);
3077
3078 /* We need to shift the value if we are after the increment. */
3079 if (stmt_after_increment (loop, cand, at))
3080 {
3081 aff_tree cstep_aff;
3082
3083 if (common_type != uutype)
3084 cstep_common = fold_convert (common_type, cstep);
3085 else
3086 cstep_common = cstep;
3087
3088 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3089 aff_combination_add (&cbase_aff, &cstep_aff);
3090 }
3091
3092 aff_combination_scale (&cbase_aff, -rat);
3093 aff_combination_add (aff, &cbase_aff);
3094 if (common_type != uutype)
3095 aff_combination_convert (aff, uutype);
3096
3097 aff_combination_scale (&var_aff, rat);
3098 aff_combination_add (aff, &var_aff);
3099
3100 return true;
3101 }
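
Illustrative sketch, not part of the source file: a numeric check of the identity used above, use = ubase - ratio * cbase + ratio * var, where ratio = ustep / cstep (all values hypothetical):

#include <stdio.h>

int main (void)
{
  long ubase = 100, ustep = 8;    /* Use iv: 100 + 8*i.  */
  long cbase = 4, cstep = 4;      /* Candidate iv: 4 + 4*i.  */
  long ratio = ustep / cstep;     /* constant_multiple_of would find 2.  */
  for (long i = 0; i < 10; i++)
    {
      long var = cbase + i * cstep;
      long use = ubase + i * ustep;
      if (use != ubase - ratio * cbase + ratio * var)
        return 1;
    }
  printf ("identity holds\n");
  return 0;
}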
3102
3103 /* Return the type of USE. */
3104
3105 static tree
3106 get_use_type (struct iv_use *use)
3107 {
3108 tree base_type = TREE_TYPE (use->iv->base);
3109 tree type;
3110
3111 if (use->type == USE_ADDRESS)
3112 {
3113 /* The base_type may be a void pointer. Create a pointer type based on
3114 the mem_ref instead. */
3115 type = build_pointer_type (TREE_TYPE (*use->op_p));
3116 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3117 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3118 }
3119 else
3120 type = base_type;
3121
3122 return type;
3123 }
3124
3125 /* Determines the expression by which USE is expressed from induction variable
3126 CAND at statement AT in LOOP. The computation is unshared. */
3127
3128 static tree
3129 get_computation_at (struct loop *loop,
3130 struct iv_use *use, struct iv_cand *cand, gimple at)
3131 {
3132 aff_tree aff;
3133 tree type = get_use_type (use);
3134
3135 if (!get_computation_aff (loop, use, cand, at, &aff))
3136 return NULL_TREE;
3137 unshare_aff_combination (&aff);
3138 return fold_convert (type, aff_combination_to_tree (&aff));
3139 }
3140
3141 /* Determines the expression by which USE is expressed from induction variable
3142 CAND in LOOP. The computation is unshared. */
3143
3144 static tree
3145 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3146 {
3147 return get_computation_at (loop, use, cand, use->stmt);
3148 }
3149
3150 /* Adjust the cost COST for being in loop setup rather than loop body.
3151 If we're optimizing for space, the loop setup overhead is constant;
3152 if we're optimizing for speed, amortize it over the per-iteration cost. */
3153 static unsigned
3154 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3155 {
3156 if (cost == INFTY)
3157 return cost;
3158 else if (optimize_loop_for_speed_p (data->current_loop))
3159 return cost / avg_loop_niter (data->current_loop);
3160 else
3161 return cost;
3162 }
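
Illustrative sketch, not part of the source file: the amortization in numbers -- a one-time setup cost matters less the more iterations it is spread over (values hypothetical):

#include <stdio.h>

int main (void)
{
  unsigned setup = 40, avg_niter = 10;
  /* Optimizing for speed: charge setup / avg_niter per iteration.
     Optimizing for size: the setup code is emitted once regardless.  */
  printf ("speed: %u per iteration; size: %u flat\n",
          setup / avg_niter, setup);
  return 0;
}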
3163
3164 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3165 validity for a memory reference accessing memory of mode MODE in
3166 address space AS. */
3167
3168
3169 bool
3170 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3171 addr_space_t as)
3172 {
3173 #define MAX_RATIO 128
3174 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3175 static vec<sbitmap> valid_mult_list;
3176 sbitmap valid_mult;
3177
3178 if (data_index >= valid_mult_list.length ())
3179 valid_mult_list.safe_grow_cleared (data_index + 1);
3180
3181 valid_mult = valid_mult_list[data_index];
3182 if (!valid_mult)
3183 {
3184 machine_mode address_mode = targetm.addr_space.address_mode (as);
3185 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3186 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3187 rtx addr, scaled;
3188 HOST_WIDE_INT i;
3189
3190 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3191 bitmap_clear (valid_mult);
3192 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3193 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3194 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3195 {
3196 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3197 if (memory_address_addr_space_p (mode, addr, as)
3198 || memory_address_addr_space_p (mode, scaled, as))
3199 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3200 }
3201
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 {
3204 fprintf (dump_file, " allowed multipliers:");
3205 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3206 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3207 fprintf (dump_file, " %d", (int) i);
3208 fprintf (dump_file, "\n");
3209 fprintf (dump_file, "\n");
3210 }
3211
3212 valid_mult_list[data_index] = valid_mult;
3213 }
3214
3215 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3216 return false;
3217
3218 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3219 }
3220
3221 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3222 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3223 variable is omitted. Compute the cost for a memory reference that accesses
3224 a memory location of mode MEM_MODE in address space AS.
3225
3226 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3227 size of MEM_MODE / RATIO) is available. To make this determination, we
3228 look at the size of the increment to be made, which is given in CSTEP.
3229 CSTEP may be zero if the step is unknown.
3230 STMT_AFTER_INC is true iff the statement we're looking at is after the
3231 increment of the original biv.
3232
3233 TODO -- there must be some better way. All this is quite crude. */
3234
3235 enum ainc_type
3236 {
3237 AINC_PRE_INC, /* Pre increment. */
3238 AINC_PRE_DEC, /* Pre decrement. */
3239 AINC_POST_INC, /* Post increment. */
3240 AINC_POST_DEC, /* Post decrement. */
3241 AINC_NONE /* Also the number of auto increment types. */
3242 };
3243
3244 typedef struct address_cost_data_s
3245 {
3246 HOST_WIDE_INT min_offset, max_offset;
3247 unsigned costs[2][2][2][2];
3248 unsigned ainc_costs[AINC_NONE];
3249 } *address_cost_data;
3250
3251
3252 static comp_cost
3253 get_address_cost (bool symbol_present, bool var_present,
3254 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3255 HOST_WIDE_INT cstep, machine_mode mem_mode,
3256 addr_space_t as, bool speed,
3257 bool stmt_after_inc, bool *may_autoinc)
3258 {
3259 machine_mode address_mode = targetm.addr_space.address_mode (as);
3260 static vec<address_cost_data> address_cost_data_list;
3261 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3262 address_cost_data data;
3263 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3264 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3265 unsigned cost, acost, complexity;
3266 enum ainc_type autoinc_type;
3267 bool offset_p, ratio_p, autoinc;
3268 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3269 unsigned HOST_WIDE_INT mask;
3270 unsigned bits;
3271
3272 if (data_index >= address_cost_data_list.length ())
3273 address_cost_data_list.safe_grow_cleared (data_index + 1);
3274
3275 data = address_cost_data_list[data_index];
3276 if (!data)
3277 {
3278 HOST_WIDE_INT i;
3279 HOST_WIDE_INT rat, off = 0;
3280 int old_cse_not_expected, width;
3281 unsigned sym_p, var_p, off_p, rat_p, add_c;
3282 rtx_insn *seq;
3283 rtx addr, base;
3284 rtx reg0, reg1;
3285
3286 data = (address_cost_data) xcalloc (1, sizeof (*data));
3287
3288 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3289
3290 width = GET_MODE_BITSIZE (address_mode) - 1;
3291 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3292 width = HOST_BITS_PER_WIDE_INT - 1;
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3294
3295 for (i = width; i >= 0; i--)
3296 {
3297 off = -((unsigned HOST_WIDE_INT) 1 << i);
3298 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3299 if (memory_address_addr_space_p (mem_mode, addr, as))
3300 break;
3301 }
3302 data->min_offset = (i == -1? 0 : off);
3303
3304 for (i = width; i >= 0; i--)
3305 {
3306 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3307 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3308 if (memory_address_addr_space_p (mem_mode, addr, as))
3309 break;
3310 /* For some targets, like ARM Thumb-1, the offset should be naturally
3311 aligned. Try an aligned offset if address_mode is not QImode. */
3312 off = (address_mode == QImode)
3313 ? 0
3314 : ((unsigned HOST_WIDE_INT) 1 << i)
3315 - GET_MODE_SIZE (address_mode);
3316 if (off > 0)
3317 {
3318 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3319 if (memory_address_addr_space_p (mem_mode, addr, as))
3320 break;
3321 }
3322 }
3323 if (i == -1)
3324 off = 0;
3325 data->max_offset = off;
3326
3327 if (dump_file && (dump_flags & TDF_DETAILS))
3328 {
3329 fprintf (dump_file, "get_address_cost:\n");
3330 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3331 GET_MODE_NAME (mem_mode),
3332 data->min_offset);
3333 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3334 GET_MODE_NAME (mem_mode),
3335 data->max_offset);
3336 }
3337
3338 rat = 1;
3339 for (i = 2; i <= MAX_RATIO; i++)
3340 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3341 {
3342 rat = i;
3343 break;
3344 }
3345
3346 /* Compute the cost of various addressing modes. */
3347 acost = 0;
3348 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3349 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3350
3351 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3352 || USE_STORE_PRE_DECREMENT (mem_mode))
3353 {
3354 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3355 has_predec[mem_mode]
3356 = memory_address_addr_space_p (mem_mode, addr, as);
3357
3358 if (has_predec[mem_mode])
3359 data->ainc_costs[AINC_PRE_DEC]
3360 = address_cost (addr, mem_mode, as, speed);
3361 }
3362 if (USE_LOAD_POST_DECREMENT (mem_mode)
3363 || USE_STORE_POST_DECREMENT (mem_mode))
3364 {
3365 addr = gen_rtx_POST_DEC (address_mode, reg0);
3366 has_postdec[mem_mode]
3367 = memory_address_addr_space_p (mem_mode, addr, as);
3368
3369 if (has_postdec[mem_mode])
3370 data->ainc_costs[AINC_POST_DEC]
3371 = address_cost (addr, mem_mode, as, speed);
3372 }
3373 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3374 || USE_STORE_PRE_INCREMENT (mem_mode))
3375 {
3376 addr = gen_rtx_PRE_INC (address_mode, reg0);
3377 has_preinc[mem_mode]
3378 = memory_address_addr_space_p (mem_mode, addr, as);
3379
3380 if (has_preinc[mem_mode])
3381 data->ainc_costs[AINC_PRE_INC]
3382 = address_cost (addr, mem_mode, as, speed);
3383 }
3384 if (USE_LOAD_POST_INCREMENT (mem_mode)
3385 || USE_STORE_POST_INCREMENT (mem_mode))
3386 {
3387 addr = gen_rtx_POST_INC (address_mode, reg0);
3388 has_postinc[mem_mode]
3389 = memory_address_addr_space_p (mem_mode, addr, as);
3390
3391 if (has_postinc[mem_mode])
3392 data->ainc_costs[AINC_POST_INC]
3393 = address_cost (addr, mem_mode, as, speed);
3394 }
3395 for (i = 0; i < 16; i++)
3396 {
3397 sym_p = i & 1;
3398 var_p = (i >> 1) & 1;
3399 off_p = (i >> 2) & 1;
3400 rat_p = (i >> 3) & 1;
3401
3402 addr = reg0;
3403 if (rat_p)
3404 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3405 gen_int_mode (rat, address_mode));
3406
3407 if (var_p)
3408 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3409
3410 if (sym_p)
3411 {
3412 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3413 /* ??? We can run into trouble with some backends by presenting
3414 it with symbols which haven't been properly passed through
3415 targetm.encode_section_info. By setting the local bit, we
3416 enhance the probability of things working. */
3417 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3418
3419 if (off_p)
3420 base = gen_rtx_fmt_e (CONST, address_mode,
3421 gen_rtx_fmt_ee
3422 (PLUS, address_mode, base,
3423 gen_int_mode (off, address_mode)));
3424 }
3425 else if (off_p)
3426 base = gen_int_mode (off, address_mode);
3427 else
3428 base = NULL_RTX;
3429
3430 if (base)
3431 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3432
3433 start_sequence ();
3434 /* To avoid splitting addressing modes, pretend that no cse will
3435 follow. */
3436 old_cse_not_expected = cse_not_expected;
3437 cse_not_expected = true;
3438 addr = memory_address_addr_space (mem_mode, addr, as);
3439 cse_not_expected = old_cse_not_expected;
3440 seq = get_insns ();
3441 end_sequence ();
3442
3443 acost = seq_cost (seq, speed);
3444 acost += address_cost (addr, mem_mode, as, speed);
3445
3446 if (!acost)
3447 acost = 1;
3448 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3449 }
3450
3451 /* On some targets, it is quite expensive to load a symbol into a register,
3452 which makes addresses that contain symbols look much more expensive.
3453 However, the symbol will have to be loaded in any case before the
3454 loop (and quite likely we have it in a register already), so it does not
3455 make much sense to penalize them too heavily. So make some final
3456 tweaks for the SYMBOL_PRESENT modes:
3457
3458 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3459 var is cheaper, use this mode with small penalty.
3460 If VAR_PRESENT is true, try whether the mode with
3461 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3462 if this is the case, use it. */
3463 add_c = add_cost (speed, address_mode);
3464 for (i = 0; i < 8; i++)
3465 {
3466 var_p = i & 1;
3467 off_p = (i >> 1) & 1;
3468 rat_p = (i >> 2) & 1;
3469
3470 acost = data->costs[0][1][off_p][rat_p] + 1;
3471 if (var_p)
3472 acost += add_c;
3473
3474 if (acost < data->costs[1][var_p][off_p][rat_p])
3475 data->costs[1][var_p][off_p][rat_p] = acost;
3476 }
3477
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3479 {
3480 fprintf (dump_file, "Address costs:\n");
3481
3482 for (i = 0; i < 16; i++)
3483 {
3484 sym_p = i & 1;
3485 var_p = (i >> 1) & 1;
3486 off_p = (i >> 2) & 1;
3487 rat_p = (i >> 3) & 1;
3488
3489 fprintf (dump_file, " ");
3490 if (sym_p)
3491 fprintf (dump_file, "sym + ");
3492 if (var_p)
3493 fprintf (dump_file, "var + ");
3494 if (off_p)
3495 fprintf (dump_file, "cst + ");
3496 if (rat_p)
3497 fprintf (dump_file, "rat * ");
3498
3499 acost = data->costs[sym_p][var_p][off_p][rat_p];
3500 fprintf (dump_file, "index costs %d\n", acost);
3501 }
3502 if (has_predec[mem_mode] || has_postdec[mem_mode]
3503 || has_preinc[mem_mode] || has_postinc[mem_mode])
3504 fprintf (dump_file, " May include autoinc/dec\n");
3505 fprintf (dump_file, "\n");
3506 }
3507
3508 address_cost_data_list[data_index] = data;
3509 }
3510
3511 bits = GET_MODE_BITSIZE (address_mode);
3512 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3513 offset &= mask;
3514 if ((offset >> (bits - 1) & 1))
3515 offset |= ~mask;
3516 s_offset = offset;
3517
3518 autoinc = false;
3519 autoinc_type = AINC_NONE;
3520 msize = GET_MODE_SIZE (mem_mode);
3521 autoinc_offset = offset;
3522 if (stmt_after_inc)
3523 autoinc_offset += ratio * cstep;
3524 if (symbol_present || var_present || ratio != 1)
3525 autoinc = false;
3526 else
3527 {
3528 if (has_postinc[mem_mode] && autoinc_offset == 0
3529 && msize == cstep)
3530 autoinc_type = AINC_POST_INC;
3531 else if (has_postdec[mem_mode] && autoinc_offset == 0
3532 && msize == -cstep)
3533 autoinc_type = AINC_POST_DEC;
3534 else if (has_preinc[mem_mode] && autoinc_offset == msize
3535 && msize == cstep)
3536 autoinc_type = AINC_PRE_INC;
3537 else if (has_predec[mem_mode] && autoinc_offset == -msize
3538 && msize == -cstep)
3539 autoinc_type = AINC_PRE_DEC;
3540
3541 if (autoinc_type != AINC_NONE)
3542 autoinc = true;
3543 }
3544
3545 cost = 0;
3546 offset_p = (s_offset != 0
3547 && data->min_offset <= s_offset
3548 && s_offset <= data->max_offset);
3549 ratio_p = (ratio != 1
3550 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3551
3552 if (ratio != 1 && !ratio_p)
3553 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3554
3555 if (s_offset && !offset_p && !symbol_present)
3556 cost += add_cost (speed, address_mode);
3557
3558 if (may_autoinc)
3559 *may_autoinc = autoinc;
3560 if (autoinc)
3561 acost = data->ainc_costs[autoinc_type];
3562 else
3563 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3564 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3565 return new_cost (cost + acost, complexity);
3566 }
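
Illustrative sketch, not part of the source file: the mask manipulation near the end of the function sign-extends OFFSET from the width of the address mode; shifting in two steps keeps the mask computation defined even when bits equals the host word width:

#include <stdio.h>

int main (void)
{
  unsigned bits = 16;                                      /* Pretend width.  */
  unsigned long long mask = ~(~0ULL << (bits - 1) << 1);   /* 0xffff.  */
  unsigned long long offset = 0xfff0;                      /* -16 in 16 bits.  */
  offset &= mask;
  if ((offset >> (bits - 1)) & 1)                          /* Sign bit set.  */
    offset |= ~mask;                                       /* Extend it.  */
  printf ("%lld\n", (long long) offset);                   /* Prints -16.  */
  return 0;
}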
3567
3568 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3569 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3570 calculating the operands of EXPR. Returns true if successful, and returns
3571 the cost in COST. */
3572
3573 static bool
3574 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3575 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3576 {
3577 comp_cost res;
3578 tree op1 = TREE_OPERAND (expr, 1);
3579 tree cst = TREE_OPERAND (mult, 1);
3580 tree multop = TREE_OPERAND (mult, 0);
3581 int m = exact_log2 (int_cst_value (cst));
3582 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3583 int sa_cost;
3584 bool equal_p = false;
3585
3586 if (!(m >= 0 && m < maxm))
3587 return false;
3588
3589 if (operand_equal_p (op1, mult, 0))
3590 equal_p = true;
3591
3592 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3593 ? shiftadd_cost (speed, mode, m)
3594 : (equal_p
3595 ? shiftsub1_cost (speed, mode, m)
3596 : shiftsub0_cost (speed, mode, m)));
3597 res = new_cost (sa_cost, 0);
3598 res = add_costs (res, equal_p ? cost0 : cost1);
3599
3600 STRIP_NOPS (multop);
3601 if (!is_gimple_val (multop))
3602 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3603
3604 *cost = res;
3605 return true;
3606 }
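
Illustrative sketch, not part of the source file: the rewrite being costed -- when the multiplier is a power of two (m = exact_log2 of the constant), a multiply-add becomes a shift-add:

#include <stdio.h>

int main (void)
{
  long a = 7, b = 5;
  int m = 3;                      /* cst == 1 << m == 8.  */
  long plain = b + a * 8;         /* PLUS_EXPR with a MULT_EXPR operand.  */
  long shiftadd = b + (a << m);   /* The cheaper shift-add form.  */
  printf ("%ld %ld\n", plain, shiftadd);
  return plain != shiftadd;
}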
3607
3608 /* Estimates cost of forcing expression EXPR into a variable. */
3609
3610 static comp_cost
3611 force_expr_to_var_cost (tree expr, bool speed)
3612 {
3613 static bool costs_initialized = false;
3614 static unsigned integer_cost [2];
3615 static unsigned symbol_cost [2];
3616 static unsigned address_cost [2];
3617 tree op0, op1;
3618 comp_cost cost0, cost1, cost;
3619 machine_mode mode;
3620
3621 if (!costs_initialized)
3622 {
3623 tree type = build_pointer_type (integer_type_node);
3624 tree var, addr;
3625 rtx x;
3626 int i;
3627
3628 var = create_tmp_var_raw (integer_type_node, "test_var");
3629 TREE_STATIC (var) = 1;
3630 x = produce_memory_decl_rtl (var, NULL);
3631 SET_DECL_RTL (var, x);
3632
3633 addr = build1 (ADDR_EXPR, type, var);
3634
3635
3636 for (i = 0; i < 2; i++)
3637 {
3638 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3639 2000), i);
3640
3641 symbol_cost[i] = computation_cost (addr, i) + 1;
3642
3643 address_cost[i]
3644 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3645 if (dump_file && (dump_flags & TDF_DETAILS))
3646 {
3647 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3648 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3649 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3650 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3651 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3652 fprintf (dump_file, "\n");
3653 }
3654 }
3655
3656 costs_initialized = true;
3657 }
3658
3659 STRIP_NOPS (expr);
3660
3661 if (SSA_VAR_P (expr))
3662 return no_cost;
3663
3664 if (is_gimple_min_invariant (expr))
3665 {
3666 if (TREE_CODE (expr) == INTEGER_CST)
3667 return new_cost (integer_cost [speed], 0);
3668
3669 if (TREE_CODE (expr) == ADDR_EXPR)
3670 {
3671 tree obj = TREE_OPERAND (expr, 0);
3672
3673 if (TREE_CODE (obj) == VAR_DECL
3674 || TREE_CODE (obj) == PARM_DECL
3675 || TREE_CODE (obj) == RESULT_DECL)
3676 return new_cost (symbol_cost [speed], 0);
3677 }
3678
3679 return new_cost (address_cost [speed], 0);
3680 }
3681
3682 switch (TREE_CODE (expr))
3683 {
3684 case POINTER_PLUS_EXPR:
3685 case PLUS_EXPR:
3686 case MINUS_EXPR:
3687 case MULT_EXPR:
3688 op0 = TREE_OPERAND (expr, 0);
3689 op1 = TREE_OPERAND (expr, 1);
3690 STRIP_NOPS (op0);
3691 STRIP_NOPS (op1);
3692 break;
3693
3694 CASE_CONVERT:
3695 case NEGATE_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 STRIP_NOPS (op0);
3698 op1 = NULL_TREE;
3699 break;
3700
3701 default:
3702 /* Just an arbitrary value, FIXME. */
3703 return new_cost (target_spill_cost[speed], 0);
3704 }
3705
3706 if (op0 == NULL_TREE
3707 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3708 cost0 = no_cost;
3709 else
3710 cost0 = force_expr_to_var_cost (op0, speed);
3711
3712 if (op1 == NULL_TREE
3713 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3714 cost1 = no_cost;
3715 else
3716 cost1 = force_expr_to_var_cost (op1, speed);
3717
3718 mode = TYPE_MODE (TREE_TYPE (expr));
3719 switch (TREE_CODE (expr))
3720 {
3721 case POINTER_PLUS_EXPR:
3722 case PLUS_EXPR:
3723 case MINUS_EXPR:
3724 case NEGATE_EXPR:
3725 cost = new_cost (add_cost (speed, mode), 0);
3726 if (TREE_CODE (expr) != NEGATE_EXPR)
3727 {
3728 tree mult = NULL_TREE;
3729 comp_cost sa_cost;
3730 if (TREE_CODE (op1) == MULT_EXPR)
3731 mult = op1;
3732 else if (TREE_CODE (op0) == MULT_EXPR)
3733 mult = op0;
3734
3735 if (mult != NULL_TREE
3736 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3737 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3738 speed, &sa_cost))
3739 return sa_cost;
3740 }
3741 break;
3742
3743 CASE_CONVERT:
3744 {
3745 tree inner_mode, outer_mode;
3746 outer_mode = TREE_TYPE (expr);
3747 inner_mode = TREE_TYPE (op0);
3748 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3749 TYPE_MODE (inner_mode), speed), 0);
3750 }
3751 break;
3752
3753 case MULT_EXPR:
3754 if (cst_and_fits_in_hwi (op0))
3755 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3756 mode, speed), 0);
3757 else if (cst_and_fits_in_hwi (op1))
3758 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3759 mode, speed), 0);
3760 else
3761 return new_cost (target_spill_cost [speed], 0);
3762 break;
3763
3764 default:
3765 gcc_unreachable ();
3766 }
3767
3768 cost = add_costs (cost, cost0);
3769 cost = add_costs (cost, cost1);
3770
3771 /* Bound the cost by target_spill_cost. The parts of complicated
3772 computations often are either loop invariant or at least can
3773 be shared between several iv uses, so letting this grow without
3774 limits would not give reasonable results. */
3775 if (cost.cost > (int) target_spill_cost [speed])
3776 cost.cost = target_spill_cost [speed];
3777
3778 return cost;
3779 }
3780
3781 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3782 invariants the computation depends on. */
3783
3784 static comp_cost
3785 force_var_cost (struct ivopts_data *data,
3786 tree expr, bitmap *depends_on)
3787 {
3788 if (depends_on)
3789 {
3790 fd_ivopts_data = data;
3791 walk_tree (&expr, find_depends, depends_on, NULL);
3792 }
3793
3794 return force_expr_to_var_cost (expr, data->speed);
3795 }
3796
3797 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3798 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3799 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3800 invariants the computation depends on. */
3801
3802 static comp_cost
3803 split_address_cost (struct ivopts_data *data,
3804 tree addr, bool *symbol_present, bool *var_present,
3805 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3806 {
3807 tree core;
3808 HOST_WIDE_INT bitsize;
3809 HOST_WIDE_INT bitpos;
3810 tree toffset;
3811 machine_mode mode;
3812 int unsignedp, volatilep;
3813
3814 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3815 &unsignedp, &volatilep, false);
3816
3817 if (toffset != 0
3818 || bitpos % BITS_PER_UNIT != 0
3819 || TREE_CODE (core) != VAR_DECL)
3820 {
3821 *symbol_present = false;
3822 *var_present = true;
3823 fd_ivopts_data = data;
3824 walk_tree (&addr, find_depends, depends_on, NULL);
3825 return new_cost (target_spill_cost[data->speed], 0);
3826 }
3827
3828 *offset += bitpos / BITS_PER_UNIT;
3829 if (TREE_STATIC (core)
3830 || DECL_EXTERNAL (core))
3831 {
3832 *symbol_present = true;
3833 *var_present = false;
3834 return no_cost;
3835 }
3836
3837 *symbol_present = false;
3838 *var_present = true;
3839 return no_cost;
3840 }
3841
3842 /* Estimates cost of expressing difference of addresses E1 - E2 as
3843 var + symbol + offset. The value of offset is added to OFFSET,
3844 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3845 part is missing. DEPENDS_ON is a set of the invariants the computation
3846 depends on. */
3847
3848 static comp_cost
3849 ptr_difference_cost (struct ivopts_data *data,
3850 tree e1, tree e2, bool *symbol_present, bool *var_present,
3851 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3852 {
3853 HOST_WIDE_INT diff = 0;
3854 aff_tree aff_e1, aff_e2;
3855 tree type;
3856
3857 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3858
3859 if (ptr_difference_const (e1, e2, &diff))
3860 {
3861 *offset += diff;
3862 *symbol_present = false;
3863 *var_present = false;
3864 return no_cost;
3865 }
3866
3867 if (integer_zerop (e2))
3868 return split_address_cost (data, TREE_OPERAND (e1, 0),
3869 symbol_present, var_present, offset, depends_on);
3870
3871 *symbol_present = false;
3872 *var_present = true;
3873
3874 type = signed_type_for (TREE_TYPE (e1));
3875 tree_to_aff_combination (e1, type, &aff_e1);
3876 tree_to_aff_combination (e2, type, &aff_e2);
3877 aff_combination_scale (&aff_e2, -1);
3878 aff_combination_add (&aff_e1, &aff_e2);
3879
3880 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3881 }
3882
3883 /* Estimates cost of expressing difference E1 - E2 as
3884 var + symbol + offset. The value of offset is added to OFFSET,
3885 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3886 part is missing. DEPENDS_ON is a set of the invariants the computation
3887 depends on. */
3888
3889 static comp_cost
3890 difference_cost (struct ivopts_data *data,
3891 tree e1, tree e2, bool *symbol_present, bool *var_present,
3892 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3893 {
3894 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3895 unsigned HOST_WIDE_INT off1, off2;
3896 aff_tree aff_e1, aff_e2;
3897 tree type;
3898
3899 e1 = strip_offset (e1, &off1);
3900 e2 = strip_offset (e2, &off2);
3901 *offset += off1 - off2;
3902
3903 STRIP_NOPS (e1);
3904 STRIP_NOPS (e2);
3905
3906 if (TREE_CODE (e1) == ADDR_EXPR)
3907 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3908 offset, depends_on);
3909 *symbol_present = false;
3910
3911 if (operand_equal_p (e1, e2, 0))
3912 {
3913 *var_present = false;
3914 return no_cost;
3915 }
3916
3917 *var_present = true;
3918
3919 if (integer_zerop (e2))
3920 return force_var_cost (data, e1, depends_on);
3921
3922 if (integer_zerop (e1))
3923 {
3924 comp_cost cost = force_var_cost (data, e2, depends_on);
3925 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3926 return cost;
3927 }
3928
3929 type = signed_type_for (TREE_TYPE (e1));
3930 tree_to_aff_combination (e1, type, &aff_e1);
3931 tree_to_aff_combination (e2, type, &aff_e2);
3932 aff_combination_scale (&aff_e2, -1);
3933 aff_combination_add (&aff_e1, &aff_e2);
3934
3935 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3936 }
3937
3938 /* Returns true if AFF1 and AFF2 are identical. */
3939
3940 static bool
3941 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3942 {
3943 unsigned i;
3944
3945 if (aff1->n != aff2->n)
3946 return false;
3947
3948 for (i = 0; i < aff1->n; i++)
3949 {
3950 if (aff1->elts[i].coef != aff2->elts[i].coef)
3951 return false;
3952
3953 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3954 return false;
3955 }
3956 return true;
3957 }
3958
3959 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3960
3961 static int
3962 get_expr_id (struct ivopts_data *data, tree expr)
3963 {
3964 struct iv_inv_expr_ent ent;
3965 struct iv_inv_expr_ent **slot;
3966
3967 ent.expr = expr;
3968 ent.hash = iterative_hash_expr (expr, 0);
3969 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3970 if (*slot)
3971 return (*slot)->id;
3972
3973 *slot = XNEW (struct iv_inv_expr_ent);
3974 (*slot)->expr = expr;
3975 (*slot)->hash = ent.hash;
3976 (*slot)->id = data->inv_expr_id++;
3977 return (*slot)->id;
3978 }
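/* This is a simple hash-consing scheme: structurally identical invariant
   expressions receive one shared id, so the later register-pressure
   accounting (see iv_ca_set_cp) counts each distinct invariant only
   once, however many uses refer to it.  For example (hypothetically),
   two uses that both need the invariant n + 1 record the same
   inv_expr_id in their cost pairs.  */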
3979
3980 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3981 requires a new compiler-generated temporary. Returns -1 otherwise.
3982 ADDRESS_P is a flag indicating if the expression is for address
3983 computation. */
3984
3985 static int
3986 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3987 tree cbase, HOST_WIDE_INT ratio,
3988 bool address_p)
3989 {
3990 aff_tree ubase_aff, cbase_aff;
3991 tree expr, ub, cb;
3992
3993 STRIP_NOPS (ubase);
3994 STRIP_NOPS (cbase);
3995 ub = ubase;
3996 cb = cbase;
3997
3998 if ((TREE_CODE (ubase) == INTEGER_CST)
3999 && (TREE_CODE (cbase) == INTEGER_CST))
4000 return -1;
4001
4002 /* Strips the constant part. */
4003 if (TREE_CODE (ubase) == PLUS_EXPR
4004 || TREE_CODE (ubase) == MINUS_EXPR
4005 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4006 {
4007 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4008 ubase = TREE_OPERAND (ubase, 0);
4009 }
4010
4011 /* Strips the constant part. */
4012 if (TREE_CODE (cbase) == PLUS_EXPR
4013 || TREE_CODE (cbase) == MINUS_EXPR
4014 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4015 {
4016 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4017 cbase = TREE_OPERAND (cbase, 0);
4018 }
4019
4020 if (address_p)
4021 {
4022 if (((TREE_CODE (ubase) == SSA_NAME)
4023 || (TREE_CODE (ubase) == ADDR_EXPR
4024 && is_gimple_min_invariant (ubase)))
4025 && (TREE_CODE (cbase) == INTEGER_CST))
4026 return -1;
4027
4028 if (((TREE_CODE (cbase) == SSA_NAME)
4029 || (TREE_CODE (cbase) == ADDR_EXPR
4030 && is_gimple_min_invariant (cbase)))
4031 && (TREE_CODE (ubase) == INTEGER_CST))
4032 return -1;
4033 }
4034
4035 if (ratio == 1)
4036 {
4037 if (operand_equal_p (ubase, cbase, 0))
4038 return -1;
4039
4040 if (TREE_CODE (ubase) == ADDR_EXPR
4041 && TREE_CODE (cbase) == ADDR_EXPR)
4042 {
4043 tree usym, csym;
4044
4045 usym = TREE_OPERAND (ubase, 0);
4046 csym = TREE_OPERAND (cbase, 0);
4047 if (TREE_CODE (usym) == ARRAY_REF)
4048 {
4049 tree ind = TREE_OPERAND (usym, 1);
4050 if (TREE_CODE (ind) == INTEGER_CST
4051 && tree_fits_shwi_p (ind)
4052 && tree_to_shwi (ind) == 0)
4053 usym = TREE_OPERAND (usym, 0);
4054 }
4055 if (TREE_CODE (csym) == ARRAY_REF)
4056 {
4057 tree ind = TREE_OPERAND (csym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 csym = TREE_OPERAND (csym, 0);
4062 }
4063 if (operand_equal_p (usym, csym, 0))
4064 return -1;
4065 }
4066 /* Now do a more complex comparison. */
4067 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4068 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4069 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4070 return -1;
4071 }
4072
4073 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4074 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4075
4076 aff_combination_scale (&cbase_aff, -1 * ratio);
4077 aff_combination_add (&ubase_aff, &cbase_aff);
4078 expr = aff_combination_to_tree (&ubase_aff);
4079 return get_expr_id (data, expr);
4080 }
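/* Hypothetical examples of the above: with ubase = p + 4, cbase = p and
   ratio == 1, stripping the constant part leaves equal operands, so no
   temporary is needed and -1 is returned.  With ubase = p, cbase = q and
   ratio == 2, the loop invariant p - 2*q must live in a temporary, so
   its id from get_expr_id is returned.  */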
4081
4082
4083
4084 /* Determines the cost of the computation by which USE is expressed
4085 from induction variable CAND. If ADDRESS_P is true, we just need
4086 to create an address from it, otherwise we want to get it into
4087 a register. A set of invariants we depend on is stored in
4088 DEPENDS_ON. AT is the statement at which the value is computed.
4089 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4090 addressing is likely. */
4091
4092 static comp_cost
4093 get_computation_cost_at (struct ivopts_data *data,
4094 struct iv_use *use, struct iv_cand *cand,
4095 bool address_p, bitmap *depends_on, gimple at,
4096 bool *can_autoinc,
4097 int *inv_expr_id)
4098 {
4099 tree ubase = use->iv->base, ustep = use->iv->step;
4100 tree cbase, cstep;
4101 tree utype = TREE_TYPE (ubase), ctype;
4102 unsigned HOST_WIDE_INT cstepi, offset = 0;
4103 HOST_WIDE_INT ratio, aratio;
4104 bool var_present, symbol_present, stmt_is_after_inc;
4105 comp_cost cost;
4106 widest_int rat;
4107 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4108 machine_mode mem_mode = (address_p
4109 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4110 : VOIDmode);
4111
4112 *depends_on = NULL;
4113
4114 /* Only consider real candidates. */
4115 if (!cand->iv)
4116 return infinite_cost;
4117
4118 cbase = cand->iv->base;
4119 cstep = cand->iv->step;
4120 ctype = TREE_TYPE (cbase);
4121
4122 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4123 {
4124 /* We do not have a precision to express the values of use. */
4125 return infinite_cost;
4126 }
4127
4128 if (address_p
4129 || (use->iv->base_object
4130 && cand->iv->base_object
4131 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4132 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4133 {
4134 /* Do not try to express the address of an object with a computation
4135 based on the address of a different object. This may cause problems
4136 in rtl level alias analysis (which does not expect this to happen,
4137 as this is illegal in C), and would be unlikely to be useful
4138 anyway. */
4139 if (use->iv->base_object
4140 && cand->iv->base_object
4141 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4142 return infinite_cost;
4143 }
4144
4145 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4146 {
4147 /* TODO -- add direct handling of this case. */
4148 goto fallback;
4149 }
4150
4151 /* CSTEPI is removed from the offset in case the statement is after the
4152 increment. If the step is not constant, we use zero instead.
4153 This is a bit imprecise (there is the extra addition), but
4154 redundancy elimination is likely to transform the code so that
4155 it uses the value of the variable before the increment anyway,
4156 so the estimate is not that unrealistic. */
4157 if (cst_and_fits_in_hwi (cstep))
4158 cstepi = int_cst_value (cstep);
4159 else
4160 cstepi = 0;
4161
4162 if (!constant_multiple_of (ustep, cstep, &rat))
4163 return infinite_cost;
4164
4165 if (wi::fits_shwi_p (rat))
4166 ratio = rat.to_shwi ();
4167 else
4168 return infinite_cost;
4169
4170 STRIP_NOPS (cbase);
4171 ctype = TREE_TYPE (cbase);
4172
4173 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4174
4175 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4176 or ratio == 1, it is better to handle this like
4177
4178 ubase - ratio * cbase + ratio * var
4179
4180 (this also holds in the case ratio == -1, TODO). */
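/* For instance (hypothetical numbers): ubase = 40, cbase = 8 and
   ratio = 4 give use = 40 + 4 * (var - 8) = (40 - 4*8) + 4*var
   = 8 + 4*var, so a constant cbase folds entirely into OFFSET in
   the first branch below.  */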
4181
4182 if (cst_and_fits_in_hwi (cbase))
4183 {
4184 offset = - ratio * int_cst_value (cbase);
4185 cost = difference_cost (data,
4186 ubase, build_int_cst (utype, 0),
4187 &symbol_present, &var_present, &offset,
4188 depends_on);
4189 cost.cost /= avg_loop_niter (data->current_loop);
4190 }
4191 else if (ratio == 1)
4192 {
4193 tree real_cbase = cbase;
4194
4195 /* Check to see if any adjustment is needed. */
4196 if (cstepi == 0 && stmt_is_after_inc)
4197 {
4198 aff_tree real_cbase_aff;
4199 aff_tree cstep_aff;
4200
4201 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4202 &real_cbase_aff);
4203 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4204
4205 aff_combination_add (&real_cbase_aff, &cstep_aff);
4206 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4207 }
4208
4209 cost = difference_cost (data,
4210 ubase, real_cbase,
4211 &symbol_present, &var_present, &offset,
4212 depends_on);
4213 cost.cost /= avg_loop_niter (data->current_loop);
4214 }
4215 else if (address_p
4216 && !POINTER_TYPE_P (ctype)
4217 && multiplier_allowed_in_address_p
4218 (ratio, mem_mode,
4219 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4220 {
4221 cbase
4222 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4223 cost = difference_cost (data,
4224 ubase, cbase,
4225 &symbol_present, &var_present, &offset,
4226 depends_on);
4227 cost.cost /= avg_loop_niter (data->current_loop);
4228 }
4229 else
4230 {
4231 cost = force_var_cost (data, cbase, depends_on);
4232 cost = add_costs (cost,
4233 difference_cost (data,
4234 ubase, build_int_cst (utype, 0),
4235 &symbol_present, &var_present,
4236 &offset, depends_on));
4237 cost.cost /= avg_loop_niter (data->current_loop);
4238 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4239 }
4240
4241 if (inv_expr_id)
4242 {
4243 *inv_expr_id =
4244 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4245 /* Clear depends on. */
4246 if (*inv_expr_id != -1 && depends_on && *depends_on)
4247 bitmap_clear (*depends_on);
4248 }
4249
4250 /* If we are after the increment, the value of the candidate is higher by
4251 one iteration. */
4252 if (stmt_is_after_inc)
4253 offset -= ratio * cstepi;
4254
4255 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4256 (symbol/var1/const parts may be omitted). If we are looking for an
4257 address, find the cost of addressing this. */
4258 if (address_p)
4259 return add_costs (cost,
4260 get_address_cost (symbol_present, var_present,
4261 offset, ratio, cstepi,
4262 mem_mode,
4263 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4264 speed, stmt_is_after_inc,
4265 can_autoinc));
4266
4267 /* Otherwise estimate the costs for computing the expression. */
4268 if (!symbol_present && !var_present && !offset)
4269 {
4270 if (ratio != 1)
4271 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4272 return cost;
4273 }
4274
4275 /* Symbol + offset should be compile-time computable, so consider that they
4276 are added once to the variable, if present. */
4277 if (var_present && (symbol_present || offset))
4278 cost.cost += adjust_setup_cost (data,
4279 add_cost (speed, TYPE_MODE (ctype)));
4280
4281 /* Having an offset does not affect the runtime cost if it is added to the
4282 symbol, but it increases complexity. */
4283 if (offset)
4284 cost.complexity++;
4285
4286 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4287
4288 aratio = ratio > 0 ? ratio : -ratio;
4289 if (aratio != 1)
4290 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4291 return cost;
4292
4293 fallback:
4294 if (can_autoinc)
4295 *can_autoinc = false;
4296
4297 {
4298 /* Just get the expression, expand it and measure the cost. */
4299 tree comp = get_computation_at (data->current_loop, use, cand, at);
4300
4301 if (!comp)
4302 return infinite_cost;
4303
4304 if (address_p)
4305 comp = build_simple_mem_ref (comp);
4306
4307 return new_cost (computation_cost (comp, speed), 0);
4308 }
4309 }
4310
4311 /* Determines the cost of the computation by which USE is expressed
4312 from induction variable CAND. If ADDRESS_P is true, we just need
4313 to create an address from it, otherwise we want to get it into
4314 a register. A set of invariants we depend on is stored in
4315 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4316 autoinc addressing is likely. */
4317
4318 static comp_cost
4319 get_computation_cost (struct ivopts_data *data,
4320 struct iv_use *use, struct iv_cand *cand,
4321 bool address_p, bitmap *depends_on,
4322 bool *can_autoinc, int *inv_expr_id)
4323 {
4324 return get_computation_cost_at (data,
4325 use, cand, address_p, depends_on, use->stmt,
4326 can_autoinc, inv_expr_id);
4327 }
4328
4329 /* Determines cost of basing replacement of USE on CAND in a generic
4330 expression. */
4331
4332 static bool
4333 determine_use_iv_cost_generic (struct ivopts_data *data,
4334 struct iv_use *use, struct iv_cand *cand)
4335 {
4336 bitmap depends_on;
4337 comp_cost cost;
4338 int inv_expr_id = -1;
4339
4340 /* The simple case first -- if we need to express the value of the preserved
4341 original biv, the cost is 0. This also prevents us from counting the
4342 cost of the increment twice -- once at this use and once in the cost of
4343 the candidate. */
4344 if (cand->pos == IP_ORIGINAL
4345 && cand->incremented_at == use->stmt)
4346 {
4347 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4348 ERROR_MARK, -1);
4349 return true;
4350 }
4351
4352 cost = get_computation_cost (data, use, cand, false, &depends_on,
4353 NULL, &inv_expr_id);
4354
4355 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4356 inv_expr_id);
4357
4358 return !infinite_cost_p (cost);
4359 }
4360
4361 /* Determines cost of basing replacement of USE on CAND in an address. */
4362
4363 static bool
4364 determine_use_iv_cost_address (struct ivopts_data *data,
4365 struct iv_use *use, struct iv_cand *cand)
4366 {
4367 bitmap depends_on;
4368 bool can_autoinc;
4369 int inv_expr_id = -1;
4370 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4371 &can_autoinc, &inv_expr_id);
4372
4373 if (cand->ainc_use == use)
4374 {
4375 if (can_autoinc)
4376 cost.cost -= cand->cost_step;
4377 /* If we generated the candidate solely for exploiting autoincrement
4378 opportunities, and it turns out it can't be used, set the cost to
4379 infinity to make sure we ignore it. */
4380 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4381 cost = infinite_cost;
4382 }
4383 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4384 inv_expr_id);
4385
4386 return !infinite_cost_p (cost);
4387 }
4388
4389 /* Computes value of candidate CAND at position AT in iteration NITER, and
4390 stores it to VAL. */
4391
4392 static void
4393 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4394 aff_tree *val)
4395 {
4396 aff_tree step, delta, nit;
4397 struct iv *iv = cand->iv;
4398 tree type = TREE_TYPE (iv->base);
4399 tree steptype = type;
4400 if (POINTER_TYPE_P (type))
4401 steptype = sizetype;
4402 steptype = unsigned_type_for (type);
4403
4404 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4405 aff_combination_convert (&step, steptype);
4406 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4407 aff_combination_convert (&nit, steptype);
4408 aff_combination_mult (&nit, &step, &delta);
4409 if (stmt_after_increment (loop, cand, at))
4410 aff_combination_add (&delta, &step);
4411
4412 tree_to_aff_combination (iv->base, type, val);
4413 if (!POINTER_TYPE_P (type))
4414 aff_combination_convert (val, steptype);
4415 aff_combination_add (val, &delta);
4416 }
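/* A hypothetical worked example: for an iv with base 10 and step 3 and
   NITER = 5, the value is 10 + 3*5 = 25 when AT precedes the increment,
   and one extra step, 10 + 3*6 = 28, when AT follows it.  */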
4417
4418 /* Returns the period of induction variable IV. */
4419
4420 static tree
4421 iv_period (struct iv *iv)
4422 {
4423 tree step = iv->step, period, type;
4424 tree pow2div;
4425
4426 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4427
4428 type = unsigned_type_for (TREE_TYPE (step));
4429 /* The period of the iv is lcm (step, type_range)/step - 1,
4430 i.e., N*type_range/step - 1. Since type_range is a power
4431 of two, N == (step >> num_of_ending_zeros_binary (step)),
4432 so the final result is
4433
4434 (type_range >> num_of_ending_zeros_binary (step)) - 1
4435
4436 */
4437 pow2div = num_ending_zeros (step);
4438
4439 period = build_low_bits_mask (type,
4440 (TYPE_PRECISION (type)
4441 - tree_to_uhwi (pow2div)));
4442
4443 return period;
4444 }
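/* A minimal standalone sketch (not part of this pass) of the formula
   above, for an unsigned PREC-bit type with a nonzero constant STEP,
   assuming PREC - tz < 64; the helper name is hypothetical.  */
#if 0
#include <stdint.h>

static uint64_t
sketch_iv_period (uint64_t step, unsigned prec)
{
  unsigned tz = __builtin_ctzll (step);  /* num_ending_zeros (step).  */
  /* Mask of the low (prec - tz) bits, i.e. (type_range >> tz) - 1.  */
  return ((uint64_t) 1 << (prec - tz)) - 1;
}

/* E.g. sketch_iv_period (4, 8) == 63: an 8-bit iv stepping by 4 only
   revisits its starting value after 64 increments.  */
#endif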
4445
4446 /* Returns the comparison operator used when eliminating the iv USE. */
4447
4448 static enum tree_code
4449 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4450 {
4451 struct loop *loop = data->current_loop;
4452 basic_block ex_bb;
4453 edge exit;
4454
4455 ex_bb = gimple_bb (use->stmt);
4456 exit = EDGE_SUCC (ex_bb, 0);
4457 if (flow_bb_inside_loop_p (loop, exit->dest))
4458 exit = EDGE_SUCC (ex_bb, 1);
4459
4460 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4461 }
4462
4463 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4464 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4465 calculation is performed in a non-wrapping type.
4466
4467 TODO: More generally, we could test for the situation that
4468 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4469 This would require knowing the sign of OFFSET. */
4470
4471 static bool
4472 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4473 {
4474 enum tree_code code;
4475 tree e1, e2;
4476 aff_tree aff_e1, aff_e2, aff_offset;
4477
4478 if (!nowrap_type_p (TREE_TYPE (base)))
4479 return false;
4480
4481 base = expand_simple_operations (base);
4482
4483 if (TREE_CODE (base) == SSA_NAME)
4484 {
4485 gimple stmt = SSA_NAME_DEF_STMT (base);
4486
4487 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4488 return false;
4489
4490 code = gimple_assign_rhs_code (stmt);
4491 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4492 return false;
4493
4494 e1 = gimple_assign_rhs1 (stmt);
4495 e2 = gimple_assign_rhs2 (stmt);
4496 }
4497 else
4498 {
4499 code = TREE_CODE (base);
4500 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4501 return false;
4502 e1 = TREE_OPERAND (base, 0);
4503 e2 = TREE_OPERAND (base, 1);
4504 }
4505
4506 /* Use affine expansion as deeper inspection to prove the equality. */
4507 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4508 &aff_e2, &data->name_expansion_cache);
4509 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4510 &aff_offset, &data->name_expansion_cache);
4511 aff_combination_scale (&aff_offset, -1);
4512 switch (code)
4513 {
4514 case PLUS_EXPR:
4515 aff_combination_add (&aff_e2, &aff_offset);
4516 if (aff_combination_zero_p (&aff_e2))
4517 return true;
4518
4519 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4520 &aff_e1, &data->name_expansion_cache);
4521 aff_combination_add (&aff_e1, &aff_offset);
4522 return aff_combination_zero_p (&aff_e1);
4523
4524 case POINTER_PLUS_EXPR:
4525 aff_combination_add (&aff_e2, &aff_offset);
4526 return aff_combination_zero_p (&aff_e2);
4527
4528 default:
4529 return false;
4530 }
4531 }
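/* A hypothetical example: for a pointer BASE = q + 16 (pointer
   arithmetic does not wrap) and OFFSET = 16, the POINTER_PLUS_EXPR case
   cancels the expanded second operand against -OFFSET, proving that
   BASE - OFFSET == q cannot overflow, so true is returned.  */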
4532
4533 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4534 comparison with CAND. NITER describes the number of iterations of
4535 the loops. If successful, the comparison in COMP_P is altered accordingly.
4536
4537 We aim to handle the following situation:
4538
4539 sometype *base, *p;
4540 int a, b, i;
4541
4542 i = a;
4543 p = p_0 = base + a;
4544
4545 do
4546 {
4547 bla (*p);
4548 p++;
4549 i++;
4550 }
4551 while (i < b);
4552
4553 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4554 We aim to optimize this to
4555
4556 p = p_0 = base + a;
4557 do
4558 {
4559 bla (*p);
4560 p++;
4561 }
4562 while (p < p_0 - a + b);
4563
4564 This preserves correctness, since the pointer arithmetic does not
4565 overflow. More precisely:
4566
4567 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4568 overflow in computing it or the values of p.
4569 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4570 overflow. To prove this, we use the fact that p_0 = base + a. */
4571
4572 static bool
4573 iv_elimination_compare_lt (struct ivopts_data *data,
4574 struct iv_cand *cand, enum tree_code *comp_p,
4575 struct tree_niter_desc *niter)
4576 {
4577 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4578 struct aff_tree nit, tmpa, tmpb;
4579 enum tree_code comp;
4580 HOST_WIDE_INT step;
4581
4582 /* We need to know that the candidate induction variable does not overflow.
4583 While more complex analysis may be used to prove this, for now just
4584 check that the variable appears in the original program and that it
4585 is computed in a type that guarantees no overflows. */
4586 cand_type = TREE_TYPE (cand->iv->base);
4587 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4588 return false;
4589
4590 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4591 the calculation of the BOUND could overflow, making the comparison
4592 invalid. */
4593 if (!data->loop_single_exit_p)
4594 return false;
4595
4596 /* We need to be able to decide whether the candidate is increasing or decreasing
4597 in order to choose the right comparison operator. */
4598 if (!cst_and_fits_in_hwi (cand->iv->step))
4599 return false;
4600 step = int_cst_value (cand->iv->step);
4601
4602 /* Check that the number of iterations matches the expected pattern:
4603 a + 1 > b ? 0 : b - a - 1. */
4604 mbz = niter->may_be_zero;
4605 if (TREE_CODE (mbz) == GT_EXPR)
4606 {
4607 /* Handle a + 1 > b. */
4608 tree op0 = TREE_OPERAND (mbz, 0);
4609 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4610 {
4611 a = TREE_OPERAND (op0, 0);
4612 b = TREE_OPERAND (mbz, 1);
4613 }
4614 else
4615 return false;
4616 }
4617 else if (TREE_CODE (mbz) == LT_EXPR)
4618 {
4619 tree op1 = TREE_OPERAND (mbz, 1);
4620
4621 /* Handle b < a + 1. */
4622 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4623 {
4624 a = TREE_OPERAND (op1, 0);
4625 b = TREE_OPERAND (mbz, 0);
4626 }
4627 else
4628 return false;
4629 }
4630 else
4631 return false;
4632
4633 /* Expected number of iterations is B - A - 1. Check that it matches
4634 the actual number, i.e., that B - A - NITER = 1. */
4635 tree_to_aff_combination (niter->niter, nit_type, &nit);
4636 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4637 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4638 aff_combination_scale (&nit, -1);
4639 aff_combination_scale (&tmpa, -1);
4640 aff_combination_add (&tmpb, &tmpa);
4641 aff_combination_add (&tmpb, &nit);
4642 if (tmpb.n != 0 || tmpb.offset != 1)
4643 return false;
4644
4645 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4646 overflow. */
4647 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4648 cand->iv->step,
4649 fold_convert (TREE_TYPE (cand->iv->step), a));
4650 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4651 return false;
4652
4653 /* Determine the new comparison operator. */
4654 comp = step < 0 ? GT_EXPR : LT_EXPR;
4655 if (*comp_p == NE_EXPR)
4656 *comp_p = comp;
4657 else if (*comp_p == EQ_EXPR)
4658 *comp_p = invert_tree_comparison (comp, false);
4659 else
4660 gcc_unreachable ();
4661
4662 return true;
4663 }
4664
4665 /* Check whether it is possible to express the condition in USE by comparison
4666 of candidate CAND. If so, store the value compared with to BOUND, and the
4667 comparison operator to COMP. */
4668
4669 static bool
4670 may_eliminate_iv (struct ivopts_data *data,
4671 struct iv_use *use, struct iv_cand *cand, tree *bound,
4672 enum tree_code *comp)
4673 {
4674 basic_block ex_bb;
4675 edge exit;
4676 tree period;
4677 struct loop *loop = data->current_loop;
4678 aff_tree bnd;
4679 struct tree_niter_desc *desc = NULL;
4680
4681 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4682 return false;
4683
4684 /* For now this works only for exits that dominate the loop latch.
4685 TODO: extend to other conditions inside loop body. */
4686 ex_bb = gimple_bb (use->stmt);
4687 if (use->stmt != last_stmt (ex_bb)
4688 || gimple_code (use->stmt) != GIMPLE_COND
4689 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4690 return false;
4691
4692 exit = EDGE_SUCC (ex_bb, 0);
4693 if (flow_bb_inside_loop_p (loop, exit->dest))
4694 exit = EDGE_SUCC (ex_bb, 1);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 return false;
4697
4698 desc = niter_for_exit (data, exit);
4699 if (!desc)
4700 return false;
4701
4702 /* Determine whether we can use the variable to test the exit condition.
4703 This is the case iff the period of the induction variable is greater
4704 than the number of iterations for which the exit condition is true. */
4705 period = iv_period (cand->iv);
4706
4707 /* If the number of iterations is constant, compare against it directly. */
4708 if (TREE_CODE (desc->niter) == INTEGER_CST)
4709 {
4710 /* See cand_value_at. */
4711 if (stmt_after_increment (loop, cand, use->stmt))
4712 {
4713 if (!tree_int_cst_lt (desc->niter, period))
4714 return false;
4715 }
4716 else
4717 {
4718 if (tree_int_cst_lt (period, desc->niter))
4719 return false;
4720 }
4721 }
4722
4723 /* If not, and if this is the only possible exit of the loop, see whether
4724 we can get a conservative estimate on the number of iterations of the
4725 entire loop and compare against that instead. */
4726 else
4727 {
4728 widest_int period_value, max_niter;
4729
4730 max_niter = desc->max;
4731 if (stmt_after_increment (loop, cand, use->stmt))
4732 max_niter += 1;
4733 period_value = wi::to_widest (period);
4734 if (wi::gtu_p (max_niter, period_value))
4735 {
4736 /* See if we can take advantage of inferred loop bound information. */
4737 if (data->loop_single_exit_p)
4738 {
4739 if (!max_loop_iterations (loop, &max_niter))
4740 return false;
4741 /* The loop bound is already adjusted by adding 1. */
4742 if (wi::gtu_p (max_niter, period_value))
4743 return false;
4744 }
4745 else
4746 return false;
4747 }
4748 }
4749
4750 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4751
4752 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4753 aff_combination_to_tree (&bnd));
4754 *comp = iv_elimination_compare (data, use);
4755
4756 /* It is unlikely that computing the number of iterations using division
4757 would be more profitable than keeping the original induction variable. */
4758 if (expression_expensive_p (*bound))
4759 return false;
4760
4761 /* Sometimes it is possible to handle the situation that the number of
4762 iterations may be zero unless additional assumptions hold, by using <
4763 instead of != in the exit condition.
4764
4765 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4766 base the exit condition on it. However, that is often too
4767 expensive. */
4768 if (!integer_zerop (desc->may_be_zero))
4769 return iv_elimination_compare_lt (data, cand, comp, desc);
4770
4771 return true;
4772 }
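/* An illustrative sketch (hypothetical source) of the replacement this
   enables, assuming the loop is known to iterate n > 0 times.  */
#if 0
static void
sketch (int *a, int n)
{
  int i;
  int *p;

  /* Before: the exit tests the counter iv i.  */
  for (i = 0; i < n; i++)
    a[i] = 0;

  /* After iv elimination: the counter is gone and the exit compares
     the pointer candidate against its value in iteration n, as
     computed by cand_value_at.  */
  p = a;
  do
    *p++ = 0;
  while (p != a + n);
}
#endif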
4773
4774 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4775 be copied, if it is used in the loop body and DATA->body_includes_call. */
4776
4777 static int
4778 parm_decl_cost (struct ivopts_data *data, tree bound)
4779 {
4780 tree sbound = bound;
4781 STRIP_NOPS (sbound);
4782
4783 if (TREE_CODE (sbound) == SSA_NAME
4784 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4785 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4786 && data->body_includes_call)
4787 return COSTS_N_INSNS (1);
4788
4789 return 0;
4790 }
4791
4792 /* Determines cost of basing replacement of USE on CAND in a condition. */
4793
4794 static bool
4795 determine_use_iv_cost_condition (struct ivopts_data *data,
4796 struct iv_use *use, struct iv_cand *cand)
4797 {
4798 tree bound = NULL_TREE;
4799 struct iv *cmp_iv;
4800 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4801 comp_cost elim_cost, express_cost, cost, bound_cost;
4802 bool ok;
4803 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4804 tree *control_var, *bound_cst;
4805 enum tree_code comp = ERROR_MARK;
4806
4807 /* Only consider real candidates. */
4808 if (!cand->iv)
4809 {
4810 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4811 ERROR_MARK, -1);
4812 return false;
4813 }
4814
4815 /* Try iv elimination. */
4816 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4817 {
4818 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4819 if (elim_cost.cost == 0)
4820 elim_cost.cost = parm_decl_cost (data, bound);
4821 else if (TREE_CODE (bound) == INTEGER_CST)
4822 elim_cost.cost = 0;
4823 /* If we replace a loop condition 'i < n' with 'p < base + n',
4824 depends_on_elim will have 'base' and 'n' set, which implies
4825 that both 'base' and 'n' will be live during the loop. More likely,
4826 'base + n' will be loop invariant, resulting in only one live value
4827 during the loop. So in that case we clear depends_on_elim and set
4828 elim_inv_expr_id instead. */
4829 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4830 {
4831 elim_inv_expr_id = get_expr_id (data, bound);
4832 bitmap_clear (depends_on_elim);
4833 }
4834 /* The bound is a loop invariant, so it will only be computed
4835 once. */
4836 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4837 }
4838 else
4839 elim_cost = infinite_cost;
4840
4841 /* Try expressing the original giv. If it is compared with an invariant,
4842 note that we cannot get rid of it. */
4843 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4844 NULL, &cmp_iv);
4845 gcc_assert (ok);
4846
4847 /* When the condition is a comparison of the candidate IV against
4848 zero, prefer this IV.
4849
4850 TODO: The constant that we're subtracting from the cost should
4851 be target-dependent. This information should be added to the
4852 target costs for each backend. */
4853 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4854 && integer_zerop (*bound_cst)
4855 && (operand_equal_p (*control_var, cand->var_after, 0)
4856 || operand_equal_p (*control_var, cand->var_before, 0)))
4857 elim_cost.cost -= 1;
4858
4859 express_cost = get_computation_cost (data, use, cand, false,
4860 &depends_on_express, NULL,
4861 &express_inv_expr_id);
4862 fd_ivopts_data = data;
4863 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4864
4865 /* Count the cost of the original bound as well. */
4866 bound_cost = force_var_cost (data, *bound_cst, NULL);
4867 if (bound_cost.cost == 0)
4868 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4869 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4870 bound_cost.cost = 0;
4871 express_cost.cost += bound_cost.cost;
4872
4873 /* Choose the better approach, preferring the eliminated IV. */
4874 if (compare_costs (elim_cost, express_cost) <= 0)
4875 {
4876 cost = elim_cost;
4877 depends_on = depends_on_elim;
4878 depends_on_elim = NULL;
4879 inv_expr_id = elim_inv_expr_id;
4880 }
4881 else
4882 {
4883 cost = express_cost;
4884 depends_on = depends_on_express;
4885 depends_on_express = NULL;
4886 bound = NULL_TREE;
4887 comp = ERROR_MARK;
4888 inv_expr_id = express_inv_expr_id;
4889 }
4890
4891 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4892
4893 if (depends_on_elim)
4894 BITMAP_FREE (depends_on_elim);
4895 if (depends_on_express)
4896 BITMAP_FREE (depends_on_express);
4897
4898 return !infinite_cost_p (cost);
4899 }
4900
4901 /* Determines cost of basing replacement of USE on CAND. Returns false
4902 if USE cannot be based on CAND. */
4903
4904 static bool
4905 determine_use_iv_cost (struct ivopts_data *data,
4906 struct iv_use *use, struct iv_cand *cand)
4907 {
4908 switch (use->type)
4909 {
4910 case USE_NONLINEAR_EXPR:
4911 return determine_use_iv_cost_generic (data, use, cand);
4912
4913 case USE_ADDRESS:
4914 return determine_use_iv_cost_address (data, use, cand);
4915
4916 case USE_COMPARE:
4917 return determine_use_iv_cost_condition (data, use, cand);
4918
4919 default:
4920 gcc_unreachable ();
4921 }
4922 }
4923
4924 /* Return true if get_computation_cost indicates that autoincrement is
4925 a possibility for the pair of USE and CAND, false otherwise. */
4926
4927 static bool
4928 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4929 struct iv_cand *cand)
4930 {
4931 bitmap depends_on;
4932 bool can_autoinc;
4933 comp_cost cost;
4934
4935 if (use->type != USE_ADDRESS)
4936 return false;
4937
4938 cost = get_computation_cost (data, use, cand, true, &depends_on,
4939 &can_autoinc, NULL);
4940
4941 BITMAP_FREE (depends_on);
4942
4943 return !infinite_cost_p (cost) && can_autoinc;
4944 }
4945
4946 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4947 use that allows autoincrement, and set their AINC_USE if possible. */
4948
4949 static void
4950 set_autoinc_for_original_candidates (struct ivopts_data *data)
4951 {
4952 unsigned i, j;
4953
4954 for (i = 0; i < n_iv_cands (data); i++)
4955 {
4956 struct iv_cand *cand = iv_cand (data, i);
4957 struct iv_use *closest_before = NULL;
4958 struct iv_use *closest_after = NULL;
4959 if (cand->pos != IP_ORIGINAL)
4960 continue;
4961
4962 for (j = 0; j < n_iv_uses (data); j++)
4963 {
4964 struct iv_use *use = iv_use (data, j);
4965 unsigned uid = gimple_uid (use->stmt);
4966
4967 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4968 continue;
4969
4970 if (uid < gimple_uid (cand->incremented_at)
4971 && (closest_before == NULL
4972 || uid > gimple_uid (closest_before->stmt)))
4973 closest_before = use;
4974
4975 if (uid > gimple_uid (cand->incremented_at)
4976 && (closest_after == NULL
4977 || uid < gimple_uid (closest_after->stmt)))
4978 closest_after = use;
4979 }
4980
4981 if (closest_before != NULL
4982 && autoinc_possible_for_pair (data, closest_before, cand))
4983 cand->ainc_use = closest_before;
4984 else if (closest_after != NULL
4985 && autoinc_possible_for_pair (data, closest_after, cand))
4986 cand->ainc_use = closest_after;
4987 }
4988 }
4989
4990 /* Finds the candidates for the induction variables. */
4991
4992 static void
4993 find_iv_candidates (struct ivopts_data *data)
4994 {
4995 /* Add commonly used ivs. */
4996 add_standard_iv_candidates (data);
4997
4998 /* Add old induction variables. */
4999 add_old_ivs_candidates (data);
5000
5001 /* Add induction variables derived from uses. */
5002 add_derived_ivs_candidates (data);
5003
5004 set_autoinc_for_original_candidates (data);
5005
5006 /* Record the important candidates. */
5007 record_important_candidates (data);
5008 }
5009
5010 /* Determines costs of basing the use of the iv on an iv candidate. */
5011
5012 static void
5013 determine_use_iv_costs (struct ivopts_data *data)
5014 {
5015 unsigned i, j;
5016 struct iv_use *use;
5017 struct iv_cand *cand;
5018 bitmap to_clear = BITMAP_ALLOC (NULL);
5019
5020 alloc_use_cost_map (data);
5021
5022 for (i = 0; i < n_iv_uses (data); i++)
5023 {
5024 use = iv_use (data, i);
5025
5026 if (data->consider_all_candidates)
5027 {
5028 for (j = 0; j < n_iv_cands (data); j++)
5029 {
5030 cand = iv_cand (data, j);
5031 determine_use_iv_cost (data, use, cand);
5032 }
5033 }
5034 else
5035 {
5036 bitmap_iterator bi;
5037
5038 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5039 {
5040 cand = iv_cand (data, j);
5041 if (!determine_use_iv_cost (data, use, cand))
5042 bitmap_set_bit (to_clear, j);
5043 }
5044
5045 /* Remove the candidates for which the cost is infinite from
5046 the list of related candidates. */
5047 bitmap_and_compl_into (use->related_cands, to_clear);
5048 bitmap_clear (to_clear);
5049 }
5050 }
5051
5052 BITMAP_FREE (to_clear);
5053
5054 if (dump_file && (dump_flags & TDF_DETAILS))
5055 {
5056 fprintf (dump_file, "Use-candidate costs:\n");
5057
5058 for (i = 0; i < n_iv_uses (data); i++)
5059 {
5060 use = iv_use (data, i);
5061
5062 fprintf (dump_file, "Use %d:\n", i);
5063 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5064 for (j = 0; j < use->n_map_members; j++)
5065 {
5066 if (!use->cost_map[j].cand
5067 || infinite_cost_p (use->cost_map[j].cost))
5068 continue;
5069
5070 fprintf (dump_file, " %d\t%d\t%d\t",
5071 use->cost_map[j].cand->id,
5072 use->cost_map[j].cost.cost,
5073 use->cost_map[j].cost.complexity);
5074 if (use->cost_map[j].depends_on)
5075 bitmap_print (dump_file,
5076 use->cost_map[j].depends_on, "","");
5077 if (use->cost_map[j].inv_expr_id != -1)
5078 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5079 fprintf (dump_file, "\n");
5080 }
5081
5082 fprintf (dump_file, "\n");
5083 }
5084 fprintf (dump_file, "\n");
5085 }
5086 }
5087
5088 /* Determines cost of the candidate CAND. */
5089
5090 static void
5091 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5092 {
5093 comp_cost cost_base;
5094 unsigned cost, cost_step;
5095 tree base;
5096
5097 if (!cand->iv)
5098 {
5099 cand->cost = 0;
5100 return;
5101 }
5102
5103 /* There are two costs associated with the candidate -- its increment
5104 and its initialization. The second is almost negligible for any loop
5105 that rolls enough, so we give it only very little weight. */
5106
5107 base = cand->iv->base;
5108 cost_base = force_var_cost (data, base, NULL);
5109 /* It will be exceptional that the iv register happens to be initialized with
5110 the proper value at no cost. In general, there will at least be a regcopy
5111 or a const set. */
5112 if (cost_base.cost == 0)
5113 cost_base.cost = COSTS_N_INSNS (1);
5114 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5115
5116 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5117
5118 /* Prefer the original ivs unless we may gain something by replacing them.
5119 The reason is to make debugging simpler, so this is not relevant for
5120 artificial ivs created by other optimization passes. */
5121 if (cand->pos != IP_ORIGINAL
5122 || !SSA_NAME_VAR (cand->var_before)
5123 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5124 cost++;
5125
5126 /* Prefer not to insert statements into the latch unless there are some
5127 already (so that we do not create unnecessary jumps). */
5128 if (cand->pos == IP_END
5129 && empty_block_p (ip_end_pos (data->current_loop)))
5130 cost++;
5131
5132 cand->cost = cost;
5133 cand->cost_step = cost_step;
5134 }
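/* Hypothetical worked example: with cost_base.cost = 4 paid once before
   the loop and cost_step = 1 paid every iteration, a loop that rolls
   often enough amortizes the setup to nearly nothing, so cand->cost is
   dominated by cost_step; the cost++ adjustments above then act as tie
   breakers that favor original ivs and avoid filling an empty latch.  */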
5135
5136 /* Determines costs of computation of the candidates. */
5137
5138 static void
5139 determine_iv_costs (struct ivopts_data *data)
5140 {
5141 unsigned i;
5142
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5144 {
5145 fprintf (dump_file, "Candidate costs:\n");
5146 fprintf (dump_file, " cand\tcost\n");
5147 }
5148
5149 for (i = 0; i < n_iv_cands (data); i++)
5150 {
5151 struct iv_cand *cand = iv_cand (data, i);
5152
5153 determine_iv_cost (data, cand);
5154
5155 if (dump_file && (dump_flags & TDF_DETAILS))
5156 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5157 }
5158
5159 if (dump_file && (dump_flags & TDF_DETAILS))
5160 fprintf (dump_file, "\n");
5161 }
5162
5163 /* Calculates cost for having SIZE induction variables. */
5164
5165 static unsigned
5166 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5167 {
5168 /* We add size to the cost, so that we prefer eliminating ivs
5169 if possible. */
5170 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5171 data->body_includes_call);
5172 }
5173
5174 /* For each size of the induction variable set determine the penalty. */
5175
5176 static void
5177 determine_set_costs (struct ivopts_data *data)
5178 {
5179 unsigned j, n;
5180 gimple phi;
5181 gimple_stmt_iterator psi;
5182 tree op;
5183 struct loop *loop = data->current_loop;
5184 bitmap_iterator bi;
5185
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5187 {
5188 fprintf (dump_file, "Global costs:\n");
5189 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5190 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5191 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5192 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5193 }
5194
5195 n = 0;
5196 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5197 {
5198 phi = gsi_stmt (psi);
5199 op = PHI_RESULT (phi);
5200
5201 if (virtual_operand_p (op))
5202 continue;
5203
5204 if (get_iv (data, op))
5205 continue;
5206
5207 n++;
5208 }
5209
5210 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5211 {
5212 struct version_info *info = ver_info (data, j);
5213
5214 if (info->inv_id && info->has_nonlin_use)
5215 n++;
5216 }
5217
5218 data->regs_used = n;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, " regs_used %d\n", n);
5221
5222 if (dump_file && (dump_flags & TDF_DETAILS))
5223 {
5224 fprintf (dump_file, " cost for size:\n");
5225 fprintf (dump_file, " ivs\tcost\n");
5226 for (j = 0; j <= 2 * target_avail_regs; j++)
5227 fprintf (dump_file, " %d\t%d\n", j,
5228 ivopts_global_cost_for_size (data, j));
5229 fprintf (dump_file, "\n");
5230 }
5231 }
5232
5233 /* Returns true if A is a cheaper cost pair than B. */
5234
5235 static bool
5236 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5237 {
5238 int cmp;
5239
5240 if (!a)
5241 return false;
5242
5243 if (!b)
5244 return true;
5245
5246 cmp = compare_costs (a->cost, b->cost);
5247 if (cmp < 0)
5248 return true;
5249
5250 if (cmp > 0)
5251 return false;
5252
5253 /* In case the use costs are the same, prefer the cheaper candidate. */
5254 if (a->cand->cost < b->cand->cost)
5255 return true;
5256
5257 return false;
5258 }
5259
5260
5261 /* Returns the candidate by which USE is expressed in IVS. */
5262
5263 static struct cost_pair *
5264 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5265 {
5266 return ivs->cand_for_use[use->id];
5267 }
5268
5269 /* Computes the cost field of IVS structure. */
5270
5271 static void
5272 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5273 {
5274 comp_cost cost = ivs->cand_use_cost;
5275
5276 cost.cost += ivs->cand_cost;
5277
5278 cost.cost += ivopts_global_cost_for_size (data,
5279 ivs->n_regs + ivs->num_used_inv_expr);
5280
5281 ivs->cost = cost;
5282 }
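/* Hypothetical worked example: three uses costing 2, 1 and 3, candidates
   with increment costs 4 and 5, and a global size cost of 6 for
   n_regs + num_used_inv_expr registers give
   cost = (2 + 1 + 3) + (4 + 5) + 6 = 21.  */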
5283
5284 /* Remove invariants in set INVS from set IVS. */
5285
5286 static void
5287 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5288 {
5289 bitmap_iterator bi;
5290 unsigned iid;
5291
5292 if (!invs)
5293 return;
5294
5295 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5296 {
5297 ivs->n_invariant_uses[iid]--;
5298 if (ivs->n_invariant_uses[iid] == 0)
5299 ivs->n_regs--;
5300 }
5301 }
5302
5303 /* Set USE not to be expressed by any candidate in IVS. */
5304
5305 static void
5306 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5307 struct iv_use *use)
5308 {
5309 unsigned uid = use->id, cid;
5310 struct cost_pair *cp;
5311
5312 cp = ivs->cand_for_use[uid];
5313 if (!cp)
5314 return;
5315 cid = cp->cand->id;
5316
5317 ivs->bad_uses++;
5318 ivs->cand_for_use[uid] = NULL;
5319 ivs->n_cand_uses[cid]--;
5320
5321 if (ivs->n_cand_uses[cid] == 0)
5322 {
5323 bitmap_clear_bit (ivs->cands, cid);
5324 /* Do not count the pseudocandidates. */
5325 if (cp->cand->iv)
5326 ivs->n_regs--;
5327 ivs->n_cands--;
5328 ivs->cand_cost -= cp->cand->cost;
5329
5330 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5331 }
5332
5333 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5334
5335 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5336
5337 if (cp->inv_expr_id != -1)
5338 {
5339 ivs->used_inv_expr[cp->inv_expr_id]--;
5340 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5341 ivs->num_used_inv_expr--;
5342 }
5343 iv_ca_recount_cost (data, ivs);
5344 }
5345
5346 /* Add invariants in set INVS to set IVS. */
5347
5348 static void
5349 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5350 {
5351 bitmap_iterator bi;
5352 unsigned iid;
5353
5354 if (!invs)
5355 return;
5356
5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5358 {
5359 ivs->n_invariant_uses[iid]++;
5360 if (ivs->n_invariant_uses[iid] == 1)
5361 ivs->n_regs++;
5362 }
5363 }
5364
5365 /* Set cost pair for USE in set IVS to CP. */
5366
5367 static void
5368 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369 struct iv_use *use, struct cost_pair *cp)
5370 {
5371 unsigned uid = use->id, cid;
5372
5373 if (ivs->cand_for_use[uid] == cp)
5374 return;
5375
5376 if (ivs->cand_for_use[uid])
5377 iv_ca_set_no_cp (data, ivs, use);
5378
5379 if (cp)
5380 {
5381 cid = cp->cand->id;
5382
5383 ivs->bad_uses--;
5384 ivs->cand_for_use[uid] = cp;
5385 ivs->n_cand_uses[cid]++;
5386 if (ivs->n_cand_uses[cid] == 1)
5387 {
5388 bitmap_set_bit (ivs->cands, cid);
5389 /* Do not count the pseudocandidates. */
5390 if (cp->cand->iv)
5391 ivs->n_regs++;
5392 ivs->n_cands++;
5393 ivs->cand_cost += cp->cand->cost;
5394
5395 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5396 }
5397
5398 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5399 iv_ca_set_add_invariants (ivs, cp->depends_on);
5400
5401 if (cp->inv_expr_id != -1)
5402 {
5403 ivs->used_inv_expr[cp->inv_expr_id]++;
5404 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5405 ivs->num_used_inv_expr++;
5406 }
5407 iv_ca_recount_cost (data, ivs);
5408 }
5409 }
5410
5411 /* Extend set IVS by expressing USE by some of the candidates in it
5412 if possible. Consider all important candidates if candidates in
5413 set IVS don't give any result. */
5414
5415 static void
5416 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5417 struct iv_use *use)
5418 {
5419 struct cost_pair *best_cp = NULL, *cp;
5420 bitmap_iterator bi;
5421 unsigned i;
5422 struct iv_cand *cand;
5423
5424 gcc_assert (ivs->upto >= use->id);
5425 ivs->upto++;
5426 ivs->bad_uses++;
5427
5428 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5429 {
5430 cand = iv_cand (data, i);
5431 cp = get_use_iv_cost (data, use, cand);
5432 if (cheaper_cost_pair (cp, best_cp))
5433 best_cp = cp;
5434 }
5435
5436 if (best_cp == NULL)
5437 {
5438 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5439 {
5440 cand = iv_cand (data, i);
5441 cp = get_use_iv_cost (data, use, cand);
5442 if (cheaper_cost_pair (cp, best_cp))
5443 best_cp = cp;
5444 }
5445 }
5446
5447 iv_ca_set_cp (data, ivs, use, best_cp);
5448 }
5449
5450 /* Get cost for assignment IVS. */
5451
5452 static comp_cost
5453 iv_ca_cost (struct iv_ca *ivs)
5454 {
5455 /* This was a conditional expression but it triggered a bug in
5456 Sun C 5.5. */
5457 if (ivs->bad_uses)
5458 return infinite_cost;
5459 else
5460 return ivs->cost;
5461 }
5462
5463 /* Returns true if all dependences of CP are among invariants in IVS. */
5464
5465 static bool
5466 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5467 {
5468 unsigned i;
5469 bitmap_iterator bi;
5470
5471 if (!cp->depends_on)
5472 return true;
5473
5474 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5475 {
5476 if (ivs->n_invariant_uses[i] == 0)
5477 return false;
5478 }
5479
5480 return true;
5481 }
5482
5483 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5484 it before NEXT_CHANGE. */
5485
5486 static struct iv_ca_delta *
5487 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5488 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5489 {
5490 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5491
5492 change->use = use;
5493 change->old_cp = old_cp;
5494 change->new_cp = new_cp;
5495 change->next_change = next_change;
5496
5497 return change;
5498 }
5499
5500 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5501 are rewritten. */
5502
5503 static struct iv_ca_delta *
5504 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5505 {
5506 struct iv_ca_delta *last;
5507
5508 if (!l2)
5509 return l1;
5510
5511 if (!l1)
5512 return l2;
5513
5514 for (last = l1; last->next_change; last = last->next_change)
5515 continue;
5516 last->next_change = l2;
5517
5518 return l1;
5519 }
5520
5521 /* Reverse the list of changes DELTA, forming the inverse to it. */
5522
5523 static struct iv_ca_delta *
5524 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5525 {
5526 struct iv_ca_delta *act, *next, *prev = NULL;
5527 struct cost_pair *tmp;
5528
5529 for (act = delta; act; act = next)
5530 {
5531 next = act->next_change;
5532 act->next_change = prev;
5533 prev = act;
5534
5535 tmp = act->old_cp;
5536 act->old_cp = act->new_cp;
5537 act->new_cp = tmp;
5538 }
5539
5540 return prev;
5541 }
5542
5543 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5544 reverted instead. */
5545
5546 static void
5547 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5548 struct iv_ca_delta *delta, bool forward)
5549 {
5550 struct cost_pair *from, *to;
5551 struct iv_ca_delta *act;
5552
5553 if (!forward)
5554 delta = iv_ca_delta_reverse (delta);
5555
5556 for (act = delta; act; act = act->next_change)
5557 {
5558 from = act->old_cp;
5559 to = act->new_cp;
5560 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5561 iv_ca_set_cp (data, ivs, act->use, to);
5562 }
5563
5564 if (!forward)
5565 iv_ca_delta_reverse (delta);
5566 }
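/* The deltas make trial moves cheap: the usual pattern (see iv_ca_extend
   and iv_ca_narrow below) applies a delta, reads the recomputed cost,
   and rolls the delta back:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);  */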
5567
5568 /* Returns true if CAND is used in IVS. */
5569
5570 static bool
5571 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5572 {
5573 return ivs->n_cand_uses[cand->id] > 0;
5574 }
5575
5576 /* Returns number of induction variable candidates in the set IVS. */
5577
5578 static unsigned
5579 iv_ca_n_cands (struct iv_ca *ivs)
5580 {
5581 return ivs->n_cands;
5582 }
5583
5584 /* Free the list of changes DELTA. */
5585
5586 static void
5587 iv_ca_delta_free (struct iv_ca_delta **delta)
5588 {
5589 struct iv_ca_delta *act, *next;
5590
5591 for (act = *delta; act; act = next)
5592 {
5593 next = act->next_change;
5594 free (act);
5595 }
5596
5597 *delta = NULL;
5598 }
5599
5600 /* Allocates a new iv candidate assignment. */
5601
5602 static struct iv_ca *
5603 iv_ca_new (struct ivopts_data *data)
5604 {
5605 struct iv_ca *nw = XNEW (struct iv_ca);
5606
5607 nw->upto = 0;
5608 nw->bad_uses = 0;
5609 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5610 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5611 nw->cands = BITMAP_ALLOC (NULL);
5612 nw->n_cands = 0;
5613 nw->n_regs = 0;
5614 nw->cand_use_cost = no_cost;
5615 nw->cand_cost = 0;
5616 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5617 nw->cost = no_cost;
5618 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5619 nw->num_used_inv_expr = 0;
5620
5621 return nw;
5622 }
5623
5624 /* Free memory occupied by the set IVS. */
5625
5626 static void
5627 iv_ca_free (struct iv_ca **ivs)
5628 {
5629 free ((*ivs)->cand_for_use);
5630 free ((*ivs)->n_cand_uses);
5631 BITMAP_FREE ((*ivs)->cands);
5632 free ((*ivs)->n_invariant_uses);
5633 free ((*ivs)->used_inv_expr);
5634 free (*ivs);
5635 *ivs = NULL;
5636 }
5637
5638 /* Dumps IVS to FILE. */
5639
5640 static void
5641 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5642 {
5643 const char *pref = " invariants ";
5644 unsigned i;
5645 comp_cost cost = iv_ca_cost (ivs);
5646
5647 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5648 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5649 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5650 bitmap_print (file, ivs->cands, " candidates: ","\n");
5651
5652 for (i = 0; i < ivs->upto; i++)
5653 {
5654 struct iv_use *use = iv_use (data, i);
5655 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5656 if (cp)
5657 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5658 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5659 else
5660 fprintf (file, " use:%d --> ??\n", use->id);
5661 }
5662
5663 for (i = 1; i <= data->max_inv_id; i++)
5664 if (ivs->n_invariant_uses[i])
5665 {
5666 fprintf (file, "%s%d", pref, i);
5667 pref = ", ";
5668 }
5669 fprintf (file, "\n\n");
5670 }
5671
5672 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5673 of the new set, and store the differences in DELTA. The number of induction
5674 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5675 the function tries to find a solution with minimal iv candidates. */
5676
5677 static comp_cost
5678 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5679 struct iv_cand *cand, struct iv_ca_delta **delta,
5680 unsigned *n_ivs, bool min_ncand)
5681 {
5682 unsigned i;
5683 comp_cost cost;
5684 struct iv_use *use;
5685 struct cost_pair *old_cp, *new_cp;
5686
5687 *delta = NULL;
5688 for (i = 0; i < ivs->upto; i++)
5689 {
5690 use = iv_use (data, i);
5691 old_cp = iv_ca_cand_for_use (ivs, use);
5692
5693 if (old_cp
5694 && old_cp->cand == cand)
5695 continue;
5696
5697 new_cp = get_use_iv_cost (data, use, cand);
5698 if (!new_cp)
5699 continue;
5700
5701 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5702 continue;
5703
5704 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5705 continue;
5706
5707 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5708 }
5709
5710 iv_ca_delta_commit (data, ivs, *delta, true);
5711 cost = iv_ca_cost (ivs);
5712 if (n_ivs)
5713 *n_ivs = iv_ca_n_cands (ivs);
5714 iv_ca_delta_commit (data, ivs, *delta, false);
5715
5716 return cost;
5717 }
5718
5719 /* Try narrowing set IVS by removing CAND. Return the cost of
5720 the new set and store the differences in DELTA. START is
5721 the candidate with which we start narrowing. */
5722
5723 static comp_cost
5724 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5725 struct iv_cand *cand, struct iv_cand *start,
5726 struct iv_ca_delta **delta)
5727 {
5728 unsigned i, ci;
5729 struct iv_use *use;
5730 struct cost_pair *old_cp, *new_cp, *cp;
5731 bitmap_iterator bi;
5732 struct iv_cand *cnd;
5733 comp_cost cost, best_cost, acost;
5734
5735 *delta = NULL;
5736 for (i = 0; i < n_iv_uses (data); i++)
5737 {
5738 use = iv_use (data, i);
5739
5740 old_cp = iv_ca_cand_for_use (ivs, use);
5741 if (old_cp->cand != cand)
5742 continue;
5743
5744 best_cost = iv_ca_cost (ivs);
5745 /* Start narrowing with START. */
5746 new_cp = get_use_iv_cost (data, use, start);
5747
5748 if (data->consider_all_candidates)
5749 {
5750 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5751 {
5752 if (ci == cand->id || (start && ci == start->id))
5753 continue;
5754
5755 cnd = iv_cand (data, ci);
5756
5757 cp = get_use_iv_cost (data, use, cnd);
5758 if (!cp)
5759 continue;
5760
5761 iv_ca_set_cp (data, ivs, use, cp);
5762 acost = iv_ca_cost (ivs);
5763
5764 if (compare_costs (acost, best_cost) < 0)
5765 {
5766 best_cost = acost;
5767 new_cp = cp;
5768 }
5769 }
5770 }
5771 else
5772 {
5773 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5774 {
5775 if (ci == cand->id || (start && ci == start->id))
5776 continue;
5777
5778 cnd = iv_cand (data, ci);
5779
5780 cp = get_use_iv_cost (data, use, cnd);
5781 if (!cp)
5782 continue;
5783
5784 iv_ca_set_cp (data, ivs, use, cp);
5785 acost = iv_ca_cost (ivs);
5786
5787 if (compare_costs (acost, best_cost) < 0)
5788 {
5789 best_cost = acost;
5790 new_cp = cp;
5791 }
5792 }
5793 }
5794 /* Restore to old cp for use. */
5795 iv_ca_set_cp (data, ivs, use, old_cp);
5796
5797 if (!new_cp)
5798 {
5799 iv_ca_delta_free (delta);
5800 return infinite_cost;
5801 }
5802
5803 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5804 }
5805
5806 iv_ca_delta_commit (data, ivs, *delta, true);
5807 cost = iv_ca_cost (ivs);
5808 iv_ca_delta_commit (data, ivs, *delta, false);
5809
5810 return cost;
5811 }
5812
5813 /* Try optimizing the set of candidates IVS by removing candidates other
5814 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5815 differences in DELTA. */
5816
5817 static comp_cost
5818 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5819 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5820 {
5821 bitmap_iterator bi;
5822 struct iv_ca_delta *act_delta, *best_delta;
5823 unsigned i;
5824 comp_cost best_cost, acost;
5825 struct iv_cand *cand;
5826
5827 best_delta = NULL;
5828 best_cost = iv_ca_cost (ivs);
5829
5830 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5831 {
5832 cand = iv_cand (data, i);
5833
5834 if (cand == except_cand)
5835 continue;
5836
5837 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5838
5839 if (compare_costs (acost, best_cost) < 0)
5840 {
5841 best_cost = acost;
5842 iv_ca_delta_free (&best_delta);
5843 best_delta = act_delta;
5844 }
5845 else
5846 iv_ca_delta_free (&act_delta);
5847 }
5848
5849 if (!best_delta)
5850 {
5851 *delta = NULL;
5852 return best_cost;
5853 }
5854
5855 /* Recurse to possibly remove other unnecessary ivs. */
5856 iv_ca_delta_commit (data, ivs, best_delta, true);
5857 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5858 iv_ca_delta_commit (data, ivs, best_delta, false);
5859 *delta = iv_ca_delta_join (best_delta, *delta);
5860 return best_cost;
5861 }
5862
5863 /* Tries to extend the set IVS in the best possible way in order
5864 to express the USE. If ORIGINALP is true, prefer candidates from
5865 the original set of IVs, otherwise favor important candidates not
5866 based on any memory object. */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.
     Otherwise, first try important candidates not based on any memory
     object.  Only if this fails do we try the specific ones.  The
     rationale: in loops with many variables the best choice is often to
     use just one generic biv.  If we added many ivs specific to the uses
     here, the optimization algorithm would later be likely to get stuck
     in a local minimum, causing us to create too many ivs.  Going from
     few ivs to more seems more likely to succeed -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
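  /* For instance (an illustrative sketch, not drawn from any particular
     testcase): in a loop such as

        for (i = 0; i < n; i++)
          {
            a[i] = ...;
            b[i] = ...;
          }

     expressing both array addresses from the single counter i is usually
     cheaper than giving each array its own pointer induction.  */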
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  */
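
/* An informal outline of the greedy step below (commentary only):

     best = cost (IVS);
     for each candidate C not yet in IVS:
       evaluate cost (IVS + {C}), pruning ivs that C makes redundant;
       remember the cheapest such change;
     if no extension helped, fall back to pruning alone;
     commit the best change found, if any.  */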

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

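/* Finds the best iv set by running find_optimal_iv_set_1 with both
   strategies, keeping the cheaper of the two resulting sets and
   recording the selected candidate for each use.  */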
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost using a strategy that starts with the original
     IVs, then try again using a strategy that prefers candidates not
     based on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
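  /* E.g. (an illustrative sketch): for an original biv incremented by

         i_after = i_before + 1;

     a request to express i_after via the candidate that preserves
     i_before can leave the statement untouched, provided the other
     operand is invariant in the loop (checked below).  */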
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
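  /* An illustrative sketch: for a use such as *(p + (sizetype) i * 4)
     expressed by a candidate based on the pointer p, the hint lets
     create_mem_ref build a reference of the form MEM[base: p, index: iv]
     rather than folding the whole combination into a plain offset; the
     exact shape of the resulting MEM_REF depends on the target's
     addressing modes.  */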
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
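
/* For example (the SSA names below are made up for illustration), an
   exit test

     if (i_4 < n_7)

   may be rewritten into a comparison of the chosen candidate against a
   bound precomputed in the loop preheader,

     if (ivtmp_12 != bound_13)

   whenever iv elimination found such a bound (cp->value below).  */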

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */
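
/* A note on the debug-info handling below (commentary only): an
   otherwise-dead iv that still appears in debug bind statements is first
   re-expressed in terms of a surviving candidate, so that releasing its
   SSA name does not degrade the generated debug information.  */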

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the induction variable
   optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}