tree-ssa-loop-ivopts.c (struct ivopts_data): New field iv_obstack.
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It proceeds in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45          the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60    possible; it might give better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
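
/* For illustration only: on a simple loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may, roughly speaking, strength-reduce the address computation
   &a[i] (which hides a multiplication by the element size) into a pointer
   induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   The exact outcome depends on the target costs computed by this pass.  */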
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "alias.h"
69 #include "symtab.h"
70 #include "tree.h"
71 #include "fold-const.h"
72 #include "stor-layout.h"
73 #include "tm_p.h"
74 #include "predict.h"
75 #include "hard-reg-set.h"
76 #include "function.h"
77 #include "dominance.h"
78 #include "cfg.h"
79 #include "basic-block.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa-alias.h"
82 #include "internal-fn.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "gimple-iterator.h"
88 #include "gimplify-me.h"
89 #include "gimple-ssa.h"
90 #include "cgraph.h"
91 #include "tree-cfg.h"
92 #include "tree-phinodes.h"
93 #include "ssa-iterators.h"
94 #include "stringpool.h"
95 #include "tree-ssanames.h"
96 #include "tree-ssa-loop-ivopts.h"
97 #include "tree-ssa-loop-manip.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
100 #include "rtl.h"
101 #include "flags.h"
102 #include "insn-config.h"
103 #include "expmed.h"
104 #include "dojump.h"
105 #include "explow.h"
106 #include "calls.h"
107 #include "emit-rtl.h"
108 #include "varasm.h"
109 #include "stmt.h"
110 #include "expr.h"
111 #include "tree-dfa.h"
112 #include "tree-ssa.h"
113 #include "cfgloop.h"
114 #include "tree-pass.h"
115 #include "tree-chrec.h"
116 #include "tree-scalar-evolution.h"
117 #include "params.h"
118 #include "langhooks.h"
119 #include "tree-affine.h"
120 #include "target.h"
121 #include "tree-inline.h"
122 #include "tree-ssa-propagate.h"
123 #include "tree-ssa-address.h"
124 #include "builtins.h"
125 #include "tree-vectorizer.h"
126
127 /* FIXME: Expressions are expanded to RTL in this pass to determine the
128 cost of different addressing modes. This should be moved to a TBD
129 interface between the GIMPLE and RTL worlds. */
130 #include "recog.h"
131
132 /* The infinite cost. */
133 #define INFTY 10000000
134
135 #define AVG_LOOP_NITER(LOOP) 5
136
137 /* Returns the expected number of loop iterations for LOOP.
138 The average trip count is computed from profile data if it
139 exists. */
140
141 static inline HOST_WIDE_INT
142 avg_loop_niter (struct loop *loop)
143 {
144 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
145 if (niter == -1)
146 return AVG_LOOP_NITER (loop);
147
148 return niter;
149 }
150
151 /* Representation of the induction variable. */
152 struct iv
153 {
154 tree base; /* Initial value of the iv. */
155   tree base_object;	/* A memory object to which the induction variable points.  */
156 tree step; /* Step of the iv (constant only). */
157 tree ssa_name; /* The ssa name with the value. */
158   unsigned use_id;	/* The identifier of the use recorded for this iv, if any.  */
159 bool biv_p; /* Is it a biv? */
160 bool have_use_for; /* Do we already have a use for it? */
161 bool no_overflow; /* True if the iv doesn't overflow. */
162 };
163
164 /* Per-ssa version information (induction variable descriptions, etc.). */
165 struct version_info
166 {
167 tree name; /* The ssa name. */
168 struct iv *iv; /* Induction variable description. */
169 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
170 an expression that is not an induction variable. */
171 bool preserve_biv; /* For the original biv, whether to preserve it. */
172 unsigned inv_id; /* Id of an invariant. */
173 };
174
175 /* Types of uses. */
176 enum use_type
177 {
178 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
179 USE_ADDRESS, /* Use in an address. */
180 USE_COMPARE /* Use is a compare. */
181 };
182
183 /* Cost of a computation. */
184 typedef struct
185 {
186 int cost; /* The runtime cost. */
187 unsigned complexity; /* The estimate of the complexity of the code for
188 the computation (in no concrete units --
189 complexity field should be larger for more
190 complex expressions and addressing modes). */
191 } comp_cost;
192
193 static const comp_cost no_cost = {0, 0};
194 static const comp_cost infinite_cost = {INFTY, INFTY};
195
196 /* The candidate - cost pair. */
197 struct cost_pair
198 {
199 struct iv_cand *cand; /* The candidate. */
200 comp_cost cost; /* The cost. */
201 bitmap depends_on; /* The list of invariants that have to be
202 preserved. */
203 tree value; /* For final value elimination, the expression for
204 the final value of the iv. For iv elimination,
205 the new bound to compare with. */
206 enum tree_code comp; /* For iv elimination, the comparison. */
207 int inv_expr_id; /* Loop invariant expression id. */
208 };
209
210 /* Use. */
211 struct iv_use
212 {
213 unsigned id; /* The id of the use. */
214 unsigned sub_id; /* The id of the sub use. */
215 enum use_type type; /* Type of the use. */
216 struct iv *iv; /* The induction variable it is based on. */
217   gimple stmt;		/* Statement in which it occurs.  */
218 tree *op_p; /* The place where it occurs. */
219 bitmap related_cands; /* The set of "related" iv candidates, plus the common
220 important ones. */
221
222 unsigned n_map_members; /* Number of candidates in the cost_map list. */
223 struct cost_pair *cost_map;
224                    /* The costs w.r.t. the iv candidates.  */
225
226 struct iv_cand *selected;
227 /* The selected candidate. */
228
229 struct iv_use *next; /* The next sub use. */
230 tree addr_base; /* Base address with const offset stripped. */
231 unsigned HOST_WIDE_INT addr_offset;
232 /* Const offset stripped from base address. */
233 };
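
/* For illustration only: address uses that share the same base object, step
   and stripped base address are chained as sub uses of a single iv_use and
   distinguished by ADDR_OFFSET.  For a hypothetical loop body containing

     a[i] = 1;
     a[i + 4] = 2;

   both stores can end up in one group whose sub use list is kept in
   ascending offset order (see record_sub_use below).  */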
234
235 /* The position where the iv is computed. */
236 enum iv_position
237 {
238 IP_NORMAL, /* At the end, just before the exit condition. */
239 IP_END, /* At the end of the latch block. */
240 IP_BEFORE_USE, /* Immediately before a specific use. */
241 IP_AFTER_USE, /* Immediately after a specific use. */
242 IP_ORIGINAL /* The original biv. */
243 };
244
245 /* The induction variable candidate. */
246 struct iv_cand
247 {
248 unsigned id; /* The number of the candidate. */
249 bool important; /* Whether this is an "important" candidate, i.e. such
250 that it should be considered by all uses. */
251 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
252 gimple incremented_at;/* For original biv, the statement where it is
253 incremented. */
254 tree var_before; /* The variable used for it before increment. */
255 tree var_after; /* The variable used for it after increment. */
256 struct iv *iv; /* The value of the candidate. NULL for
257 "pseudocandidate" used to indicate the possibility
258 to replace the final value of an iv by direct
259 computation of the value. */
260 unsigned cost; /* Cost of the candidate. */
261 unsigned cost_step; /* Cost of the candidate's increment operation. */
262 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
263 where it is incremented. */
264 bitmap depends_on; /* The list of invariants that are used in step of the
265 biv. */
266 };
267
268 /* Loop invariant expression hashtable entry. */
269 struct iv_inv_expr_ent
270 {
271 tree expr;
272 int id;
273 hashval_t hash;
274 };
275
276 /* The data used by the induction variable optimizations. */
277
278 typedef struct iv_use *iv_use_p;
279
280 typedef struct iv_cand *iv_cand_p;
281
282 /* Hashtable helpers. */
283
284 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
285 {
286 static inline hashval_t hash (const iv_inv_expr_ent *);
287 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
288 };
289
290 /* Hash function for loop invariant expressions. */
291
292 inline hashval_t
293 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
294 {
295 return expr->hash;
296 }
297
298 /* Hash table equality function for expressions. */
299
300 inline bool
301 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
302 const iv_inv_expr_ent *expr2)
303 {
304 return expr1->hash == expr2->hash
305 && operand_equal_p (expr1->expr, expr2->expr, 0);
306 }
307
308 struct ivopts_data
309 {
310 /* The currently optimized loop. */
311 struct loop *current_loop;
312 source_location loop_loc;
313
314 /* Numbers of iterations for all exits of the current loop. */
315 hash_map<edge, tree_niter_desc *> *niters;
316
317   /* Number of registers used in the current loop.  */
318 unsigned regs_used;
319
320 /* The size of version_info array allocated. */
321 unsigned version_info_size;
322
323 /* The array of information for the ssa names. */
324 struct version_info *version_info;
325
326 /* The hashtable of loop invariant expressions created
327 by ivopt. */
328 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
329
330 /* Loop invariant expression id. */
331 int inv_expr_id;
332
333 /* The bitmap of indices in version_info whose value was changed. */
334 bitmap relevant;
335
336 /* The uses of induction variables. */
337 vec<iv_use_p> iv_uses;
338
339 /* The candidates. */
340 vec<iv_cand_p> iv_candidates;
341
342 /* A bitmap of important candidates. */
343 bitmap important_candidates;
344
345 /* Cache used by tree_to_aff_combination_expand. */
346 hash_map<tree, name_expansion *> *name_expansion_cache;
347
348 /* The maximum invariant id. */
349 unsigned max_inv_id;
350
351 /* Obstack for iv structure. */
352 struct obstack iv_obstack;
353
354 /* Whether to consider just related and important candidates when replacing a
355 use. */
356 bool consider_all_candidates;
357
358 /* Are we optimizing for speed? */
359 bool speed;
360
361 /* Whether the loop body includes any function calls. */
362 bool body_includes_call;
363
364   /* Whether the loop body can only be exited via a single exit.  */
365 bool loop_single_exit_p;
366 };
367
368 /* An assignment of iv candidates to uses. */
369
370 struct iv_ca
371 {
372 /* The number of uses covered by the assignment. */
373 unsigned upto;
374
375 /* Number of uses that cannot be expressed by the candidates in the set. */
376 unsigned bad_uses;
377
378 /* Candidate assigned to a use, together with the related costs. */
379 struct cost_pair **cand_for_use;
380
381 /* Number of times each candidate is used. */
382 unsigned *n_cand_uses;
383
384 /* The candidates used. */
385 bitmap cands;
386
387 /* The number of candidates in the set. */
388 unsigned n_cands;
389
390 /* Total number of registers needed. */
391 unsigned n_regs;
392
393 /* Total cost of expressing uses. */
394 comp_cost cand_use_cost;
395
396 /* Total cost of candidates. */
397 unsigned cand_cost;
398
399 /* Number of times each invariant is used. */
400 unsigned *n_invariant_uses;
401
402 /* The array holding the number of uses of each loop
403      invariant expression created by ivopt.  */
404 unsigned *used_inv_expr;
405
406 /* The number of created loop invariants. */
407 unsigned num_used_inv_expr;
408
409 /* Total cost of the assignment. */
410 comp_cost cost;
411 };
412
413 /* Difference of two iv candidate assignments. */
414
415 struct iv_ca_delta
416 {
417 /* Changed use. */
418 struct iv_use *use;
419
420 /* An old assignment (for rollback purposes). */
421 struct cost_pair *old_cp;
422
423 /* A new assignment. */
424 struct cost_pair *new_cp;
425
426 /* Next change in the list. */
427 struct iv_ca_delta *next_change;
428 };
429
430 /* Bound on the number of candidates below which all candidates are considered.  */
431
432 #define CONSIDER_ALL_CANDIDATES_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
434
435 /* If there are more iv occurrences, we just give up (it is quite unlikely that
436 optimizing such a loop would help, and it would take ages). */
437
438 #define MAX_CONSIDERED_USES \
439 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
440
441 /* If there are at most this number of ivs in the set, always try removing
442    unnecessary ivs from the set.  */
443
444 #define ALWAYS_PRUNE_CAND_SET_BOUND \
445 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
446
447 /* The list of trees for which the decl_rtl field must be reset is stored
448 here. */
449
450 static vec<tree> decl_rtl_to_reset;
451
452 static comp_cost force_expr_to_var_cost (tree, bool);
453
454 /* Number of uses recorded in DATA. */
455
456 static inline unsigned
457 n_iv_uses (struct ivopts_data *data)
458 {
459 return data->iv_uses.length ();
460 }
461
462 /* Ith use recorded in DATA. */
463
464 static inline struct iv_use *
465 iv_use (struct ivopts_data *data, unsigned i)
466 {
467 return data->iv_uses[i];
468 }
469
470 /* Number of candidates recorded in DATA. */
471
472 static inline unsigned
473 n_iv_cands (struct ivopts_data *data)
474 {
475 return data->iv_candidates.length ();
476 }
477
478 /* Ith candidate recorded in DATA. */
479
480 static inline struct iv_cand *
481 iv_cand (struct ivopts_data *data, unsigned i)
482 {
483 return data->iv_candidates[i];
484 }
485
486 /* The single loop exit if it dominates the latch, NULL otherwise. */
487
488 edge
489 single_dom_exit (struct loop *loop)
490 {
491 edge exit = single_exit (loop);
492
493 if (!exit)
494 return NULL;
495
496 if (!just_once_each_iteration_p (loop, exit->src))
497 return NULL;
498
499 return exit;
500 }
501
502 /* Dumps information about the induction variable IV to FILE. */
503
504 void
505 dump_iv (FILE *file, struct iv *iv, bool dump_name)
506 {
507 if (iv->ssa_name && dump_name)
508 {
509 fprintf (file, "ssa name ");
510 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
511 fprintf (file, "\n");
512 }
513
514 fprintf (file, " type ");
515 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
516 fprintf (file, "\n");
517
518 if (iv->step)
519 {
520 fprintf (file, " base ");
521 print_generic_expr (file, iv->base, TDF_SLIM);
522 fprintf (file, "\n");
523
524 fprintf (file, " step ");
525 print_generic_expr (file, iv->step, TDF_SLIM);
526 fprintf (file, "\n");
527 }
528 else
529 {
530 fprintf (file, " invariant ");
531 print_generic_expr (file, iv->base, TDF_SLIM);
532 fprintf (file, "\n");
533 }
534
535 if (iv->base_object)
536 {
537 fprintf (file, " base object ");
538 print_generic_expr (file, iv->base_object, TDF_SLIM);
539 fprintf (file, "\n");
540 }
541
542 if (iv->biv_p)
543 fprintf (file, " is a biv\n");
544 }
545
546 /* Dumps information about the USE to FILE. */
547
548 void
549 dump_use (FILE *file, struct iv_use *use)
550 {
551 fprintf (file, "use %d", use->id);
552 if (use->sub_id)
553 fprintf (file, ".%d", use->sub_id);
554
555 fprintf (file, "\n");
556
557 switch (use->type)
558 {
559 case USE_NONLINEAR_EXPR:
560 fprintf (file, " generic\n");
561 break;
562
563 case USE_ADDRESS:
564 fprintf (file, " address\n");
565 break;
566
567 case USE_COMPARE:
568 fprintf (file, " compare\n");
569 break;
570
571 default:
572 gcc_unreachable ();
573 }
574
575 fprintf (file, " in statement ");
576 print_gimple_stmt (file, use->stmt, 0, 0);
577 fprintf (file, "\n");
578
579 fprintf (file, " at position ");
580 if (use->op_p)
581 print_generic_expr (file, *use->op_p, TDF_SLIM);
582 fprintf (file, "\n");
583
584 dump_iv (file, use->iv, false);
585
586 if (use->related_cands)
587 {
588 fprintf (file, " related candidates ");
589 dump_bitmap (file, use->related_cands);
590 }
591 }
592
593 /* Dumps information about the uses to FILE. */
594
595 void
596 dump_uses (FILE *file, struct ivopts_data *data)
597 {
598 unsigned i;
599 struct iv_use *use;
600
601 for (i = 0; i < n_iv_uses (data); i++)
602 {
603 use = iv_use (data, i);
604 do
605 {
606 dump_use (file, use);
607 use = use->next;
608 }
609 while (use);
610 fprintf (file, "\n");
611 }
612 }
613
614 /* Dumps information about induction variable candidate CAND to FILE. */
615
616 void
617 dump_cand (FILE *file, struct iv_cand *cand)
618 {
619 struct iv *iv = cand->iv;
620
621 fprintf (file, "candidate %d%s\n",
622 cand->id, cand->important ? " (important)" : "");
623
624 if (cand->depends_on)
625 {
626 fprintf (file, " depends on ");
627 dump_bitmap (file, cand->depends_on);
628 }
629
630 if (!iv)
631 {
632 fprintf (file, " final value replacement\n");
633 return;
634 }
635
636 if (cand->var_before)
637 {
638 fprintf (file, " var_before ");
639 print_generic_expr (file, cand->var_before, TDF_SLIM);
640 fprintf (file, "\n");
641 }
642 if (cand->var_after)
643 {
644 fprintf (file, " var_after ");
645 print_generic_expr (file, cand->var_after, TDF_SLIM);
646 fprintf (file, "\n");
647 }
648
649 switch (cand->pos)
650 {
651 case IP_NORMAL:
652 fprintf (file, " incremented before exit test\n");
653 break;
654
655 case IP_BEFORE_USE:
656 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
657 break;
658
659 case IP_AFTER_USE:
660 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
661 break;
662
663 case IP_END:
664 fprintf (file, " incremented at end\n");
665 break;
666
667 case IP_ORIGINAL:
668 fprintf (file, " original biv\n");
669 break;
670 }
671
672 dump_iv (file, iv, false);
673 }
674
675 /* Returns the info for ssa version VER. */
676
677 static inline struct version_info *
678 ver_info (struct ivopts_data *data, unsigned ver)
679 {
680 return data->version_info + ver;
681 }
682
683 /* Returns the info for ssa name NAME. */
684
685 static inline struct version_info *
686 name_info (struct ivopts_data *data, tree name)
687 {
688 return ver_info (data, SSA_NAME_VERSION (name));
689 }
690
691 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
692 emitted in LOOP. */
693
694 static bool
695 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
696 {
697 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
698
699 gcc_assert (bb);
700
701 if (sbb == loop->latch)
702 return true;
703
704 if (sbb != bb)
705 return false;
706
707 return stmt == last_stmt (bb);
708 }
709
710 /* Returns true if STMT is after the place where the original induction
711 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
712 if the positions are identical. */
713
714 static bool
715 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
716 {
717 basic_block cand_bb = gimple_bb (cand->incremented_at);
718 basic_block stmt_bb = gimple_bb (stmt);
719
720 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
721 return false;
722
723 if (stmt_bb != cand_bb)
724 return true;
725
726 if (true_if_equal
727 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
728 return true;
729 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
730 }
731
732 /* Returns true if STMT is after the place where the induction variable
733 CAND is incremented in LOOP. */
734
735 static bool
736 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
737 {
738 switch (cand->pos)
739 {
740 case IP_END:
741 return false;
742
743 case IP_NORMAL:
744 return stmt_after_ip_normal_pos (loop, stmt);
745
746 case IP_ORIGINAL:
747 case IP_AFTER_USE:
748 return stmt_after_inc_pos (cand, stmt, false);
749
750 case IP_BEFORE_USE:
751 return stmt_after_inc_pos (cand, stmt, true);
752
753 default:
754 gcc_unreachable ();
755 }
756 }
757
758 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
759
760 static bool
761 abnormal_ssa_name_p (tree exp)
762 {
763 if (!exp)
764 return false;
765
766 if (TREE_CODE (exp) != SSA_NAME)
767 return false;
768
769 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
770 }
771
772 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
773 abnormal phi node. Callback for for_each_index. */
774
775 static bool
776 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
777 void *data ATTRIBUTE_UNUSED)
778 {
779 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
780 {
781 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
782 return false;
783 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
784 return false;
785 }
786
787 return !abnormal_ssa_name_p (*index);
788 }
789
790 /* Returns true if EXPR contains a ssa name that occurs in an
791 abnormal phi node. */
792
793 bool
794 contains_abnormal_ssa_name_p (tree expr)
795 {
796 enum tree_code code;
797 enum tree_code_class codeclass;
798
799 if (!expr)
800 return false;
801
802 code = TREE_CODE (expr);
803 codeclass = TREE_CODE_CLASS (code);
804
805 if (code == SSA_NAME)
806 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
807
808 if (code == INTEGER_CST
809 || is_gimple_min_invariant (expr))
810 return false;
811
812 if (code == ADDR_EXPR)
813 return !for_each_index (&TREE_OPERAND (expr, 0),
814 idx_contains_abnormal_ssa_name_p,
815 NULL);
816
817 if (code == COND_EXPR)
818 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
821
822 switch (codeclass)
823 {
824 case tcc_binary:
825 case tcc_comparison:
826 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
827 return true;
828
829 /* Fallthru. */
830 case tcc_unary:
831 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
832 return true;
833
834 break;
835
836 default:
837 gcc_unreachable ();
838 }
839
840 return false;
841 }
842
843 /* Returns the structure describing number of iterations determined from
844 EXIT of DATA->current_loop, or NULL if something goes wrong. */
845
846 static struct tree_niter_desc *
847 niter_for_exit (struct ivopts_data *data, edge exit)
848 {
849 struct tree_niter_desc *desc;
850 tree_niter_desc **slot;
851
852 if (!data->niters)
853 {
854 data->niters = new hash_map<edge, tree_niter_desc *>;
855 slot = NULL;
856 }
857 else
858 slot = data->niters->get (exit);
859
860 if (!slot)
861 {
862       /* Try to determine the number of iterations.  We cannot safely work with ssa
863          names that appear in phi nodes on abnormal edges, so as not to
864          create overlapping live ranges for them (PR 27283).  */
865 desc = XNEW (struct tree_niter_desc);
866 if (!number_of_iterations_exit (data->current_loop,
867 exit, desc, true)
868 || contains_abnormal_ssa_name_p (desc->niter))
869 {
870 XDELETE (desc);
871 desc = NULL;
872 }
873 data->niters->put (exit, desc);
874 }
875 else
876 desc = *slot;
877
878 return desc;
879 }
880
881 /* Returns the structure describing number of iterations determined from
882 single dominating exit of DATA->current_loop, or NULL if something
883 goes wrong. */
884
885 static struct tree_niter_desc *
886 niter_for_single_dom_exit (struct ivopts_data *data)
887 {
888 edge exit = single_dom_exit (data->current_loop);
889
890 if (!exit)
891 return NULL;
892
893 return niter_for_exit (data, exit);
894 }
895
896 /* Initializes data structures used by the iv optimization pass, stored
897 in DATA. */
898
899 static void
900 tree_ssa_iv_optimize_init (struct ivopts_data *data)
901 {
902 data->version_info_size = 2 * num_ssa_names;
903 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
904 data->relevant = BITMAP_ALLOC (NULL);
905 data->important_candidates = BITMAP_ALLOC (NULL);
906 data->max_inv_id = 0;
907 data->niters = NULL;
908 data->iv_uses.create (20);
909 data->iv_candidates.create (20);
910 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
911 data->inv_expr_id = 0;
912 data->name_expansion_cache = NULL;
913 decl_rtl_to_reset.create (20);
914 gcc_obstack_init (&data->iv_obstack);
915 }
916
917 /* Returns a memory object to which EXPR points.  If we are able to
918    determine that it does not point to any such object, NULL is returned.  */
919
920 static tree
921 determine_base_object (tree expr)
922 {
923 enum tree_code code = TREE_CODE (expr);
924 tree base, obj;
925
926 /* If this is a pointer casted to any type, we need to determine
927 the base object for the pointer; so handle conversions before
928 throwing away non-pointer expressions. */
929 if (CONVERT_EXPR_P (expr))
930 return determine_base_object (TREE_OPERAND (expr, 0));
931
932 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
933 return NULL_TREE;
934
935 switch (code)
936 {
937 case INTEGER_CST:
938 return NULL_TREE;
939
940 case ADDR_EXPR:
941 obj = TREE_OPERAND (expr, 0);
942 base = get_base_address (obj);
943
944 if (!base)
945 return expr;
946
947 if (TREE_CODE (base) == MEM_REF)
948 return determine_base_object (TREE_OPERAND (base, 0));
949
950 return fold_convert (ptr_type_node,
951 build_fold_addr_expr (base));
952
953 case POINTER_PLUS_EXPR:
954 return determine_base_object (TREE_OPERAND (expr, 0));
955
956 case PLUS_EXPR:
957 case MINUS_EXPR:
958 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
959 gcc_unreachable ();
960
961 default:
962 return fold_convert (ptr_type_node, expr);
963 }
964 }
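
/* For illustration only: for a hypothetical expression &a[i] (an ADDR_EXPR
   of an ARRAY_REF whose base address is the declaration 'a'), the function
   above returns, roughly, &a converted to ptr_type_node; for an integer
   constant it returns NULL_TREE; and for a POINTER_PLUS_EXPR such as
   p + 4 it recurses on the pointer operand p.  */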
965
966 /* Return true if an address expression with a non-DECL_P operand appears
967 in EXPR. */
968
969 static bool
970 contain_complex_addr_expr (tree expr)
971 {
972 bool res = false;
973
974 STRIP_NOPS (expr);
975 switch (TREE_CODE (expr))
976 {
977 case POINTER_PLUS_EXPR:
978 case PLUS_EXPR:
979 case MINUS_EXPR:
980 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
981 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
982 break;
983
984 case ADDR_EXPR:
985 return (!DECL_P (TREE_OPERAND (expr, 0)));
986
987 default:
988 return false;
989 }
990
991 return res;
992 }
993
994 /* Allocates an induction variable with given initial value BASE and step STEP
995    for the loop described by DATA.  NO_OVERFLOW implies the iv doesn't overflow.  */
996
997 static struct iv *
998 alloc_iv (struct ivopts_data *data, tree base, tree step,
999 bool no_overflow = false)
1000 {
1001 tree expr = base;
1002 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1003 sizeof (struct iv));
1004 gcc_assert (step != NULL_TREE);
1005
1006   /* Lower address expressions in base, except those with a DECL_P operand.
1007 By doing this:
1008 1) More accurate cost can be computed for address expressions;
1009 2) Duplicate candidates won't be created for bases in different
1010 forms, like &a[0] and &a. */
1011 STRIP_NOPS (expr);
1012 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1013 || contain_complex_addr_expr (expr))
1014 {
1015 aff_tree comb;
1016 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1017 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1018 }
1019
1020 iv->base = base;
1021 iv->base_object = determine_base_object (base);
1022 iv->step = step;
1023 iv->biv_p = false;
1024 iv->have_use_for = false;
1025 iv->use_id = 0;
1026 iv->ssa_name = NULL_TREE;
1027 iv->no_overflow = no_overflow;
1028
1029 return iv;
1030 }
1031
1032 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1033 doesn't overflow. */
1034
1035 static void
1036 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1037 bool no_overflow)
1038 {
1039 struct version_info *info = name_info (data, iv);
1040
1041 gcc_assert (!info->iv);
1042
1043 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1044 info->iv = alloc_iv (data, base, step, no_overflow);
1045 info->iv->ssa_name = iv;
1046 }
1047
1048 /* Finds induction variable declaration for VAR. */
1049
1050 static struct iv *
1051 get_iv (struct ivopts_data *data, tree var)
1052 {
1053 basic_block bb;
1054 tree type = TREE_TYPE (var);
1055
1056 if (!POINTER_TYPE_P (type)
1057 && !INTEGRAL_TYPE_P (type))
1058 return NULL;
1059
1060 if (!name_info (data, var)->iv)
1061 {
1062 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1063
1064 if (!bb
1065 || !flow_bb_inside_loop_p (data->current_loop, bb))
1066 set_iv (data, var, var, build_int_cst (type, 0), true);
1067 }
1068
1069 return name_info (data, var)->iv;
1070 }
1071
1072 /* Return the first non-invariant ssa var found in EXPR. */
1073
1074 static tree
1075 extract_single_var_from_expr (tree expr)
1076 {
1077 int i, n;
1078 tree tmp;
1079 enum tree_code code;
1080
1081 if (!expr || is_gimple_min_invariant (expr))
1082 return NULL;
1083
1084 code = TREE_CODE (expr);
1085 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1086 {
1087 n = TREE_OPERAND_LENGTH (expr);
1088 for (i = 0; i < n; i++)
1089 {
1090 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1091
1092 if (tmp)
1093 return tmp;
1094 }
1095 }
1096 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1097 }
1098
1099 /* Finds basic ivs. */
1100
1101 static bool
1102 find_bivs (struct ivopts_data *data)
1103 {
1104 gphi *phi;
1105 affine_iv iv;
1106 tree step, type, base, stop;
1107 bool found = false;
1108 struct loop *loop = data->current_loop;
1109 gphi_iterator psi;
1110
1111 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1112 {
1113 phi = psi.phi ();
1114
1115 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1116 continue;
1117
1118 if (virtual_operand_p (PHI_RESULT (phi)))
1119 continue;
1120
1121 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1122 continue;
1123
1124 if (integer_zerop (iv.step))
1125 continue;
1126
1127 step = iv.step;
1128 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1129       /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1130 	 Ideally we would stop at any ssa var, but since that is expensive
1131 	 and such cases are unusual, we just do it for the first one.
1132
1133 See PR64705 for the rationale. */
1134 stop = extract_single_var_from_expr (step);
1135 base = expand_simple_operations (base, stop);
1136 if (contains_abnormal_ssa_name_p (base)
1137 || contains_abnormal_ssa_name_p (step))
1138 continue;
1139
1140 type = TREE_TYPE (PHI_RESULT (phi));
1141 base = fold_convert (type, base);
1142 if (step)
1143 {
1144 if (POINTER_TYPE_P (type))
1145 step = convert_to_ptrofftype (step);
1146 else
1147 step = fold_convert (type, step);
1148 }
1149
1150 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1151 found = true;
1152 }
1153
1154 return found;
1155 }
1156
1157 /* Marks basic ivs. */
1158
1159 static void
1160 mark_bivs (struct ivopts_data *data)
1161 {
1162 gphi *phi;
1163 gimple def;
1164 tree var;
1165 struct iv *iv, *incr_iv;
1166 struct loop *loop = data->current_loop;
1167 basic_block incr_bb;
1168 gphi_iterator psi;
1169
1170 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1171 {
1172 phi = psi.phi ();
1173
1174 iv = get_iv (data, PHI_RESULT (phi));
1175 if (!iv)
1176 continue;
1177
1178 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1179 def = SSA_NAME_DEF_STMT (var);
1180       /* Don't mark an iv peeled from another one as a biv.  */
1181 if (def
1182 && gimple_code (def) == GIMPLE_PHI
1183 && gimple_bb (def) == loop->header)
1184 continue;
1185
1186 incr_iv = get_iv (data, var);
1187 if (!incr_iv)
1188 continue;
1189
1190 /* If the increment is in the subloop, ignore it. */
1191 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1192 if (incr_bb->loop_father != data->current_loop
1193 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1194 continue;
1195
1196 iv->biv_p = true;
1197 incr_iv->biv_p = true;
1198 }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202 parameters to IV. */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1206 {
1207 tree lhs, stop;
1208 struct loop *loop = data->current_loop;
1209
1210 iv->base = NULL_TREE;
1211 iv->step = NULL_TREE;
1212
1213 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214 return false;
1215
1216 lhs = gimple_assign_lhs (stmt);
1217 if (TREE_CODE (lhs) != SSA_NAME)
1218 return false;
1219
1220 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221 return false;
1222
1223   /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1224      Ideally we would stop at any ssa var, but since that is expensive
1225      and such cases are unusual, we just do it for the first one.
1226
1227 See PR64705 for the rationale. */
1228 stop = extract_single_var_from_expr (iv->step);
1229 iv->base = expand_simple_operations (iv->base, stop);
1230 if (contains_abnormal_ssa_name_p (iv->base)
1231 || contains_abnormal_ssa_name_p (iv->step))
1232 return false;
1233
1234 /* If STMT could throw, then do not consider STMT as defining a GIV.
1235      While this will suppress optimizations, we cannot safely delete this
1236 GIV and associated statements, even if it appears it is not used. */
1237 if (stmt_could_throw_p (stmt))
1238 return false;
1239
1240 return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT. */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1247 {
1248 affine_iv iv;
1249
1250 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251 return;
1252
1253 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB. */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261 gimple_stmt_iterator bsi;
1262
1263 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264 find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs. */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272 struct loop *loop = data->current_loop;
1273 basic_block *body = get_loop_body_in_dom_order (loop);
1274 unsigned i;
1275
1276 for (i = 0; i < loop->num_nodes; i++)
1277 find_givs_in_bb (data, body[i]);
1278 free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282 variable and if so, its initial value and step. */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287 unsigned i;
1288 bitmap_iterator bi;
1289
1290 if (!find_bivs (data))
1291 return false;
1292
1293 find_givs (data);
1294 mark_bivs (data);
1295
1296 if (dump_file && (dump_flags & TDF_DETAILS))
1297 {
1298 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300 if (niter)
1301 {
1302 fprintf (dump_file, " number of iterations ");
1303 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304 if (!integer_zerop (niter->may_be_zero))
1305 {
1306 fprintf (dump_file, "; zero if ");
1307 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308 }
1309 fprintf (dump_file, "\n\n");
1310 	}
1311
1312 fprintf (dump_file, "Induction variables:\n\n");
1313
1314 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315 {
1316 if (ver_info (data, i)->iv)
1317 dump_iv (dump_file, ver_info (data, i)->iv, true);
1318 }
1319 }
1320
1321 return true;
1322 }
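
/* For illustration only: in a hypothetical loop in SSA form

     i_1 = PHI <0 (preheader), i_2 (latch)>
     p_3 = &a[i_1];
     i_2 = i_1 + 1;

   i_1 is a basic induction variable (found by find_bivs from the header
   phi and marked by mark_bivs together with i_2), while p_3 is a general
   induction variable found by find_givs, with base &a[0] and, assuming
   4-byte elements, step 4.  */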
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326 is the const offset stripped from IV base. For uses of other types,
1327 ADDR_BASE and ADDR_OFFSET are zero by default. */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1332 unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334 struct iv_use *use = XCNEW (struct iv_use);
1335
1336 use->id = n_iv_uses (data);
1337 use->sub_id = 0;
1338 use->type = use_type;
1339 use->iv = iv;
1340 use->stmt = stmt;
1341 use->op_p = use_p;
1342 use->related_cands = BITMAP_ALLOC (NULL);
1343 use->next = NULL;
1344 use->addr_base = addr_base;
1345 use->addr_offset = addr_offset;
1346
1347 data->iv_uses.safe_push (use);
1348
1349 return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353 The sub use is recorded under the one whose use id is ID_GROUP. */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357 struct iv *iv, gimple stmt, enum use_type use_type,
1358 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359 unsigned int id_group)
1360 {
1361 struct iv_use *use = XCNEW (struct iv_use);
1362 struct iv_use *group = iv_use (data, id_group);
1363
1364 use->id = group->id;
1365 use->sub_id = 0;
1366 use->type = use_type;
1367 use->iv = iv;
1368 use->stmt = stmt;
1369 use->op_p = use_p;
1370 use->related_cands = NULL;
1371 use->addr_base = addr_base;
1372 use->addr_offset = addr_offset;
1373
1374 /* Sub use list is maintained in offset ascending order. */
1375 if (addr_offset <= group->addr_offset)
1376 {
1377 use->related_cands = group->related_cands;
1378 group->related_cands = NULL;
1379 use->next = group;
1380 data->iv_uses[id_group] = use;
1381 }
1382 else
1383 {
1384 struct iv_use *pre;
1385 do
1386 {
1387 pre = group;
1388 group = group->next;
1389 }
1390 while (group && addr_offset > group->addr_offset);
1391 use->next = pre->next;
1392 pre->next = use;
1393 }
1394
1395 return use;
1396 }
1397
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399 NONLINEAR_USE is true if the invariant is used in a way we do not
1400 handle specially. */
1401
1402 static void
1403 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1404 {
1405 basic_block bb;
1406 struct version_info *info;
1407
1408 if (TREE_CODE (op) != SSA_NAME
1409 || virtual_operand_p (op))
1410 return;
1411
1412 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1413 if (bb
1414 && flow_bb_inside_loop_p (data->current_loop, bb))
1415 return;
1416
1417 info = name_info (data, op);
1418 info->name = op;
1419 info->has_nonlin_use |= nonlinear_use;
1420 if (!info->inv_id)
1421 info->inv_id = ++data->max_inv_id;
1422 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 }
1424
1425 /* Checks whether the use OP is interesting and if so, records it. */
1426
1427 static struct iv_use *
1428 find_interesting_uses_op (struct ivopts_data *data, tree op)
1429 {
1430 struct iv *iv;
1431 gimple stmt;
1432 struct iv_use *use;
1433
1434 if (TREE_CODE (op) != SSA_NAME)
1435 return NULL;
1436
1437 iv = get_iv (data, op);
1438 if (!iv)
1439 return NULL;
1440
1441 if (iv->have_use_for)
1442 {
1443 use = iv_use (data, iv->use_id);
1444
1445 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1446 return use;
1447 }
1448
1449 if (integer_zerop (iv->step))
1450 {
1451 record_invariant (data, op, true);
1452 return NULL;
1453 }
1454 iv->have_use_for = true;
1455
1456 stmt = SSA_NAME_DEF_STMT (op);
1457 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1458 || is_gimple_assign (stmt));
1459
1460 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1461 iv->use_id = use->id;
1462
1463 return use;
1464 }
1465
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467 of an induction variable and an invariant. If this is the case,
1468 CONTROL_VAR is set to location of the iv, BOUND to the location of
1469 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470 induction variable descriptions, and true is returned. If this is not
1471 the case, CONTROL_VAR and BOUND are set to the arguments of the
1472 condition and false is returned. */
1473
1474 static bool
1475 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1476 tree **control_var, tree **bound,
1477 struct iv **iv_var, struct iv **iv_bound)
1478 {
1479 /* The objects returned when COND has constant operands. */
1480 static struct iv const_iv;
1481 static tree zero;
1482 tree *op0 = &zero, *op1 = &zero;
1483 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1484 bool ret = false;
1485
1486 if (gimple_code (stmt) == GIMPLE_COND)
1487 {
1488 gcond *cond_stmt = as_a <gcond *> (stmt);
1489 op0 = gimple_cond_lhs_ptr (cond_stmt);
1490 op1 = gimple_cond_rhs_ptr (cond_stmt);
1491 }
1492 else
1493 {
1494 op0 = gimple_assign_rhs1_ptr (stmt);
1495 op1 = gimple_assign_rhs2_ptr (stmt);
1496 }
1497
1498 zero = integer_zero_node;
1499 const_iv.step = integer_zero_node;
1500
1501 if (TREE_CODE (*op0) == SSA_NAME)
1502 iv0 = get_iv (data, *op0);
1503 if (TREE_CODE (*op1) == SSA_NAME)
1504 iv1 = get_iv (data, *op1);
1505
1506 /* Exactly one of the compared values must be an iv, and the other one must
1507 be an invariant. */
1508 if (!iv0 || !iv1)
1509 goto end;
1510
1511 if (integer_zerop (iv0->step))
1512 {
1513 /* Control variable may be on the other side. */
1514 std::swap (op0, op1);
1515 std::swap (iv0, iv1);
1516 }
1517 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1518
1519 end:
1520 if (control_var)
1521 *control_var = op0;
1522 if (iv_var)
1523 *iv_var = iv0;
1524 if (bound)
1525 *bound = op1;
1526 if (iv_bound)
1527 *iv_bound = iv1;
1528
1529 return ret;
1530 }
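
/* For illustration only: for a hypothetical exit test

     if (i_1 < n_2)

   where i_1 is an induction variable and n_2 is loop invariant, the
   function above returns true with *CONTROL_VAR pointing to the i_1
   operand and *BOUND pointing to the n_2 operand; the two sides are
   swapped first if the induction variable happens to be on the right.  */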
1531
1532 /* Checks whether the condition in STMT is interesting and if so,
1533 records it. */
1534
1535 static void
1536 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1537 {
1538 tree *var_p, *bound_p;
1539 struct iv *var_iv;
1540
1541 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1542 {
1543 find_interesting_uses_op (data, *var_p);
1544 find_interesting_uses_op (data, *bound_p);
1545 return;
1546 }
1547
1548 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1549 }
1550
1551 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1552    is obviously invariant, i.e. such that all of its operands are defined
1553    outside of the returned loop.  Returns NULL if EXPR is not
1554    even obviously invariant in LOOP.  */
1555
1556 struct loop *
1557 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1558 {
1559 basic_block def_bb;
1560 unsigned i, len;
1561
1562 if (is_gimple_min_invariant (expr))
1563 return current_loops->tree_root;
1564
1565 if (TREE_CODE (expr) == SSA_NAME)
1566 {
1567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1568 if (def_bb)
1569 {
1570 if (flow_bb_inside_loop_p (loop, def_bb))
1571 return NULL;
1572 return superloop_at_depth (loop,
1573 loop_depth (def_bb->loop_father) + 1);
1574 }
1575
1576 return current_loops->tree_root;
1577 }
1578
1579 if (!EXPR_P (expr))
1580 return NULL;
1581
1582 unsigned maxdepth = 0;
1583 len = TREE_OPERAND_LENGTH (expr);
1584 for (i = 0; i < len; i++)
1585 {
1586 struct loop *ivloop;
1587 if (!TREE_OPERAND (expr, i))
1588 continue;
1589
1590 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1591 if (!ivloop)
1592 return NULL;
1593 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1594 }
1595
1596 return superloop_at_depth (loop, maxdepth);
1597 }
1598
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600 i.e. if all its operands are defined outside of the LOOP. LOOP
1601 should not be the function body. */
1602
1603 bool
1604 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1605 {
1606 basic_block def_bb;
1607 unsigned i, len;
1608
1609 gcc_assert (loop_depth (loop) > 0);
1610
1611 if (is_gimple_min_invariant (expr))
1612 return true;
1613
1614 if (TREE_CODE (expr) == SSA_NAME)
1615 {
1616 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1617 if (def_bb
1618 && flow_bb_inside_loop_p (loop, def_bb))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 if (!EXPR_P (expr))
1625 return false;
1626
1627 len = TREE_OPERAND_LENGTH (expr);
1628 for (i = 0; i < len; i++)
1629 if (TREE_OPERAND (expr, i)
1630 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1631 return false;
1632
1633 return true;
1634 }
1635
1636 /* Accumulates the steps of indices into DATA and replaces their values with the
1637 initial ones. Returns false when the value of the index cannot be determined.
1638 Callback for for_each_index. */
1639
1640 struct ifs_ivopts_data
1641 {
1642 struct ivopts_data *ivopts_data;
1643 gimple stmt;
1644 tree step;
1645 };
1646
1647 static bool
1648 idx_find_step (tree base, tree *idx, void *data)
1649 {
1650 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1651 struct iv *iv;
1652 bool use_overflow_semantics = false;
1653 tree step, iv_base, iv_step, lbound, off;
1654 struct loop *loop = dta->ivopts_data->current_loop;
1655
1656 /* If base is a component ref, require that the offset of the reference
1657 be invariant. */
1658 if (TREE_CODE (base) == COMPONENT_REF)
1659 {
1660 off = component_ref_field_offset (base);
1661 return expr_invariant_in_loop_p (loop, off);
1662 }
1663
1664 /* If base is array, first check whether we will be able to move the
1665 reference out of the loop (in order to take its address in strength
1666 reduction). In order for this to work we need both lower bound
1667 and step to be loop invariants. */
1668 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1669 {
1670 /* Moreover, for a range, the size needs to be invariant as well. */
1671 if (TREE_CODE (base) == ARRAY_RANGE_REF
1672 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1673 return false;
1674
1675 step = array_ref_element_size (base);
1676 lbound = array_ref_low_bound (base);
1677
1678 if (!expr_invariant_in_loop_p (loop, step)
1679 || !expr_invariant_in_loop_p (loop, lbound))
1680 return false;
1681 }
1682
1683 if (TREE_CODE (*idx) != SSA_NAME)
1684 return true;
1685
1686 iv = get_iv (dta->ivopts_data, *idx);
1687 if (!iv)
1688 return false;
1689
1690   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1691 	  *&x[0], which is not folded and does not trigger the
1692 	  ARRAY_REF path below.  */
1693 *idx = iv->base;
1694
1695 if (integer_zerop (iv->step))
1696 return true;
1697
1698 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1699 {
1700 step = array_ref_element_size (base);
1701
1702 /* We only handle addresses whose step is an integer constant. */
1703 if (TREE_CODE (step) != INTEGER_CST)
1704 return false;
1705 }
1706 else
1707     /* The step for pointer arithmetic is already 1 byte.  */
1708 step = size_one_node;
1709
1710 iv_base = iv->base;
1711 iv_step = iv->step;
1712 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1713 use_overflow_semantics = true;
1714
1715 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1716 sizetype, &iv_base, &iv_step, dta->stmt,
1717 use_overflow_semantics))
1718 {
1719 /* The index might wrap. */
1720 return false;
1721 }
1722
1723 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1724 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1725
1726 return true;
1727 }
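
/* For illustration only: when the callback above visits a hypothetical
   reference a[i_1], where a has 4-byte elements and i_1 is an induction
   variable with step 1, it replaces the index with the initial value of
   i_1 and adds 4 * 1 = 4 to DTA->step, so the step of the whole address
   (in bytes) is accumulated over all indices.  */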
1728
1729 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1730 object is passed to it in DATA. */
1731
1732 static bool
1733 idx_record_use (tree base, tree *idx,
1734 void *vdata)
1735 {
1736 struct ivopts_data *data = (struct ivopts_data *) vdata;
1737 find_interesting_uses_op (data, *idx);
1738 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1739 {
1740 find_interesting_uses_op (data, array_ref_element_size (base));
1741 find_interesting_uses_op (data, array_ref_low_bound (base));
1742 }
1743 return true;
1744 }
1745
1746 /* If we can prove that TOP = cst * BOT for some constant cst,
1747 store cst to MUL and return true. Otherwise return false.
1748 The returned value is always sign-extended, regardless of the
1749 signedness of TOP and BOT. */
1750
1751 static bool
1752 constant_multiple_of (tree top, tree bot, widest_int *mul)
1753 {
1754 tree mby;
1755 enum tree_code code;
1756 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1757 widest_int res, p0, p1;
1758
1759 STRIP_NOPS (top);
1760 STRIP_NOPS (bot);
1761
1762 if (operand_equal_p (top, bot, 0))
1763 {
1764 *mul = 1;
1765 return true;
1766 }
1767
1768 code = TREE_CODE (top);
1769 switch (code)
1770 {
1771 case MULT_EXPR:
1772 mby = TREE_OPERAND (top, 1);
1773 if (TREE_CODE (mby) != INTEGER_CST)
1774 return false;
1775
1776 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1777 return false;
1778
1779 *mul = wi::sext (res * wi::to_widest (mby), precision);
1780 return true;
1781
1782 case PLUS_EXPR:
1783 case MINUS_EXPR:
1784 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1785 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1786 return false;
1787
1788 if (code == MINUS_EXPR)
1789 p1 = -p1;
1790 *mul = wi::sext (p0 + p1, precision);
1791 return true;
1792
1793 case INTEGER_CST:
1794 if (TREE_CODE (bot) != INTEGER_CST)
1795 return false;
1796
1797 p0 = widest_int::from (top, SIGNED);
1798 p1 = widest_int::from (bot, SIGNED);
1799 if (p1 == 0)
1800 return false;
1801 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1802 return res == 0;
1803
1804 default:
1805 return false;
1806 }
1807 }
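
/* For illustration only: with a hypothetical TOP of i_1 * 8 and BOT of
   i_1, the MULT_EXPR case above sets *MUL to 8; with TOP = 12 and
   BOT = 4 (both INTEGER_CSTs) it sets *MUL to 3; with TOP = 13 and
   BOT = 4 it returns false because the division leaves a remainder.  */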
1808
1809 /* Return true if memory reference REF with step STEP may be unaligned. */
1810
1811 static bool
1812 may_be_unaligned_p (tree ref, tree step)
1813 {
1814 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1815 thus they are not misaligned. */
1816 if (TREE_CODE (ref) == TARGET_MEM_REF)
1817 return false;
1818
1819 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1820 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1821 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1822
1823 unsigned HOST_WIDE_INT bitpos;
1824 unsigned int ref_align;
1825 get_object_alignment_1 (ref, &ref_align, &bitpos);
1826 if (ref_align < align
1827 || (bitpos % align) != 0
1828 || (bitpos % BITS_PER_UNIT) != 0)
1829 return true;
1830
1831 unsigned int trailing_zeros = tree_ctz (step);
1832 if (trailing_zeros < HOST_BITS_PER_INT
1833 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1834 return true;
1835
1836 return false;
1837 }
1838
1839 /* Return true if EXPR may be non-addressable. */
1840
1841 bool
1842 may_be_nonaddressable_p (tree expr)
1843 {
1844 switch (TREE_CODE (expr))
1845 {
1846 case TARGET_MEM_REF:
1847 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1848 target, thus they are always addressable. */
1849 return false;
1850
1851 case COMPONENT_REF:
1852 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1853 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1854
1855 case VIEW_CONVERT_EXPR:
1856       /* This kind of view-conversion may wrap non-addressable objects
1857 and make them look addressable. After some processing the
1858 non-addressability may be uncovered again, causing ADDR_EXPRs
1859 of inappropriate objects to be built. */
1860 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1861 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1862 return true;
1863
1864 /* ... fall through ... */
1865
1866 case ARRAY_REF:
1867 case ARRAY_RANGE_REF:
1868 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1869
1870 CASE_CONVERT:
1871 return true;
1872
1873 default:
1874 break;
1875 }
1876
1877 return false;
1878 }
1879
1880 static tree
1881 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1882
1883 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1884    If there is an existing use with the same stripped iv base and step,
1885 this function records this one as a sub use to that; otherwise records
1886 it as a normal one. */
1887
1888 static struct iv_use *
1889 record_group_use (struct ivopts_data *data, tree *use_p,
1890 struct iv *iv, gimple stmt, enum use_type use_type)
1891 {
1892 unsigned int i;
1893 struct iv_use *use;
1894 tree addr_base;
1895 unsigned HOST_WIDE_INT addr_offset;
1896
1897   /* Sub uses are only supported for address type uses, that is, those
1898      with a base object.  */
1899 if (!iv->base_object)
1900 return record_use (data, use_p, iv, stmt, use_type);
1901
1902 addr_base = strip_offset (iv->base, &addr_offset);
1903 for (i = 0; i < n_iv_uses (data); i++)
1904 {
1905 use = iv_use (data, i);
1906 if (use->type != USE_ADDRESS || !use->iv->base_object)
1907 continue;
1908
1909 /* Check if it has the same stripped base and step. */
1910 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1911 && operand_equal_p (iv->step, use->iv->step, 0)
1912 && operand_equal_p (addr_base, use->addr_base, 0))
1913 break;
1914 }
1915
1916 if (i == n_iv_uses (data))
1917 return record_use (data, use_p, iv, stmt,
1918 use_type, addr_base, addr_offset);
1919 else
1920 return record_sub_use (data, use_p, iv, stmt,
1921 use_type, addr_base, addr_offset, i);
1922 }
1923
1924 /* Finds addresses in *OP_P inside STMT. */
1925
1926 static void
1927 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1928 {
1929 tree base = *op_p, step = size_zero_node;
1930 struct iv *civ;
1931 struct ifs_ivopts_data ifs_ivopts_data;
1932
1933 /* Do not play with volatile memory references. A bit too conservative,
1934 perhaps, but safe. */
1935 if (gimple_has_volatile_ops (stmt))
1936 goto fail;
1937
1938 /* Ignore bitfields for now. Not really something terribly complicated
1939 to handle. TODO. */
1940 if (TREE_CODE (base) == BIT_FIELD_REF)
1941 goto fail;
1942
1943 base = unshare_expr (base);
1944
1945 if (TREE_CODE (base) == TARGET_MEM_REF)
1946 {
1947 tree type = build_pointer_type (TREE_TYPE (base));
1948 tree astep;
1949
1950 if (TMR_BASE (base)
1951 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1952 {
1953 civ = get_iv (data, TMR_BASE (base));
1954 if (!civ)
1955 goto fail;
1956
1957 TMR_BASE (base) = civ->base;
1958 step = civ->step;
1959 }
1960 if (TMR_INDEX2 (base)
1961 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1962 {
1963 civ = get_iv (data, TMR_INDEX2 (base));
1964 if (!civ)
1965 goto fail;
1966
1967 TMR_INDEX2 (base) = civ->base;
1968 step = civ->step;
1969 }
1970 if (TMR_INDEX (base)
1971 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1972 {
1973 civ = get_iv (data, TMR_INDEX (base));
1974 if (!civ)
1975 goto fail;
1976
1977 TMR_INDEX (base) = civ->base;
1978 astep = civ->step;
1979
1980 if (astep)
1981 {
1982 if (TMR_STEP (base))
1983 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1984
1985 step = fold_build2 (PLUS_EXPR, type, step, astep);
1986 }
1987 }
1988
1989 if (integer_zerop (step))
1990 goto fail;
1991 base = tree_mem_ref_addr (type, base);
1992 }
1993 else
1994 {
1995 ifs_ivopts_data.ivopts_data = data;
1996 ifs_ivopts_data.stmt = stmt;
1997 ifs_ivopts_data.step = size_zero_node;
1998 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1999 || integer_zerop (ifs_ivopts_data.step))
2000 goto fail;
2001 step = ifs_ivopts_data.step;
2002
2003 /* Check that the base expression is addressable. This needs
2004 to be done after substituting bases of IVs into it. */
2005 if (may_be_nonaddressable_p (base))
2006 goto fail;
2007
2008 /* Moreover, on strict alignment platforms, check that it is
2009 sufficiently aligned. */
2010 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2011 goto fail;
2012
2013 base = build_fold_addr_expr (base);
2014
2015 /* Substituting bases of IVs into the base expression might
2016 have caused folding opportunities. */
2017 if (TREE_CODE (base) == ADDR_EXPR)
2018 {
2019 tree *ref = &TREE_OPERAND (base, 0);
2020 while (handled_component_p (*ref))
2021 ref = &TREE_OPERAND (*ref, 0);
2022 if (TREE_CODE (*ref) == MEM_REF)
2023 {
2024 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2025 TREE_OPERAND (*ref, 0),
2026 TREE_OPERAND (*ref, 1));
2027 if (tem)
2028 *ref = tem;
2029 }
2030 }
2031 }
2032
2033 civ = alloc_iv (data, base, step);
2034 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2035 return;
2036
2037 fail:
2038 for_each_index (op_p, idx_record_use, data);
2039 }
2040
2041 /* Finds and records invariants used in STMT. */
2042
2043 static void
2044 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2045 {
2046 ssa_op_iter iter;
2047 use_operand_p use_p;
2048 tree op;
2049
2050 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2051 {
2052 op = USE_FROM_PTR (use_p);
2053 record_invariant (data, op, false);
2054 }
2055 }
2056
2057 /* Finds interesting uses of induction variables in the statement STMT. */
2058
2059 static void
2060 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2061 {
2062 struct iv *iv;
2063 tree op, *lhs, *rhs;
2064 ssa_op_iter iter;
2065 use_operand_p use_p;
2066 enum tree_code code;
2067
2068 find_invariants_stmt (data, stmt);
2069
2070 if (gimple_code (stmt) == GIMPLE_COND)
2071 {
2072 find_interesting_uses_cond (data, stmt);
2073 return;
2074 }
2075
2076 if (is_gimple_assign (stmt))
2077 {
2078 lhs = gimple_assign_lhs_ptr (stmt);
2079 rhs = gimple_assign_rhs1_ptr (stmt);
2080
2081 if (TREE_CODE (*lhs) == SSA_NAME)
2082 {
2083 /* If the statement defines an induction variable, the uses are not
2084 interesting by themselves. */
2085
2086 iv = get_iv (data, *lhs);
2087
2088 if (iv && !integer_zerop (iv->step))
2089 return;
2090 }
2091
2092 code = gimple_assign_rhs_code (stmt);
2093 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2094 && (REFERENCE_CLASS_P (*rhs)
2095 || is_gimple_val (*rhs)))
2096 {
2097 if (REFERENCE_CLASS_P (*rhs))
2098 find_interesting_uses_address (data, stmt, rhs);
2099 else
2100 find_interesting_uses_op (data, *rhs);
2101
2102 if (REFERENCE_CLASS_P (*lhs))
2103 find_interesting_uses_address (data, stmt, lhs);
2104 return;
2105 }
2106 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2107 {
2108 find_interesting_uses_cond (data, stmt);
2109 return;
2110 }
2111
2112 /* TODO -- we should also handle address uses of type
2113
2114 memory = call (whatever);
2115
2116 and
2117
2118 call (memory). */
2119 }
2120
2121 if (gimple_code (stmt) == GIMPLE_PHI
2122 && gimple_bb (stmt) == data->current_loop->header)
2123 {
2124 iv = get_iv (data, PHI_RESULT (stmt));
2125
2126 if (iv && !integer_zerop (iv->step))
2127 return;
2128 }
2129
2130 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2131 {
2132 op = USE_FROM_PTR (use_p);
2133
2134 if (TREE_CODE (op) != SSA_NAME)
2135 continue;
2136
2137 iv = get_iv (data, op);
2138 if (!iv)
2139 continue;
2140
2141 find_interesting_uses_op (data, op);
2142 }
2143 }
2144
2145 /* Finds interesting uses of induction variables outside of loops
2146 on loop exit edge EXIT. */
2147
2148 static void
2149 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2150 {
2151 gphi *phi;
2152 gphi_iterator psi;
2153 tree def;
2154
2155 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2156 {
2157 phi = psi.phi ();
2158 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2159 if (!virtual_operand_p (def))
2160 find_interesting_uses_op (data, def);
2161 }
2162 }
2163
2164 /* Finds uses of the induction variables that are interesting. */
2165
2166 static void
2167 find_interesting_uses (struct ivopts_data *data)
2168 {
2169 basic_block bb;
2170 gimple_stmt_iterator bsi;
2171 basic_block *body = get_loop_body (data->current_loop);
2172 unsigned i;
2173 struct version_info *info;
2174 edge e;
2175
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2177 fprintf (dump_file, "Uses:\n\n");
2178
2179 for (i = 0; i < data->current_loop->num_nodes; i++)
2180 {
2181 edge_iterator ei;
2182 bb = body[i];
2183
2184 FOR_EACH_EDGE (e, ei, bb->succs)
2185 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2186 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2187 find_interesting_uses_outside (data, e);
2188
2189 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2190 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2191 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2192 if (!is_gimple_debug (gsi_stmt (bsi)))
2193 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2194 }
2195
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 {
2198 bitmap_iterator bi;
2199
2200 fprintf (dump_file, "\n");
2201
2202 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2203 {
2204 info = ver_info (data, i);
2205 if (info->inv_id)
2206 {
2207 fprintf (dump_file, " ");
2208 print_generic_expr (dump_file, info->name, TDF_SLIM);
2209 fprintf (dump_file, " is invariant (%d)%s\n",
2210 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2211 }
2212 }
2213
2214 fprintf (dump_file, "\n");
2215 }
2216
2217 free (body);
2218 }
2219
2220 /* Compute the maximum offset of the [base + offset] addressing mode
2221 for the memory reference represented by USE. */
2222
2223 static HOST_WIDE_INT
2224 compute_max_addr_offset (struct iv_use *use)
2225 {
2226 int width;
2227 rtx reg, addr;
2228 HOST_WIDE_INT i, off;
2229 unsigned list_index, num;
2230 addr_space_t as;
2231 machine_mode mem_mode, addr_mode;
2232 static vec<HOST_WIDE_INT> max_offset_list;
2233
2234 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2235 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2236
2237 num = max_offset_list.length ();
2238 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2239 if (list_index >= num)
2240 {
2241 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2242 for (; num < max_offset_list.length (); num++)
2243 max_offset_list[num] = -1;
2244 }
2245
2246 off = max_offset_list[list_index];
2247 if (off != -1)
2248 return off;
2249
2250 addr_mode = targetm.addr_space.address_mode (as);
2251 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2252 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2253
2254 width = GET_MODE_BITSIZE (addr_mode) - 1;
2255 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2256 width = HOST_BITS_PER_WIDE_INT - 1;
2257
2258 for (i = width; i > 0; i--)
2259 {
2260 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2261 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2262 if (memory_address_addr_space_p (mem_mode, addr, as))
2263 break;
2264
2265 /* For some strict-alignment targets, the offset must be naturally
2266 aligned. Try an aligned offset if mem_mode is not QImode. */
2267 off = ((unsigned HOST_WIDE_INT) 1 << i);
2268 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2269 {
2270 off -= GET_MODE_SIZE (mem_mode);
2271 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2272 if (memory_address_addr_space_p (mem_mode, addr, as))
2273 break;
2274 }
2275 }
2276 if (i == 0)
2277 off = 0;
2278
2279 max_offset_list[list_index] = off;
2280 return off;
2281 }
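/* A minimal standalone sketch (not part of the pass) of the probing idea
   used above: to find the largest constant offset the target accepts in a
   [base + offset] address, try 2^i - 1 for decreasing i until a validity
   predicate accepts it.  In the real code the predicate is
   memory_address_addr_space_p on a synthesized PLUS rtx; the names below
   are hypothetical.  */

static long
example_probe_max_offset (int width, int (*offset_valid_p) (long))
{
  int i;

  for (i = width; i > 0; i--)
    {
      long off = (1L << i) - 1;

      /* The first accepted probe is the largest representable offset.  */
      if (offset_valid_p (off))
	return off;
    }
  return 0;
}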
2282
2283 /* Check if all small groups should be split. Return true if and
2284 only if:
2285
2286 1) At least one group contains two uses with different offsets.
2287 2) No group contains more than two uses with different offsets.
2288
2289 Return false otherwise. We want to split such groups because:
2290
2291 1) Small groups don't have much benefit and may interfere with
2292 general candidate selection.
2293 2) The problem size with only small groups is usually small and the
2294 general algorithm can handle it well.
2295
2296 TODO -- Above claim may not hold when auto increment is supported. */
2297
2298 static bool
2299 split_all_small_groups (struct ivopts_data *data)
2300 {
2301 bool split_p = false;
2302 unsigned int i, n, distinct;
2303 struct iv_use *pre, *use;
2304
2305 n = n_iv_uses (data);
2306 for (i = 0; i < n; i++)
2307 {
2308 use = iv_use (data, i);
2309 if (!use->next)
2310 continue;
2311
2312 distinct = 1;
2313 gcc_assert (use->type == USE_ADDRESS);
2314 for (pre = use, use = use->next; use; pre = use, use = use->next)
2315 {
2316 if (pre->addr_offset != use->addr_offset)
2317 distinct++;
2318
2319 if (distinct > 2)
2320 return false;
2321 }
2322 if (distinct == 2)
2323 split_p = true;
2324 }
2325
2326 return split_p;
2327 }
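/* A standalone sketch (not part of the pass) of the split criterion
   documented above, applied to an array holding the number of distinct
   offsets seen in each multi-use group.  The names are hypothetical.  */

static int
example_should_split_small_groups (const unsigned *distinct, unsigned n_groups)
{
  unsigned i;
  int any_pair = 0;

  for (i = 0; i < n_groups; i++)
    {
      /* A group with more than two distinct offsets is worth keeping.  */
      if (distinct[i] > 2)
	return 0;
      if (distinct[i] == 2)
	any_pair = 1;
    }

  /* Split only when every group is small and at least one has a pair.  */
  return any_pair;
}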
2328
2329 /* For each group of address type uses, this function further groups
2330 these uses according to the maximum offset supported by the target's
2331 [base + offset] addressing mode. */
2332
2333 static void
2334 group_address_uses (struct ivopts_data *data)
2335 {
2336 HOST_WIDE_INT max_offset = -1;
2337 unsigned int i, n, sub_id;
2338 struct iv_use *pre, *use;
2339 unsigned HOST_WIDE_INT addr_offset_first;
2340
2341 /* Reset max offset to split all small groups. */
2342 if (split_all_small_groups (data))
2343 max_offset = 0;
2344
2345 n = n_iv_uses (data);
2346 for (i = 0; i < n; i++)
2347 {
2348 use = iv_use (data, i);
2349 if (!use->next)
2350 continue;
2351
2352 gcc_assert (use->type == USE_ADDRESS);
2353 if (max_offset != 0)
2354 max_offset = compute_max_addr_offset (use);
2355
2356 while (use)
2357 {
2358 sub_id = 0;
2359 addr_offset_first = use->addr_offset;
2360 /* Only uses whose offset fits in the offset part when measured
2361 against the first use can be grouped together. */
2362 for (pre = use, use = use->next;
2363 use && (use->addr_offset - addr_offset_first
2364 <= (unsigned HOST_WIDE_INT) max_offset);
2365 pre = use, use = use->next)
2366 {
2367 use->id = pre->id;
2368 use->sub_id = ++sub_id;
2369 }
2370
2371 /* Break the list and create new group. */
2372 if (use)
2373 {
2374 pre->next = NULL;
2375 use->id = n_iv_uses (data);
2376 use->related_cands = BITMAP_ALLOC (NULL);
2377 data->iv_uses.safe_push (use);
2378 }
2379 }
2380 }
2381
2382 if (dump_file && (dump_flags & TDF_DETAILS))
2383 dump_uses (dump_file, data);
2384 }
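/* A standalone sketch (not part of the pass) of the regrouping done above:
   uses sorted by increasing offset are partitioned so that every member of
   a group stays within MAX_OFFSET of the group's first member.  The real
   code walks the iv_use sub-use list instead of an array; all names below
   are hypothetical.  */

static unsigned
example_group_by_offset (const unsigned long *offsets, unsigned n,
			 unsigned long max_offset, unsigned *group_id)
{
  unsigned i, n_groups = 0;
  unsigned long first = 0;

  for (i = 0; i < n; i++)
    {
      /* Start a new group when the offset no longer fits against the
	 first use of the current group.  */
      if (i == 0 || offsets[i] - first > max_offset)
	{
	  first = offsets[i];
	  n_groups++;
	}
      group_id[i] = n_groups - 1;
    }
  return n_groups;
}

/* With offsets {0, 4, 8, 1024} and max_offset 255 this yields two groups,
   {0, 4, 8} and {1024}, mirroring how group_address_uses breaks the list.  */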
2385
2386 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2387 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2388 we are at the top-level of the processed address. */
2389
2390 static tree
2391 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2392 HOST_WIDE_INT *offset)
2393 {
2394 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2395 enum tree_code code;
2396 tree type, orig_type = TREE_TYPE (expr);
2397 HOST_WIDE_INT off0, off1, st;
2398 tree orig_expr = expr;
2399
2400 STRIP_NOPS (expr);
2401
2402 type = TREE_TYPE (expr);
2403 code = TREE_CODE (expr);
2404 *offset = 0;
2405
2406 switch (code)
2407 {
2408 case INTEGER_CST:
2409 if (!cst_and_fits_in_hwi (expr)
2410 || integer_zerop (expr))
2411 return orig_expr;
2412
2413 *offset = int_cst_value (expr);
2414 return build_int_cst (orig_type, 0);
2415
2416 case POINTER_PLUS_EXPR:
2417 case PLUS_EXPR:
2418 case MINUS_EXPR:
2419 op0 = TREE_OPERAND (expr, 0);
2420 op1 = TREE_OPERAND (expr, 1);
2421
2422 op0 = strip_offset_1 (op0, false, false, &off0);
2423 op1 = strip_offset_1 (op1, false, false, &off1);
2424
2425 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2426 if (op0 == TREE_OPERAND (expr, 0)
2427 && op1 == TREE_OPERAND (expr, 1))
2428 return orig_expr;
2429
2430 if (integer_zerop (op1))
2431 expr = op0;
2432 else if (integer_zerop (op0))
2433 {
2434 if (code == MINUS_EXPR)
2435 expr = fold_build1 (NEGATE_EXPR, type, op1);
2436 else
2437 expr = op1;
2438 }
2439 else
2440 expr = fold_build2 (code, type, op0, op1);
2441
2442 return fold_convert (orig_type, expr);
2443
2444 case MULT_EXPR:
2445 op1 = TREE_OPERAND (expr, 1);
2446 if (!cst_and_fits_in_hwi (op1))
2447 return orig_expr;
2448
2449 op0 = TREE_OPERAND (expr, 0);
2450 op0 = strip_offset_1 (op0, false, false, &off0);
2451 if (op0 == TREE_OPERAND (expr, 0))
2452 return orig_expr;
2453
2454 *offset = off0 * int_cst_value (op1);
2455 if (integer_zerop (op0))
2456 expr = op0;
2457 else
2458 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2459
2460 return fold_convert (orig_type, expr);
2461
2462 case ARRAY_REF:
2463 case ARRAY_RANGE_REF:
2464 if (!inside_addr)
2465 return orig_expr;
2466
2467 step = array_ref_element_size (expr);
2468 if (!cst_and_fits_in_hwi (step))
2469 break;
2470
2471 st = int_cst_value (step);
2472 op1 = TREE_OPERAND (expr, 1);
2473 op1 = strip_offset_1 (op1, false, false, &off1);
2474 *offset = off1 * st;
2475
2476 if (top_compref
2477 && integer_zerop (op1))
2478 {
2479 /* Strip the component reference completely. */
2480 op0 = TREE_OPERAND (expr, 0);
2481 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2482 *offset += off0;
2483 return op0;
2484 }
2485 break;
2486
2487 case COMPONENT_REF:
2488 {
2489 tree field;
2490
2491 if (!inside_addr)
2492 return orig_expr;
2493
2494 tmp = component_ref_field_offset (expr);
2495 field = TREE_OPERAND (expr, 1);
2496 if (top_compref
2497 && cst_and_fits_in_hwi (tmp)
2498 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2499 {
2500 HOST_WIDE_INT boffset, abs_off;
2501
2502 /* Strip the component reference completely. */
2503 op0 = TREE_OPERAND (expr, 0);
2504 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2505 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2506 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2507 if (boffset < 0)
2508 abs_off = -abs_off;
2509
2510 *offset = off0 + int_cst_value (tmp) + abs_off;
2511 return op0;
2512 }
2513 }
2514 break;
2515
2516 case ADDR_EXPR:
2517 op0 = TREE_OPERAND (expr, 0);
2518 op0 = strip_offset_1 (op0, true, true, &off0);
2519 *offset += off0;
2520
2521 if (op0 == TREE_OPERAND (expr, 0))
2522 return orig_expr;
2523
2524 expr = build_fold_addr_expr (op0);
2525 return fold_convert (orig_type, expr);
2526
2527 case MEM_REF:
2528 /* ??? Offset operand? */
2529 inside_addr = false;
2530 break;
2531
2532 default:
2533 return orig_expr;
2534 }
2535
2536 /* Default handling of expressions for which we want to recurse into
2537 the first operand. */
2538 op0 = TREE_OPERAND (expr, 0);
2539 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2540 *offset += off0;
2541
2542 if (op0 == TREE_OPERAND (expr, 0)
2543 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2544 return orig_expr;
2545
2546 expr = copy_node (expr);
2547 TREE_OPERAND (expr, 0) = op0;
2548 if (op1)
2549 TREE_OPERAND (expr, 1) = op1;
2550
2551 /* Inside an address, we might strip the top-level component references,
2552 thus changing the type of the expression. Handling of ADDR_EXPR
2553 will fix that. */
2554 expr = fold_convert (orig_type, expr);
2555
2556 return expr;
2557 }
2558
2559 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2560
2561 static tree
2562 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2563 {
2564 HOST_WIDE_INT off;
2565 tree core = strip_offset_1 (expr, false, false, &off);
2566 *offset = off;
2567 return core;
2568 }
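/* A standalone sketch (not part of the pass) of the idea behind
   strip_offset_1 on a toy expression representation: constant summands are
   accumulated into *OFFSET and removed from the returned expression, so
   (i + 4) + 16 becomes i with *OFFSET set to 20.  All names below are
   hypothetical and unrelated to GCC trees.  */

enum toy_code { TOY_CST, TOY_VAR, TOY_PLUS };

struct toy_expr
{
  enum toy_code code;
  long cst;			/* Valid for TOY_CST.  */
  struct toy_expr *op0, *op1;	/* Valid for TOY_PLUS.  */
};

/* Strip constant summands from EXPR into *OFFSET; return the remaining
   expression, or NULL if EXPR was entirely constant.  */

static struct toy_expr *
toy_strip_offset (struct toy_expr *expr, long *offset)
{
  if (expr->code == TOY_CST)
    {
      *offset += expr->cst;
      return NULL;
    }
  if (expr->code == TOY_PLUS)
    {
      struct toy_expr *op0 = toy_strip_offset (expr->op0, offset);
      struct toy_expr *op1 = toy_strip_offset (expr->op1, offset);

      if (!op0)
	return op1;
      if (!op1)
	return op0;
      expr->op0 = op0;
      expr->op1 = op1;
    }
  return expr;
}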
2569
2570 /* Returns a variant of TYPE that can be used as a base for different uses.
2571 We return an unsigned type with the same precision, which avoids problems
2572 with overflows. */
2573
2574 static tree
2575 generic_type_for (tree type)
2576 {
2577 if (POINTER_TYPE_P (type))
2578 return unsigned_type_for (type);
2579
2580 if (TYPE_UNSIGNED (type))
2581 return type;
2582
2583 return unsigned_type_for (type);
2584 }
2585
2586 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2587 the bitmap in which we should store them. */
2588
2589 static struct ivopts_data *fd_ivopts_data;
2590 static tree
2591 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2592 {
2593 bitmap *depends_on = (bitmap *) data;
2594 struct version_info *info;
2595
2596 if (TREE_CODE (*expr_p) != SSA_NAME)
2597 return NULL_TREE;
2598 info = name_info (fd_ivopts_data, *expr_p);
2599
2600 if (!info->inv_id || info->has_nonlin_use)
2601 return NULL_TREE;
2602
2603 if (!*depends_on)
2604 *depends_on = BITMAP_ALLOC (NULL);
2605 bitmap_set_bit (*depends_on, info->inv_id);
2606
2607 return NULL_TREE;
2608 }
2609
2610 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2611 position to POS. If USE is not NULL, the candidate is set as related to
2612 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2613 replacement of the final value of the iv by a direct computation. */
2614
2615 static struct iv_cand *
2616 add_candidate_1 (struct ivopts_data *data,
2617 tree base, tree step, bool important, enum iv_position pos,
2618 struct iv_use *use, gimple incremented_at)
2619 {
2620 unsigned i;
2621 struct iv_cand *cand = NULL;
2622 tree type, orig_type;
2623
2624 /* For non-original variables, make sure their values are computed in a type
2625 that does not invoke undefined behavior on overflows (since in general,
2626 we cannot prove that these induction variables are non-wrapping). */
2627 if (pos != IP_ORIGINAL)
2628 {
2629 orig_type = TREE_TYPE (base);
2630 type = generic_type_for (orig_type);
2631 if (type != orig_type)
2632 {
2633 base = fold_convert (type, base);
2634 step = fold_convert (type, step);
2635 }
2636 }
2637
2638 for (i = 0; i < n_iv_cands (data); i++)
2639 {
2640 cand = iv_cand (data, i);
2641
2642 if (cand->pos != pos)
2643 continue;
2644
2645 if (cand->incremented_at != incremented_at
2646 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2647 && cand->ainc_use != use))
2648 continue;
2649
2650 if (!cand->iv)
2651 {
2652 if (!base && !step)
2653 break;
2654
2655 continue;
2656 }
2657
2658 if (!base && !step)
2659 continue;
2660
2661 if (operand_equal_p (base, cand->iv->base, 0)
2662 && operand_equal_p (step, cand->iv->step, 0)
2663 && (TYPE_PRECISION (TREE_TYPE (base))
2664 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2665 break;
2666 }
2667
2668 if (i == n_iv_cands (data))
2669 {
2670 cand = XCNEW (struct iv_cand);
2671 cand->id = i;
2672
2673 if (!base && !step)
2674 cand->iv = NULL;
2675 else
2676 cand->iv = alloc_iv (data, base, step);
2677
2678 cand->pos = pos;
2679 if (pos != IP_ORIGINAL && cand->iv)
2680 {
2681 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2682 cand->var_after = cand->var_before;
2683 }
2684 cand->important = important;
2685 cand->incremented_at = incremented_at;
2686 data->iv_candidates.safe_push (cand);
2687
2688 if (step
2689 && TREE_CODE (step) != INTEGER_CST)
2690 {
2691 fd_ivopts_data = data;
2692 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2693 }
2694
2695 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2696 cand->ainc_use = use;
2697 else
2698 cand->ainc_use = NULL;
2699
2700 if (dump_file && (dump_flags & TDF_DETAILS))
2701 dump_cand (dump_file, cand);
2702 }
2703
2704 if (important && !cand->important)
2705 {
2706 cand->important = true;
2707 if (dump_file && (dump_flags & TDF_DETAILS))
2708 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2709 }
2710
2711 if (use)
2712 {
2713 bitmap_set_bit (use->related_cands, i);
2714 if (dump_file && (dump_flags & TDF_DETAILS))
2715 fprintf (dump_file, "Candidate %d is related to use %d\n",
2716 cand->id, use->id);
2717 }
2718
2719 return cand;
2720 }
2721
2722 /* Returns true if incrementing the induction variable at the end of the LOOP
2723 is allowed.
2724
2725 The purpose is to avoid splitting the latch edge with a biv increment, thus
2726 creating a jump, possibly confusing other optimization passes and leaving
2727 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2728 is not available (so we do not have a better alternative), or if the latch
2729 edge is already nonempty. */
2730
2731 static bool
2732 allow_ip_end_pos_p (struct loop *loop)
2733 {
2734 if (!ip_normal_pos (loop))
2735 return true;
2736
2737 if (!empty_block_p (ip_end_pos (loop)))
2738 return true;
2739
2740 return false;
2741 }
2742
2743 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2744 Important field is set to IMPORTANT. */
2745
2746 static void
2747 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2748 bool important, struct iv_use *use)
2749 {
2750 basic_block use_bb = gimple_bb (use->stmt);
2751 machine_mode mem_mode;
2752 unsigned HOST_WIDE_INT cstepi;
2753
2754 /* If we insert the increment in any position other than the standard
2755 ones, we must ensure that it is incremented once per iteration.
2756 It must not be in an inner nested loop, or one side of an if
2757 statement. */
2758 if (use_bb->loop_father != data->current_loop
2759 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2760 || stmt_could_throw_p (use->stmt)
2761 || !cst_and_fits_in_hwi (step))
2762 return;
2763
2764 cstepi = int_cst_value (step);
2765
2766 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2767 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2768 || USE_STORE_PRE_INCREMENT (mem_mode))
2769 && GET_MODE_SIZE (mem_mode) == cstepi)
2770 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2771 || USE_STORE_PRE_DECREMENT (mem_mode))
2772 && GET_MODE_SIZE (mem_mode) == -cstepi))
2773 {
2774 enum tree_code code = MINUS_EXPR;
2775 tree new_base;
2776 tree new_step = step;
2777
2778 if (POINTER_TYPE_P (TREE_TYPE (base)))
2779 {
2780 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2781 code = POINTER_PLUS_EXPR;
2782 }
2783 else
2784 new_step = fold_convert (TREE_TYPE (base), new_step);
2785 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2786 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2787 use->stmt);
2788 }
2789 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2790 || USE_STORE_POST_INCREMENT (mem_mode))
2791 && GET_MODE_SIZE (mem_mode) == cstepi)
2792 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2793 || USE_STORE_POST_DECREMENT (mem_mode))
2794 && GET_MODE_SIZE (mem_mode) == -cstepi))
2795 {
2796 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2797 use->stmt);
2798 }
2799 }
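/* A standalone sketch (not part of the pass) of the applicability test
   used above: an auto-increment candidate only helps when the iv step
   equals the size of the access (or its negation for decrements).
   Hypothetical names.  */

static int
example_autoinc_applicable (long step, long access_size, int decrement_p)
{
  return decrement_p ? step == -access_size : step == access_size;
}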
2800
2801 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2802 position to POS. If USE is not NULL, the candidate is set as related to
2803 it. The candidate computation is scheduled on all available positions. */
2804
2805 static void
2806 add_candidate (struct ivopts_data *data,
2807 tree base, tree step, bool important, struct iv_use *use)
2808 {
2809 gcc_assert (use == NULL || use->sub_id == 0);
2810
2811 if (ip_normal_pos (data->current_loop))
2812 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2813 if (ip_end_pos (data->current_loop)
2814 && allow_ip_end_pos_p (data->current_loop))
2815 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2816
2817 if (use != NULL && use->type == USE_ADDRESS)
2818 add_autoinc_candidates (data, base, step, important, use);
2819 }
2820
2821 /* Adds standard iv candidates. */
2822
2823 static void
2824 add_standard_iv_candidates (struct ivopts_data *data)
2825 {
2826 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2827
2828 /* The same for a double-integer type if it is still fast enough. */
2829 if (TYPE_PRECISION
2830 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2831 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2832 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2833 build_int_cst (long_integer_type_node, 1), true, NULL);
2834
2835 /* The same for a double-integer type if it is still fast enough. */
2836 if (TYPE_PRECISION
2837 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2838 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2839 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2840 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2841 }
2842
2843
2844 /* Adds candidates based on the old induction variable IV. */
2845
2846 static void
2847 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2848 {
2849 gimple phi;
2850 tree def;
2851 struct iv_cand *cand;
2852
2853 add_candidate (data, iv->base, iv->step, true, NULL);
2854
2855 /* The same, but with initial value zero. */
2856 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2857 add_candidate (data, size_int (0), iv->step, true, NULL);
2858 else
2859 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2860 iv->step, true, NULL);
2861
2862 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2863 if (gimple_code (phi) == GIMPLE_PHI)
2864 {
2865 /* Additionally record the possibility of leaving the original iv
2866 untouched. */
2867 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2868 /* Don't add candidate if it's from another PHI node because
2869 it's an affine iv appearing in the form of PEELED_CHREC. */
2870 phi = SSA_NAME_DEF_STMT (def);
2871 if (gimple_code (phi) != GIMPLE_PHI)
2872 {
2873 cand = add_candidate_1 (data,
2874 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2875 SSA_NAME_DEF_STMT (def));
2876 cand->var_before = iv->ssa_name;
2877 cand->var_after = def;
2878 }
2879 else
2880 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2881 }
2882 }
2883
2884 /* Adds candidates based on the old induction variables. */
2885
2886 static void
2887 add_old_ivs_candidates (struct ivopts_data *data)
2888 {
2889 unsigned i;
2890 struct iv *iv;
2891 bitmap_iterator bi;
2892
2893 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2894 {
2895 iv = ver_info (data, i)->iv;
2896 if (iv && iv->biv_p && !integer_zerop (iv->step))
2897 add_old_iv_candidates (data, iv);
2898 }
2899 }
2900
2901 /* Adds candidates based on the value of the induction variable IV and USE. */
2902
2903 static void
2904 add_iv_value_candidates (struct ivopts_data *data,
2905 struct iv *iv, struct iv_use *use)
2906 {
2907 unsigned HOST_WIDE_INT offset;
2908 tree base;
2909 tree basetype;
2910
2911 add_candidate (data, iv->base, iv->step, false, use);
2912
2913 /* The same, but with initial value zero. Make such a variable important,
2914 since it is generic enough that possibly many uses may be based
2915 on it. */
2916 basetype = TREE_TYPE (iv->base);
2917 if (POINTER_TYPE_P (basetype))
2918 basetype = sizetype;
2919 add_candidate (data, build_int_cst (basetype, 0),
2920 iv->step, true, use);
2921
2922 /* Third, try removing the constant offset. Make sure to even
2923 add a candidate for &a[0] vs. (T *)&a. */
2924 base = strip_offset (iv->base, &offset);
2925 if (offset
2926 || base != iv->base)
2927 add_candidate (data, base, iv->step, false, use);
2928 }
2929
2930 /* Adds candidates based on the uses. */
2931
2932 static void
2933 add_derived_ivs_candidates (struct ivopts_data *data)
2934 {
2935 unsigned i;
2936
2937 for (i = 0; i < n_iv_uses (data); i++)
2938 {
2939 struct iv_use *use = iv_use (data, i);
2940
2941 if (!use)
2942 continue;
2943
2944 switch (use->type)
2945 {
2946 case USE_NONLINEAR_EXPR:
2947 case USE_COMPARE:
2948 case USE_ADDRESS:
2949 /* Just add the ivs based on the value of the iv used here. */
2950 add_iv_value_candidates (data, use->iv, use);
2951 break;
2952
2953 default:
2954 gcc_unreachable ();
2955 }
2956 }
2957 }
2958
2959 /* Record important candidates and add them to related_cands bitmaps
2960 if needed. */
2961
2962 static void
2963 record_important_candidates (struct ivopts_data *data)
2964 {
2965 unsigned i;
2966 struct iv_use *use;
2967
2968 for (i = 0; i < n_iv_cands (data); i++)
2969 {
2970 struct iv_cand *cand = iv_cand (data, i);
2971
2972 if (cand->important)
2973 bitmap_set_bit (data->important_candidates, i);
2974 }
2975
2976 data->consider_all_candidates = (n_iv_cands (data)
2977 <= CONSIDER_ALL_CANDIDATES_BOUND);
2978
2979 if (data->consider_all_candidates)
2980 {
2981 /* We will not need "related_cands" bitmaps in this case,
2982 so release them to decrease peak memory consumption. */
2983 for (i = 0; i < n_iv_uses (data); i++)
2984 {
2985 use = iv_use (data, i);
2986 BITMAP_FREE (use->related_cands);
2987 }
2988 }
2989 else
2990 {
2991 /* Add important candidates to the related_cands bitmaps. */
2992 for (i = 0; i < n_iv_uses (data); i++)
2993 bitmap_ior_into (iv_use (data, i)->related_cands,
2994 data->important_candidates);
2995 }
2996 }
2997
2998 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2999 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3000 we allocate a simple list to every use. */
3001
3002 static void
3003 alloc_use_cost_map (struct ivopts_data *data)
3004 {
3005 unsigned i, size, s;
3006
3007 for (i = 0; i < n_iv_uses (data); i++)
3008 {
3009 struct iv_use *use = iv_use (data, i);
3010
3011 if (data->consider_all_candidates)
3012 size = n_iv_cands (data);
3013 else
3014 {
3015 s = bitmap_count_bits (use->related_cands);
3016
3017 /* Round up to a power of two, so that computing the modulo is fast. */
3018 size = s ? (1 << ceil_log2 (s)) : 1;
3019 }
3020
3021 use->n_map_members = size;
3022 use->cost_map = XCNEWVEC (struct cost_pair, size);
3023 }
3024 }
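/* A standalone sketch (not part of the pass): because the per-use map size
   is rounded up to a power of two above, the start slot used by the
   open-addressed lookups in set_use_iv_cost/get_use_iv_cost reduces from a
   modulo to a bitwise AND.  Hypothetical names.  */

static unsigned
example_start_slot (unsigned cand_id, unsigned n_map_members)
{
  /* Assumes n_map_members is a power of two, so this equals
     cand_id % n_map_members; e.g. example_start_slot (13, 8) == 5.  */
  return cand_id & (n_map_members - 1);
}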
3025
3026 /* Returns description of computation cost of expression whose runtime
3027 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3028
3029 static comp_cost
3030 new_cost (unsigned runtime, unsigned complexity)
3031 {
3032 comp_cost cost;
3033
3034 cost.cost = runtime;
3035 cost.complexity = complexity;
3036
3037 return cost;
3038 }
3039
3040 /* Returns true if COST is infinite. */
3041
3042 static bool
3043 infinite_cost_p (comp_cost cost)
3044 {
3045 return cost.cost == INFTY;
3046 }
3047
3048 /* Adds costs COST1 and COST2. */
3049
3050 static comp_cost
3051 add_costs (comp_cost cost1, comp_cost cost2)
3052 {
3053 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3054 return infinite_cost;
3055
3056 cost1.cost += cost2.cost;
3057 cost1.complexity += cost2.complexity;
3058
3059 return cost1;
3060 }
3061 /* Subtracts costs COST1 and COST2. */
3062
3063 static comp_cost
3064 sub_costs (comp_cost cost1, comp_cost cost2)
3065 {
3066 cost1.cost -= cost2.cost;
3067 cost1.complexity -= cost2.complexity;
3068
3069 return cost1;
3070 }
3071
3072 /* Returns a negative number if COST1 < COST2, a positive number if
3073 COST1 > COST2, and 0 if COST1 = COST2. */
3074
3075 static int
3076 compare_costs (comp_cost cost1, comp_cost cost2)
3077 {
3078 if (cost1.cost == cost2.cost)
3079 return cost1.complexity - cost2.complexity;
3080
3081 return cost1.cost - cost2.cost;
3082 }
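/* A minimal usage sketch (not part of the pass): comp_cost ordering is
   lexicographic, runtime cost first, complexity as the tie-breaker, so two
   costs with equal runtime compare by complexity.  Hypothetical values.  */

static int
example_cost_ordering (void)
{
  comp_cost a = new_cost (4, 2);
  comp_cost b = new_cost (4, 3);

  /* Equal runtime cost; A wins because it has lower complexity.  */
  return compare_costs (a, b) < 0;
}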
3083
3084 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3085 depends on invariants DEPENDS_ON, that the value used in expressing it
3086 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3087
3088 static void
3089 set_use_iv_cost (struct ivopts_data *data,
3090 struct iv_use *use, struct iv_cand *cand,
3091 comp_cost cost, bitmap depends_on, tree value,
3092 enum tree_code comp, int inv_expr_id)
3093 {
3094 unsigned i, s;
3095
3096 if (infinite_cost_p (cost))
3097 {
3098 BITMAP_FREE (depends_on);
3099 return;
3100 }
3101
3102 if (data->consider_all_candidates)
3103 {
3104 use->cost_map[cand->id].cand = cand;
3105 use->cost_map[cand->id].cost = cost;
3106 use->cost_map[cand->id].depends_on = depends_on;
3107 use->cost_map[cand->id].value = value;
3108 use->cost_map[cand->id].comp = comp;
3109 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3110 return;
3111 }
3112
3113 /* n_map_members is a power of two, so this computes modulo. */
3114 s = cand->id & (use->n_map_members - 1);
3115 for (i = s; i < use->n_map_members; i++)
3116 if (!use->cost_map[i].cand)
3117 goto found;
3118 for (i = 0; i < s; i++)
3119 if (!use->cost_map[i].cand)
3120 goto found;
3121
3122 gcc_unreachable ();
3123
3124 found:
3125 use->cost_map[i].cand = cand;
3126 use->cost_map[i].cost = cost;
3127 use->cost_map[i].depends_on = depends_on;
3128 use->cost_map[i].value = value;
3129 use->cost_map[i].comp = comp;
3130 use->cost_map[i].inv_expr_id = inv_expr_id;
3131 }
3132
3133 /* Gets cost of (USE, CANDIDATE) pair. */
3134
3135 static struct cost_pair *
3136 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3137 struct iv_cand *cand)
3138 {
3139 unsigned i, s;
3140 struct cost_pair *ret;
3141
3142 if (!cand)
3143 return NULL;
3144
3145 if (data->consider_all_candidates)
3146 {
3147 ret = use->cost_map + cand->id;
3148 if (!ret->cand)
3149 return NULL;
3150
3151 return ret;
3152 }
3153
3154 /* n_map_members is a power of two, so this computes modulo. */
3155 s = cand->id & (use->n_map_members - 1);
3156 for (i = s; i < use->n_map_members; i++)
3157 if (use->cost_map[i].cand == cand)
3158 return use->cost_map + i;
3159 else if (use->cost_map[i].cand == NULL)
3160 return NULL;
3161 for (i = 0; i < s; i++)
3162 if (use->cost_map[i].cand == cand)
3163 return use->cost_map + i;
3164 else if (use->cost_map[i].cand == NULL)
3165 return NULL;
3166
3167 return NULL;
3168 }
3169
3170 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3171 static rtx
3172 produce_memory_decl_rtl (tree obj, int *regno)
3173 {
3174 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3175 machine_mode address_mode = targetm.addr_space.address_mode (as);
3176 rtx x;
3177
3178 gcc_assert (obj);
3179 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3180 {
3181 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3182 x = gen_rtx_SYMBOL_REF (address_mode, name);
3183 SET_SYMBOL_REF_DECL (x, obj);
3184 x = gen_rtx_MEM (DECL_MODE (obj), x);
3185 set_mem_addr_space (x, as);
3186 targetm.encode_section_info (obj, x, true);
3187 }
3188 else
3189 {
3190 x = gen_raw_REG (address_mode, (*regno)++);
3191 x = gen_rtx_MEM (DECL_MODE (obj), x);
3192 set_mem_addr_space (x, as);
3193 }
3194
3195 return x;
3196 }
3197
3198 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3199 walk_tree. DATA contains the actual fake register number. */
3200
3201 static tree
3202 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3203 {
3204 tree obj = NULL_TREE;
3205 rtx x = NULL_RTX;
3206 int *regno = (int *) data;
3207
3208 switch (TREE_CODE (*expr_p))
3209 {
3210 case ADDR_EXPR:
3211 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3212 handled_component_p (*expr_p);
3213 expr_p = &TREE_OPERAND (*expr_p, 0))
3214 continue;
3215 obj = *expr_p;
3216 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3217 x = produce_memory_decl_rtl (obj, regno);
3218 break;
3219
3220 case SSA_NAME:
3221 *ws = 0;
3222 obj = SSA_NAME_VAR (*expr_p);
3223 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3224 if (!obj)
3225 return NULL_TREE;
3226 if (!DECL_RTL_SET_P (obj))
3227 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3228 break;
3229
3230 case VAR_DECL:
3231 case PARM_DECL:
3232 case RESULT_DECL:
3233 *ws = 0;
3234 obj = *expr_p;
3235
3236 if (DECL_RTL_SET_P (obj))
3237 break;
3238
3239 if (DECL_MODE (obj) == BLKmode)
3240 x = produce_memory_decl_rtl (obj, regno);
3241 else
3242 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3243
3244 break;
3245
3246 default:
3247 break;
3248 }
3249
3250 if (x)
3251 {
3252 decl_rtl_to_reset.safe_push (obj);
3253 SET_DECL_RTL (obj, x);
3254 }
3255
3256 return NULL_TREE;
3257 }
3258
3259 /* Determines cost of the computation of EXPR. */
3260
3261 static unsigned
3262 computation_cost (tree expr, bool speed)
3263 {
3264 rtx_insn *seq;
3265 rtx rslt;
3266 tree type = TREE_TYPE (expr);
3267 unsigned cost;
3268 /* Avoid using hard regs in ways which may be unsupported. */
3269 int regno = LAST_VIRTUAL_REGISTER + 1;
3270 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3271 enum node_frequency real_frequency = node->frequency;
3272
3273 node->frequency = NODE_FREQUENCY_NORMAL;
3274 crtl->maybe_hot_insn_p = speed;
3275 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3276 start_sequence ();
3277 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3278 seq = get_insns ();
3279 end_sequence ();
3280 default_rtl_profile ();
3281 node->frequency = real_frequency;
3282
3283 cost = seq_cost (seq, speed);
3284 if (MEM_P (rslt))
3285 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3286 TYPE_ADDR_SPACE (type), speed);
3287 else if (!REG_P (rslt))
3288 cost += set_src_cost (rslt, speed);
3289
3290 return cost;
3291 }
3292
3293 /* Returns variable containing the value of candidate CAND at statement AT. */
3294
3295 static tree
3296 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3297 {
3298 if (stmt_after_increment (loop, cand, stmt))
3299 return cand->var_after;
3300 else
3301 return cand->var_before;
3302 }
3303
3304 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3305 same precision that is at least as wide as the precision of TYPE, stores
3306 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3307 type of A and B. */
3308
3309 static tree
3310 determine_common_wider_type (tree *a, tree *b)
3311 {
3312 tree wider_type = NULL;
3313 tree suba, subb;
3314 tree atype = TREE_TYPE (*a);
3315
3316 if (CONVERT_EXPR_P (*a))
3317 {
3318 suba = TREE_OPERAND (*a, 0);
3319 wider_type = TREE_TYPE (suba);
3320 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3321 return atype;
3322 }
3323 else
3324 return atype;
3325
3326 if (CONVERT_EXPR_P (*b))
3327 {
3328 subb = TREE_OPERAND (*b, 0);
3329 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3330 return atype;
3331 }
3332 else
3333 return atype;
3334
3335 *a = suba;
3336 *b = subb;
3337 return wider_type;
3338 }
3339
3340 /* Determines the expression by which USE is expressed from induction variable
3341 CAND at statement AT in LOOP. The expression is stored in a decomposed
3342 form into AFF. Returns false if USE cannot be expressed using CAND. */
3343
3344 static bool
3345 get_computation_aff (struct loop *loop,
3346 struct iv_use *use, struct iv_cand *cand, gimple at,
3347 struct aff_tree *aff)
3348 {
3349 tree ubase = use->iv->base;
3350 tree ustep = use->iv->step;
3351 tree cbase = cand->iv->base;
3352 tree cstep = cand->iv->step, cstep_common;
3353 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3354 tree common_type, var;
3355 tree uutype;
3356 aff_tree cbase_aff, var_aff;
3357 widest_int rat;
3358
3359 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3360 {
3361 /* We do not have enough precision to express the values of the use. */
3362 return false;
3363 }
3364
3365 var = var_at_stmt (loop, cand, at);
3366 uutype = unsigned_type_for (utype);
3367
3368 /* If the conversion is not noop, perform it. */
3369 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3370 {
3371 cstep = fold_convert (uutype, cstep);
3372 cbase = fold_convert (uutype, cbase);
3373 var = fold_convert (uutype, var);
3374 }
3375
3376 if (!constant_multiple_of (ustep, cstep, &rat))
3377 return false;
3378
3379 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3380 type, we achieve better folding by computing their difference in this
3381 wider type, and cast the result to UUTYPE. We do not need to worry about
3382 overflows, as all the arithmetic will in the end be performed in UUTYPE
3383 anyway. */
3384 common_type = determine_common_wider_type (&ubase, &cbase);
3385
3386 /* use = ubase - ratio * cbase + ratio * var. */
3387 tree_to_aff_combination (ubase, common_type, aff);
3388 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3389 tree_to_aff_combination (var, uutype, &var_aff);
3390
3391 /* We need to shift the value if we are after the increment. */
3392 if (stmt_after_increment (loop, cand, at))
3393 {
3394 aff_tree cstep_aff;
3395
3396 if (common_type != uutype)
3397 cstep_common = fold_convert (common_type, cstep);
3398 else
3399 cstep_common = cstep;
3400
3401 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3402 aff_combination_add (&cbase_aff, &cstep_aff);
3403 }
3404
3405 aff_combination_scale (&cbase_aff, -rat);
3406 aff_combination_add (aff, &cbase_aff);
3407 if (common_type != uutype)
3408 aff_combination_convert (aff, uutype);
3409
3410 aff_combination_scale (&var_aff, rat);
3411 aff_combination_add (aff, &var_aff);
3412
3413 return true;
3414 }
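/* A standalone sketch (not part of the pass) of the affine rewrite used
   above on plain integers: use = ubase - ratio * cbase + ratio * var,
   where RATIO is ustep / cstep.  For instance, with ubase = 0, ustep = 4
   and a candidate with cbase = 0, cstep = 1, the use value 4 * i is
   recovered from the candidate value VAR = i as 0 - 4 * 0 + 4 * i.
   Hypothetical names; the real code works on aff_tree combinations.  */

static long
example_express_use_from_cand (long ubase, long cbase, long ratio, long var)
{
  return ubase - ratio * cbase + ratio * var;
}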
3415
3416 /* Return the type of USE. */
3417
3418 static tree
3419 get_use_type (struct iv_use *use)
3420 {
3421 tree base_type = TREE_TYPE (use->iv->base);
3422 tree type;
3423
3424 if (use->type == USE_ADDRESS)
3425 {
3426 /* The base_type may be a void pointer. Create a pointer type based on
3427 the mem_ref instead. */
3428 type = build_pointer_type (TREE_TYPE (*use->op_p));
3429 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3430 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3431 }
3432 else
3433 type = base_type;
3434
3435 return type;
3436 }
3437
3438 /* Determines the expression by which USE is expressed from induction variable
3439 CAND at statement AT in LOOP. The computation is unshared. */
3440
3441 static tree
3442 get_computation_at (struct loop *loop,
3443 struct iv_use *use, struct iv_cand *cand, gimple at)
3444 {
3445 aff_tree aff;
3446 tree type = get_use_type (use);
3447
3448 if (!get_computation_aff (loop, use, cand, at, &aff))
3449 return NULL_TREE;
3450 unshare_aff_combination (&aff);
3451 return fold_convert (type, aff_combination_to_tree (&aff));
3452 }
3453
3454 /* Determines the expression by which USE is expressed from induction variable
3455 CAND in LOOP. The computation is unshared. */
3456
3457 static tree
3458 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3459 {
3460 return get_computation_at (loop, use, cand, use->stmt);
3461 }
3462
3463 /* Adjust the cost COST for being in loop setup rather than loop body.
3464 If we're optimizing for space, the loop setup overhead is constant;
3465 if we're optimizing for speed, amortize it over the per-iteration cost. */
3466 static unsigned
3467 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3468 {
3469 if (cost == INFTY)
3470 return cost;
3471 else if (optimize_loop_for_speed_p (data->current_loop))
3472 return cost / avg_loop_niter (data->current_loop);
3473 else
3474 return cost;
3475 }
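/* A standalone sketch (not part of the pass) of the amortization above:
   when optimizing for speed, a one-off setup cost is divided by the
   expected iteration count, so a setup of cost 20 in a loop averaging 10
   iterations contributes 2 per iteration; when optimizing for size it is
   charged in full.  Hypothetical names.  */

static unsigned
example_amortize_setup_cost (unsigned setup_cost, unsigned avg_niter,
			     int optimize_for_speed_p)
{
  return optimize_for_speed_p ? setup_cost / avg_niter : setup_cost;
}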
3476
3477 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3478 validity for a memory reference accessing memory of mode MODE in
3479 address space AS. */
3480
3481
3482 bool
3483 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3484 addr_space_t as)
3485 {
3486 #define MAX_RATIO 128
3487 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3488 static vec<sbitmap> valid_mult_list;
3489 sbitmap valid_mult;
3490
3491 if (data_index >= valid_mult_list.length ())
3492 valid_mult_list.safe_grow_cleared (data_index + 1);
3493
3494 valid_mult = valid_mult_list[data_index];
3495 if (!valid_mult)
3496 {
3497 machine_mode address_mode = targetm.addr_space.address_mode (as);
3498 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3499 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3500 rtx addr, scaled;
3501 HOST_WIDE_INT i;
3502
3503 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3504 bitmap_clear (valid_mult);
3505 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3506 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3507 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3508 {
3509 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3510 if (memory_address_addr_space_p (mode, addr, as)
3511 || memory_address_addr_space_p (mode, scaled, as))
3512 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3513 }
3514
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 {
3517 fprintf (dump_file, " allowed multipliers:");
3518 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3519 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3520 fprintf (dump_file, " %d", (int) i);
3521 fprintf (dump_file, "\n");
3522 fprintf (dump_file, "\n");
3523 }
3524
3525 valid_mult_list[data_index] = valid_mult;
3526 }
3527
3528 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3529 return false;
3530
3531 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3532 }
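/* A standalone sketch (not part of the pass): ratios in the range
   [-MAX_RATIO, MAX_RATIO] are cached in a per-(mode, address-space) bitmap
   by biasing the ratio with +MAX_RATIO so it becomes a non-negative bit
   index, which is the indexing used by the lookups above.  Hypothetical
   names.  */

static int
example_ratio_bit_index (int ratio, int max_ratio)
{
  /* -max_ratio maps to bit 0, 0 to bit max_ratio, and +max_ratio to bit
     2 * max_ratio.  */
  return ratio + max_ratio;
}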
3533
3534 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3535 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3536 variable is omitted. Compute the cost for a memory reference that accesses
3537 a memory location of mode MEM_MODE in address space AS.
3538
3539 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3540 size of MEM_MODE / RATIO) is available. To make this determination, we
3541 look at the size of the increment to be made, which is given in CSTEP.
3542 CSTEP may be zero if the step is unknown.
3543 STMT_AFTER_INC is true iff the statement we're looking at is after the
3544 increment of the original biv.
3545
3546 TODO -- there must be some better way. This all is quite crude. */
3547
3548 enum ainc_type
3549 {
3550 AINC_PRE_INC, /* Pre increment. */
3551 AINC_PRE_DEC, /* Pre decrement. */
3552 AINC_POST_INC, /* Post increment. */
3553 AINC_POST_DEC, /* Post decrement. */
3554 AINC_NONE /* Also the number of auto increment types. */
3555 };
3556
3557 typedef struct address_cost_data_s
3558 {
3559 HOST_WIDE_INT min_offset, max_offset;
3560 unsigned costs[2][2][2][2];
3561 unsigned ainc_costs[AINC_NONE];
3562 } *address_cost_data;
3563
3564
3565 static comp_cost
3566 get_address_cost (bool symbol_present, bool var_present,
3567 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3568 HOST_WIDE_INT cstep, machine_mode mem_mode,
3569 addr_space_t as, bool speed,
3570 bool stmt_after_inc, bool *may_autoinc)
3571 {
3572 machine_mode address_mode = targetm.addr_space.address_mode (as);
3573 static vec<address_cost_data> address_cost_data_list;
3574 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3575 address_cost_data data;
3576 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3577 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3578 unsigned cost, acost, complexity;
3579 enum ainc_type autoinc_type;
3580 bool offset_p, ratio_p, autoinc;
3581 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3582 unsigned HOST_WIDE_INT mask;
3583 unsigned bits;
3584
3585 if (data_index >= address_cost_data_list.length ())
3586 address_cost_data_list.safe_grow_cleared (data_index + 1);
3587
3588 data = address_cost_data_list[data_index];
3589 if (!data)
3590 {
3591 HOST_WIDE_INT i;
3592 HOST_WIDE_INT rat, off = 0;
3593 int old_cse_not_expected, width;
3594 unsigned sym_p, var_p, off_p, rat_p, add_c;
3595 rtx_insn *seq;
3596 rtx addr, base;
3597 rtx reg0, reg1;
3598
3599 data = (address_cost_data) xcalloc (1, sizeof (*data));
3600
3601 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3602
3603 width = GET_MODE_BITSIZE (address_mode) - 1;
3604 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3605 width = HOST_BITS_PER_WIDE_INT - 1;
3606 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3607
3608 for (i = width; i >= 0; i--)
3609 {
3610 off = -((unsigned HOST_WIDE_INT) 1 << i);
3611 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3612 if (memory_address_addr_space_p (mem_mode, addr, as))
3613 break;
3614 }
3615 data->min_offset = (i == -1? 0 : off);
3616
3617 for (i = width; i >= 0; i--)
3618 {
3619 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3620 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3621 if (memory_address_addr_space_p (mem_mode, addr, as))
3622 break;
3623 /* For some strict-alignment targets, the offset must be naturally
3624 aligned. Try an aligned offset if mem_mode is not QImode. */
3625 off = mem_mode != QImode
3626 ? ((unsigned HOST_WIDE_INT) 1 << i)
3627 - GET_MODE_SIZE (mem_mode)
3628 : 0;
3629 if (off > 0)
3630 {
3631 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3632 if (memory_address_addr_space_p (mem_mode, addr, as))
3633 break;
3634 }
3635 }
3636 if (i == -1)
3637 off = 0;
3638 data->max_offset = off;
3639
3640 if (dump_file && (dump_flags & TDF_DETAILS))
3641 {
3642 fprintf (dump_file, "get_address_cost:\n");
3643 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3644 GET_MODE_NAME (mem_mode),
3645 data->min_offset);
3646 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3647 GET_MODE_NAME (mem_mode),
3648 data->max_offset);
3649 }
3650
3651 rat = 1;
3652 for (i = 2; i <= MAX_RATIO; i++)
3653 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3654 {
3655 rat = i;
3656 break;
3657 }
3658
3659 /* Compute the cost of various addressing modes. */
3660 acost = 0;
3661 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3662 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3663
3664 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3665 || USE_STORE_PRE_DECREMENT (mem_mode))
3666 {
3667 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3668 has_predec[mem_mode]
3669 = memory_address_addr_space_p (mem_mode, addr, as);
3670
3671 if (has_predec[mem_mode])
3672 data->ainc_costs[AINC_PRE_DEC]
3673 = address_cost (addr, mem_mode, as, speed);
3674 }
3675 if (USE_LOAD_POST_DECREMENT (mem_mode)
3676 || USE_STORE_POST_DECREMENT (mem_mode))
3677 {
3678 addr = gen_rtx_POST_DEC (address_mode, reg0);
3679 has_postdec[mem_mode]
3680 = memory_address_addr_space_p (mem_mode, addr, as);
3681
3682 if (has_postdec[mem_mode])
3683 data->ainc_costs[AINC_POST_DEC]
3684 = address_cost (addr, mem_mode, as, speed);
3685 }
3686 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3687 || USE_STORE_PRE_INCREMENT (mem_mode))
3688 {
3689 addr = gen_rtx_PRE_INC (address_mode, reg0);
3690 has_preinc[mem_mode]
3691 = memory_address_addr_space_p (mem_mode, addr, as);
3692
3693 if (has_preinc[mem_mode])
3694 data->ainc_costs[AINC_PRE_INC]
3695 = address_cost (addr, mem_mode, as, speed);
3696 }
3697 if (USE_LOAD_POST_INCREMENT (mem_mode)
3698 || USE_STORE_POST_INCREMENT (mem_mode))
3699 {
3700 addr = gen_rtx_POST_INC (address_mode, reg0);
3701 has_postinc[mem_mode]
3702 = memory_address_addr_space_p (mem_mode, addr, as);
3703
3704 if (has_postinc[mem_mode])
3705 data->ainc_costs[AINC_POST_INC]
3706 = address_cost (addr, mem_mode, as, speed);
3707 }
3708 for (i = 0; i < 16; i++)
3709 {
3710 sym_p = i & 1;
3711 var_p = (i >> 1) & 1;
3712 off_p = (i >> 2) & 1;
3713 rat_p = (i >> 3) & 1;
3714
3715 addr = reg0;
3716 if (rat_p)
3717 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3718 gen_int_mode (rat, address_mode));
3719
3720 if (var_p)
3721 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3722
3723 if (sym_p)
3724 {
3725 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3726 /* ??? We can run into trouble with some backends by presenting
3727 it with symbols which haven't been properly passed through
3728 targetm.encode_section_info. By setting the local bit, we
3729 enhance the probability of things working. */
3730 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3731
3732 if (off_p)
3733 base = gen_rtx_fmt_e (CONST, address_mode,
3734 gen_rtx_fmt_ee
3735 (PLUS, address_mode, base,
3736 gen_int_mode (off, address_mode)));
3737 }
3738 else if (off_p)
3739 base = gen_int_mode (off, address_mode);
3740 else
3741 base = NULL_RTX;
3742
3743 if (base)
3744 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3745
3746 start_sequence ();
3747 /* To avoid splitting addressing modes, pretend that no cse will
3748 follow. */
3749 old_cse_not_expected = cse_not_expected;
3750 cse_not_expected = true;
3751 addr = memory_address_addr_space (mem_mode, addr, as);
3752 cse_not_expected = old_cse_not_expected;
3753 seq = get_insns ();
3754 end_sequence ();
3755
3756 acost = seq_cost (seq, speed);
3757 acost += address_cost (addr, mem_mode, as, speed);
3758
3759 if (!acost)
3760 acost = 1;
3761 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3762 }
3763
3764 /* On some targets, it is quite expensive to load a symbol into a register,
3765 which makes addresses that contain symbols look much more expensive.
3766 However, the symbol will have to be loaded in any case before the
3767 loop (and quite likely we have it in a register already), so it does not
3768 make much sense to penalize them too heavily. So make some final
3769 tweaks for the SYMBOL_PRESENT modes:
3770
3771 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3772 var is cheaper, use this mode with small penalty.
3773 If VAR_PRESENT is true, try whether the mode with
3774 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3775 if this is the case, use it. */
3776 add_c = add_cost (speed, address_mode);
3777 for (i = 0; i < 8; i++)
3778 {
3779 var_p = i & 1;
3780 off_p = (i >> 1) & 1;
3781 rat_p = (i >> 2) & 1;
3782
3783 acost = data->costs[0][1][off_p][rat_p] + 1;
3784 if (var_p)
3785 acost += add_c;
3786
3787 if (acost < data->costs[1][var_p][off_p][rat_p])
3788 data->costs[1][var_p][off_p][rat_p] = acost;
3789 }
3790
3791 if (dump_file && (dump_flags & TDF_DETAILS))
3792 {
3793 fprintf (dump_file, "Address costs:\n");
3794
3795 for (i = 0; i < 16; i++)
3796 {
3797 sym_p = i & 1;
3798 var_p = (i >> 1) & 1;
3799 off_p = (i >> 2) & 1;
3800 rat_p = (i >> 3) & 1;
3801
3802 fprintf (dump_file, " ");
3803 if (sym_p)
3804 fprintf (dump_file, "sym + ");
3805 if (var_p)
3806 fprintf (dump_file, "var + ");
3807 if (off_p)
3808 fprintf (dump_file, "cst + ");
3809 if (rat_p)
3810 fprintf (dump_file, "rat * ");
3811
3812 acost = data->costs[sym_p][var_p][off_p][rat_p];
3813 fprintf (dump_file, "index costs %d\n", acost);
3814 }
3815 if (has_predec[mem_mode] || has_postdec[mem_mode]
3816 || has_preinc[mem_mode] || has_postinc[mem_mode])
3817 fprintf (dump_file, " May include autoinc/dec\n");
3818 fprintf (dump_file, "\n");
3819 }
3820
3821 address_cost_data_list[data_index] = data;
3822 }
3823
3824 bits = GET_MODE_BITSIZE (address_mode);
3825 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3826 offset &= mask;
3827 if ((offset >> (bits - 1) & 1))
3828 offset |= ~mask;
3829 s_offset = offset;
3830
3831 autoinc = false;
3832 autoinc_type = AINC_NONE;
3833 msize = GET_MODE_SIZE (mem_mode);
3834 autoinc_offset = offset;
3835 if (stmt_after_inc)
3836 autoinc_offset += ratio * cstep;
3837 if (symbol_present || var_present || ratio != 1)
3838 autoinc = false;
3839 else
3840 {
3841 if (has_postinc[mem_mode] && autoinc_offset == 0
3842 && msize == cstep)
3843 autoinc_type = AINC_POST_INC;
3844 else if (has_postdec[mem_mode] && autoinc_offset == 0
3845 && msize == -cstep)
3846 autoinc_type = AINC_POST_DEC;
3847 else if (has_preinc[mem_mode] && autoinc_offset == msize
3848 && msize == cstep)
3849 autoinc_type = AINC_PRE_INC;
3850 else if (has_predec[mem_mode] && autoinc_offset == -msize
3851 && msize == -cstep)
3852 autoinc_type = AINC_PRE_DEC;
3853
3854 if (autoinc_type != AINC_NONE)
3855 autoinc = true;
3856 }
3857
3858 cost = 0;
3859 offset_p = (s_offset != 0
3860 && data->min_offset <= s_offset
3861 && s_offset <= data->max_offset);
3862 ratio_p = (ratio != 1
3863 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3864
3865 if (ratio != 1 && !ratio_p)
3866 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3867
3868 if (s_offset && !offset_p && !symbol_present)
3869 cost += add_cost (speed, address_mode);
3870
3871 if (may_autoinc)
3872 *may_autoinc = autoinc;
3873 if (autoinc)
3874 acost = data->ainc_costs[autoinc_type];
3875 else
3876 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3877 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3878 return new_cost (cost + acost, complexity);
3879 }
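/* A standalone sketch (not part of the pass) of the offset normalization
   performed above before the offset is classified against the precomputed
   min_offset/max_offset range: the offset is reduced modulo the address
   width and sign-extended.  Hypothetical names; BITS stands in for the
   address-mode bit width and is assumed to be at most 64.  */

static long long
example_sign_extend_offset (unsigned long long offset, unsigned bits)
{
  unsigned long long mask = (bits >= 64
			     ? ~(unsigned long long) 0
			     : ((unsigned long long) 1 << bits) - 1);

  offset &= mask;
  if (bits < 64 && ((offset >> (bits - 1)) & 1))
    offset |= ~mask;		/* Propagate the sign bit.  */
  return (long long) offset;
}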
3880
3881 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3882 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3883 calculating the operands of EXPR. Returns true if successful, and returns
3884 the cost in COST. */
3885
3886 static bool
3887 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3888 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3889 {
3890 comp_cost res;
3891 tree op1 = TREE_OPERAND (expr, 1);
3892 tree cst = TREE_OPERAND (mult, 1);
3893 tree multop = TREE_OPERAND (mult, 0);
3894 int m = exact_log2 (int_cst_value (cst));
3895 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3896 int as_cost, sa_cost;
3897 bool mult_in_op1;
3898
3899 if (!(m >= 0 && m < maxm))
3900 return false;
3901
3902 mult_in_op1 = operand_equal_p (op1, mult, 0);
3903
3904 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3905
3906 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3907 use that in preference to a shift insn followed by an add insn. */
3908 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3909 ? shiftadd_cost (speed, mode, m)
3910 : (mult_in_op1
3911 ? shiftsub1_cost (speed, mode, m)
3912 : shiftsub0_cost (speed, mode, m)));
3913
3914 res = new_cost (MIN (as_cost, sa_cost), 0);
3915 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3916
3917 STRIP_NOPS (multop);
3918 if (!is_gimple_val (multop))
3919 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3920
3921 *cost = res;
3922 return true;
3923 }
3924
3925 /* Estimates cost of forcing expression EXPR into a variable. */
3926
3927 static comp_cost
3928 force_expr_to_var_cost (tree expr, bool speed)
3929 {
3930 static bool costs_initialized = false;
3931 static unsigned integer_cost [2];
3932 static unsigned symbol_cost [2];
3933 static unsigned address_cost [2];
3934 tree op0, op1;
3935 comp_cost cost0, cost1, cost;
3936 machine_mode mode;
3937
3938 if (!costs_initialized)
3939 {
3940 tree type = build_pointer_type (integer_type_node);
3941 tree var, addr;
3942 rtx x;
3943 int i;
3944
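/* Measure once, for size (i == 0) and for speed (i == 1), the cost of
   materializing an integer constant, the address of a static variable,
   and such an address plus a constant offset; the results are cached and
   reused by all later queries. */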
3945 var = create_tmp_var_raw (integer_type_node, "test_var");
3946 TREE_STATIC (var) = 1;
3947 x = produce_memory_decl_rtl (var, NULL);
3948 SET_DECL_RTL (var, x);
3949
3950 addr = build1 (ADDR_EXPR, type, var);
3951
3952
3953 for (i = 0; i < 2; i++)
3954 {
3955 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3956 2000), i);
3957
3958 symbol_cost[i] = computation_cost (addr, i) + 1;
3959
3960 address_cost[i]
3961 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3963 {
3964 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3965 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3966 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3967 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3968 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3969 fprintf (dump_file, "\n");
3970 }
3971 }
3972
3973 costs_initialized = true;
3974 }
3975
3976 STRIP_NOPS (expr);
3977
3978 if (SSA_VAR_P (expr))
3979 return no_cost;
3980
3981 if (is_gimple_min_invariant (expr))
3982 {
3983 if (TREE_CODE (expr) == INTEGER_CST)
3984 return new_cost (integer_cost [speed], 0);
3985
3986 if (TREE_CODE (expr) == ADDR_EXPR)
3987 {
3988 tree obj = TREE_OPERAND (expr, 0);
3989
3990 if (TREE_CODE (obj) == VAR_DECL
3991 || TREE_CODE (obj) == PARM_DECL
3992 || TREE_CODE (obj) == RESULT_DECL)
3993 return new_cost (symbol_cost [speed], 0);
3994 }
3995
3996 return new_cost (address_cost [speed], 0);
3997 }
3998
3999 switch (TREE_CODE (expr))
4000 {
4001 case POINTER_PLUS_EXPR:
4002 case PLUS_EXPR:
4003 case MINUS_EXPR:
4004 case MULT_EXPR:
4005 op0 = TREE_OPERAND (expr, 0);
4006 op1 = TREE_OPERAND (expr, 1);
4007 STRIP_NOPS (op0);
4008 STRIP_NOPS (op1);
4009 break;
4010
4011 CASE_CONVERT:
4012 case NEGATE_EXPR:
4013 op0 = TREE_OPERAND (expr, 0);
4014 STRIP_NOPS (op0);
4015 op1 = NULL_TREE;
4016 break;
4017
4018 default:
4019 /* Just an arbitrary value, FIXME. */
4020 return new_cost (target_spill_cost[speed], 0);
4021 }
4022
4023 if (op0 == NULL_TREE
4024 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4025 cost0 = no_cost;
4026 else
4027 cost0 = force_expr_to_var_cost (op0, speed);
4028
4029 if (op1 == NULL_TREE
4030 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4031 cost1 = no_cost;
4032 else
4033 cost1 = force_expr_to_var_cost (op1, speed);
4034
4035 mode = TYPE_MODE (TREE_TYPE (expr));
4036 switch (TREE_CODE (expr))
4037 {
4038 case POINTER_PLUS_EXPR:
4039 case PLUS_EXPR:
4040 case MINUS_EXPR:
4041 case NEGATE_EXPR:
4042 cost = new_cost (add_cost (speed, mode), 0);
4043 if (TREE_CODE (expr) != NEGATE_EXPR)
4044 {
4045 tree mult = NULL_TREE;
4046 comp_cost sa_cost;
4047 if (TREE_CODE (op1) == MULT_EXPR)
4048 mult = op1;
4049 else if (TREE_CODE (op0) == MULT_EXPR)
4050 mult = op0;
4051
4052 if (mult != NULL_TREE
4053 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4054 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4055 speed, &sa_cost))
4056 return sa_cost;
4057 }
4058 break;
4059
4060 CASE_CONVERT:
4061 {
4062 tree inner_mode, outer_mode;
4063 outer_mode = TREE_TYPE (expr);
4064 inner_mode = TREE_TYPE (op0);
4065 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4066 TYPE_MODE (inner_mode), speed), 0);
4067 }
4068 break;
4069
4070 case MULT_EXPR:
4071 if (cst_and_fits_in_hwi (op0))
4072 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4073 mode, speed), 0);
4074 else if (cst_and_fits_in_hwi (op1))
4075 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4076 mode, speed), 0);
4077 else
4078 return new_cost (target_spill_cost [speed], 0);
4079 break;
4080
4081 default:
4082 gcc_unreachable ();
4083 }
4084
4085 cost = add_costs (cost, cost0);
4086 cost = add_costs (cost, cost1);
4087
4088 /* Bound the cost by target_spill_cost. The parts of complicated
4089 computations often are either loop invariant or at least can
4090 be shared between several iv uses, so letting this grow without
4091 limits would not give reasonable results. */
4092 if (cost.cost > (int) target_spill_cost [speed])
4093 cost.cost = target_spill_cost [speed];
4094
4095 return cost;
4096 }
4097
4098 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4099 invariants the computation depends on. */
4100
4101 static comp_cost
4102 force_var_cost (struct ivopts_data *data,
4103 tree expr, bitmap *depends_on)
4104 {
4105 if (depends_on)
4106 {
4107 fd_ivopts_data = data;
4108 walk_tree (&expr, find_depends, depends_on, NULL);
4109 }
4110
4111 return force_expr_to_var_cost (expr, data->speed);
4112 }
4113
4114 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4115 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
4116 set to false if the corresponding part is missing. DEPENDS_ON is a set of
4117 the invariants the computation depends on. */
4118
4119 static comp_cost
4120 split_address_cost (struct ivopts_data *data,
4121 tree addr, bool *symbol_present, bool *var_present,
4122 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4123 {
4124 tree core;
4125 HOST_WIDE_INT bitsize;
4126 HOST_WIDE_INT bitpos;
4127 tree toffset;
4128 machine_mode mode;
4129 int unsignedp, volatilep;
4130
4131 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4132 &unsignedp, &volatilep, false);
4133
4134 if (toffset != 0
4135 || bitpos % BITS_PER_UNIT != 0
4136 || TREE_CODE (core) != VAR_DECL)
4137 {
4138 *symbol_present = false;
4139 *var_present = true;
4140 fd_ivopts_data = data;
4141 walk_tree (&addr, find_depends, depends_on, NULL);
4142 return new_cost (target_spill_cost[data->speed], 0);
4143 }
4144
4145 *offset += bitpos / BITS_PER_UNIT;
4146 if (TREE_STATIC (core)
4147 || DECL_EXTERNAL (core))
4148 {
4149 *symbol_present = true;
4150 *var_present = false;
4151 return no_cost;
4152 }
4153
4154 *symbol_present = false;
4155 *var_present = true;
4156 return no_cost;
4157 }
4158
4159 /* Estimates cost of expressing the difference of addresses E1 - E2 as
4160 var + symbol + offset. The value of the offset is added to OFFSET;
4161 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4162 part is missing. DEPENDS_ON is a set of the invariants the computation
4163 depends on. */
4164
4165 static comp_cost
4166 ptr_difference_cost (struct ivopts_data *data,
4167 tree e1, tree e2, bool *symbol_present, bool *var_present,
4168 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4169 {
4170 HOST_WIDE_INT diff = 0;
4171 aff_tree aff_e1, aff_e2;
4172 tree type;
4173
4174 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4175
4176 if (ptr_difference_const (e1, e2, &diff))
4177 {
4178 *offset += diff;
4179 *symbol_present = false;
4180 *var_present = false;
4181 return no_cost;
4182 }
4183
4184 if (integer_zerop (e2))
4185 return split_address_cost (data, TREE_OPERAND (e1, 0),
4186 symbol_present, var_present, offset, depends_on);
4187
4188 *symbol_present = false;
4189 *var_present = true;
4190
4191 type = signed_type_for (TREE_TYPE (e1));
4192 tree_to_aff_combination (e1, type, &aff_e1);
4193 tree_to_aff_combination (e2, type, &aff_e2);
4194 aff_combination_scale (&aff_e2, -1);
4195 aff_combination_add (&aff_e1, &aff_e2);
4196
4197 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4198 }
4199
4200 /* Estimates cost of expressing the difference E1 - E2 as
4201 var + symbol + offset. The value of the offset is added to OFFSET;
4202 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4203 part is missing. DEPENDS_ON is a set of the invariants the computation
4204 depends on. */
4205
4206 static comp_cost
4207 difference_cost (struct ivopts_data *data,
4208 tree e1, tree e2, bool *symbol_present, bool *var_present,
4209 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4210 {
4211 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4212 unsigned HOST_WIDE_INT off1, off2;
4213 aff_tree aff_e1, aff_e2;
4214 tree type;
4215
4216 e1 = strip_offset (e1, &off1);
4217 e2 = strip_offset (e2, &off2);
4218 *offset += off1 - off2;
4219
4220 STRIP_NOPS (e1);
4221 STRIP_NOPS (e2);
4222
4223 if (TREE_CODE (e1) == ADDR_EXPR)
4224 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4225 offset, depends_on);
4226 *symbol_present = false;
4227
4228 if (operand_equal_p (e1, e2, 0))
4229 {
4230 *var_present = false;
4231 return no_cost;
4232 }
4233
4234 *var_present = true;
4235
4236 if (integer_zerop (e2))
4237 return force_var_cost (data, e1, depends_on);
4238
4239 if (integer_zerop (e1))
4240 {
4241 comp_cost cost = force_var_cost (data, e2, depends_on);
4242 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4243 return cost;
4244 }
4245
4246 type = signed_type_for (TREE_TYPE (e1));
4247 tree_to_aff_combination (e1, type, &aff_e1);
4248 tree_to_aff_combination (e2, type, &aff_e2);
4249 aff_combination_scale (&aff_e2, -1);
4250 aff_combination_add (&aff_e1, &aff_e2);
4251
4252 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4253 }
4254
4255 /* Returns true if AFF1 and AFF2 are identical. */
4256
4257 static bool
4258 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4259 {
4260 unsigned i;
4261
4262 if (aff1->n != aff2->n)
4263 return false;
4264
4265 for (i = 0; i < aff1->n; i++)
4266 {
4267 if (aff1->elts[i].coef != aff2->elts[i].coef)
4268 return false;
4269
4270 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4271 return false;
4272 }
4273 return true;
4274 }
4275
4276 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4277
4278 static int
4279 get_expr_id (struct ivopts_data *data, tree expr)
4280 {
4281 struct iv_inv_expr_ent ent;
4282 struct iv_inv_expr_ent **slot;
4283
4284 ent.expr = expr;
4285 ent.hash = iterative_hash_expr (expr, 0);
4286 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4287 if (*slot)
4288 return (*slot)->id;
4289
4290 *slot = XNEW (struct iv_inv_expr_ent);
4291 (*slot)->expr = expr;
4292 (*slot)->hash = ent.hash;
4293 (*slot)->id = data->inv_expr_id++;
4294 return (*slot)->id;
4295 }
4296
4297 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4298 requires a new compiler generated temporary. Returns -1 otherwise.
4299 ADDRESS_P is a flag indicating if the expression is for address
4300 computation. */
4301
4302 static int
4303 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4304 tree cbase, HOST_WIDE_INT ratio,
4305 bool address_p)
4306 {
4307 aff_tree ubase_aff, cbase_aff;
4308 tree expr, ub, cb;
4309
4310 STRIP_NOPS (ubase);
4311 STRIP_NOPS (cbase);
4312 ub = ubase;
4313 cb = cbase;
4314
4315 if ((TREE_CODE (ubase) == INTEGER_CST)
4316 && (TREE_CODE (cbase) == INTEGER_CST))
4317 return -1;
4318
4319 /* Strips the constant part. */
4320 if (TREE_CODE (ubase) == PLUS_EXPR
4321 || TREE_CODE (ubase) == MINUS_EXPR
4322 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4323 {
4324 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4325 ubase = TREE_OPERAND (ubase, 0);
4326 }
4327
4328 /* Strips the constant part. */
4329 if (TREE_CODE (cbase) == PLUS_EXPR
4330 || TREE_CODE (cbase) == MINUS_EXPR
4331 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4332 {
4333 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4334 cbase = TREE_OPERAND (cbase, 0);
4335 }
4336
4337 if (address_p)
4338 {
4339 if (((TREE_CODE (ubase) == SSA_NAME)
4340 || (TREE_CODE (ubase) == ADDR_EXPR
4341 && is_gimple_min_invariant (ubase)))
4342 && (TREE_CODE (cbase) == INTEGER_CST))
4343 return -1;
4344
4345 if (((TREE_CODE (cbase) == SSA_NAME)
4346 || (TREE_CODE (cbase) == ADDR_EXPR
4347 && is_gimple_min_invariant (cbase)))
4348 && (TREE_CODE (ubase) == INTEGER_CST))
4349 return -1;
4350 }
4351
4352 if (ratio == 1)
4353 {
4354 if (operand_equal_p (ubase, cbase, 0))
4355 return -1;
4356
4357 if (TREE_CODE (ubase) == ADDR_EXPR
4358 && TREE_CODE (cbase) == ADDR_EXPR)
4359 {
4360 tree usym, csym;
4361
4362 usym = TREE_OPERAND (ubase, 0);
4363 csym = TREE_OPERAND (cbase, 0);
4364 if (TREE_CODE (usym) == ARRAY_REF)
4365 {
4366 tree ind = TREE_OPERAND (usym, 1);
4367 if (TREE_CODE (ind) == INTEGER_CST
4368 && tree_fits_shwi_p (ind)
4369 && tree_to_shwi (ind) == 0)
4370 usym = TREE_OPERAND (usym, 0);
4371 }
4372 if (TREE_CODE (csym) == ARRAY_REF)
4373 {
4374 tree ind = TREE_OPERAND (csym, 1);
4375 if (TREE_CODE (ind) == INTEGER_CST
4376 && tree_fits_shwi_p (ind)
4377 && tree_to_shwi (ind) == 0)
4378 csym = TREE_OPERAND (csym, 0);
4379 }
4380 if (operand_equal_p (usym, csym, 0))
4381 return -1;
4382 }
4383 /* Now do a more complex comparison. */
4384 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4385 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4386 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4387 return -1;
4388 }
4389
4390 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4391 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4392
4393 aff_combination_scale (&cbase_aff, -1 * ratio);
4394 aff_combination_add (&ubase_aff, &cbase_aff);
4395 expr = aff_combination_to_tree (&ubase_aff);
4396 return get_expr_id (data, expr);
4397 }
4398
4399
4400
4401 /* Determines the cost of the computation by which USE is expressed
4402 from induction variable CAND. If ADDRESS_P is true, we just need
4403 to create an address from it, otherwise we want to get it into a
4404 register. A set of invariants we depend on is stored in
4405 DEPENDS_ON. AT is the statement at which the value is computed.
4406 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4407 addressing is likely. */
4408
4409 static comp_cost
4410 get_computation_cost_at (struct ivopts_data *data,
4411 struct iv_use *use, struct iv_cand *cand,
4412 bool address_p, bitmap *depends_on, gimple at,
4413 bool *can_autoinc,
4414 int *inv_expr_id)
4415 {
4416 tree ubase = use->iv->base, ustep = use->iv->step;
4417 tree cbase, cstep;
4418 tree utype = TREE_TYPE (ubase), ctype;
4419 unsigned HOST_WIDE_INT cstepi, offset = 0;
4420 HOST_WIDE_INT ratio, aratio;
4421 bool var_present, symbol_present, stmt_is_after_inc;
4422 comp_cost cost;
4423 widest_int rat;
4424 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4425 machine_mode mem_mode = (address_p
4426 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4427 : VOIDmode);
4428
4429 *depends_on = NULL;
4430
4431 /* Only consider real candidates. */
4432 if (!cand->iv)
4433 return infinite_cost;
4434
4435 cbase = cand->iv->base;
4436 cstep = cand->iv->step;
4437 ctype = TREE_TYPE (cbase);
4438
4439 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4440 {
4441 /* We do not have a precision to express the values of the use. */
4442 return infinite_cost;
4443 }
4444
4445 if (address_p
4446 || (use->iv->base_object
4447 && cand->iv->base_object
4448 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4449 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4450 {
4451 /* Do not try to express the address of an object with a computation based
4452 on the address of a different object. This may cause problems in
4453 RTL-level alias analysis (which does not expect this to happen,
4454 as it is illegal in C), and would be unlikely to be useful
4455 anyway. */
4456 if (use->iv->base_object
4457 && cand->iv->base_object
4458 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4459 return infinite_cost;
4460 }
4461
4462 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4463 {
4464 /* TODO -- add direct handling of this case. */
4465 goto fallback;
4466 }
4467
4468 /* CSTEPI is removed from the offset in case the statement is after the
4469 increment. If the step is not constant, we use zero instead.
4470 This is a bit imprecise (there is the extra addition), but
4471 redundancy elimination is likely to transform the code so that
4472 it uses the value of the variable before the increment anyway,
4473 so this is not too unrealistic. */
4474 if (cst_and_fits_in_hwi (cstep))
4475 cstepi = int_cst_value (cstep);
4476 else
4477 cstepi = 0;
4478
4479 if (!constant_multiple_of (ustep, cstep, &rat))
4480 return infinite_cost;
4481
4482 if (wi::fits_shwi_p (rat))
4483 ratio = rat.to_shwi ();
4484 else
4485 return infinite_cost;
4486
4487 STRIP_NOPS (cbase);
4488 ctype = TREE_TYPE (cbase);
4489
4490 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4491
4492 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4493 or ratio == 1, it is better to handle this like
4494
4495 ubase - ratio * cbase + ratio * var
4496
4497 (this also holds in the case ratio == -1, TODO). */
4498
4499 if (cst_and_fits_in_hwi (cbase))
4500 {
4501 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4502 cost = difference_cost (data,
4503 ubase, build_int_cst (utype, 0),
4504 &symbol_present, &var_present, &offset,
4505 depends_on);
4506 cost.cost /= avg_loop_niter (data->current_loop);
4507 }
4508 else if (ratio == 1)
4509 {
4510 tree real_cbase = cbase;
4511
4512 /* Check to see if any adjustment is needed. */
4513 if (cstepi == 0 && stmt_is_after_inc)
4514 {
4515 aff_tree real_cbase_aff;
4516 aff_tree cstep_aff;
4517
4518 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4519 &real_cbase_aff);
4520 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4521
4522 aff_combination_add (&real_cbase_aff, &cstep_aff);
4523 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4524 }
4525
4526 cost = difference_cost (data,
4527 ubase, real_cbase,
4528 &symbol_present, &var_present, &offset,
4529 depends_on);
4530 cost.cost /= avg_loop_niter (data->current_loop);
4531 }
4532 else if (address_p
4533 && !POINTER_TYPE_P (ctype)
4534 && multiplier_allowed_in_address_p
4535 (ratio, mem_mode,
4536 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4537 {
4538 cbase
4539 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4540 cost = difference_cost (data,
4541 ubase, cbase,
4542 &symbol_present, &var_present, &offset,
4543 depends_on);
4544 cost.cost /= avg_loop_niter (data->current_loop);
4545 }
4546 else
4547 {
4548 cost = force_var_cost (data, cbase, depends_on);
4549 cost = add_costs (cost,
4550 difference_cost (data,
4551 ubase, build_int_cst (utype, 0),
4552 &symbol_present, &var_present,
4553 &offset, depends_on));
4554 cost.cost /= avg_loop_niter (data->current_loop);
4555 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4556 }
4557
4558 /* The set of invariants depended on by the sub-use has already been computed
4559 for the first use in the group. */
4560 if (use->sub_id)
4561 {
4562 cost.cost = 0;
4563 if (depends_on && *depends_on)
4564 bitmap_clear (*depends_on);
4565 }
4566 else if (inv_expr_id)
4567 {
4568 *inv_expr_id =
4569 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4570 /* Clear depends on. */
4571 if (*inv_expr_id != -1 && depends_on && *depends_on)
4572 bitmap_clear (*depends_on);
4573 }
4574
4575 /* If we are after the increment, the value of the candidate is higher by
4576 one iteration. */
4577 if (stmt_is_after_inc)
4578 offset -= ratio * cstepi;
4579
4580 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4581 (symbol/var1/const parts may be omitted). If we are looking for an
4582 address, find the cost of addressing this. */
4583 if (address_p)
4584 return add_costs (cost,
4585 get_address_cost (symbol_present, var_present,
4586 offset, ratio, cstepi,
4587 mem_mode,
4588 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4589 speed, stmt_is_after_inc,
4590 can_autoinc));
4591
4592 /* Otherwise estimate the costs for computing the expression. */
4593 if (!symbol_present && !var_present && !offset)
4594 {
4595 if (ratio != 1)
4596 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4597 return cost;
4598 }
4599
4600 /* Symbol + offset should be compile-time computable so consider that they
4601 are added once to the variable, if present. */
4602 if (var_present && (symbol_present || offset))
4603 cost.cost += adjust_setup_cost (data,
4604 add_cost (speed, TYPE_MODE (ctype)));
4605
4606 /* Having offset does not affect runtime cost in case it is added to
4607 symbol, but it increases complexity. */
4608 if (offset)
4609 cost.complexity++;
4610
4611 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4612
4613 aratio = ratio > 0 ? ratio : -ratio;
4614 if (aratio != 1)
4615 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4616 return cost;
4617
4618 fallback:
4619 if (can_autoinc)
4620 *can_autoinc = false;
4621
4622 {
4623 /* Just get the expression, expand it and measure the cost. */
4624 tree comp = get_computation_at (data->current_loop, use, cand, at);
4625
4626 if (!comp)
4627 return infinite_cost;
4628
4629 if (address_p)
4630 comp = build_simple_mem_ref (comp);
4631
4632 return new_cost (computation_cost (comp, speed), 0);
4633 }
4634 }
4635
4636 /* Determines the cost of the computation by which USE is expressed
4637 from induction variable CAND. If ADDRESS_P is true, we just need
4638 to create an address from it, otherwise we want to get it into a
4639 register. A set of invariants we depend on is stored in
4640 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4641 autoinc addressing is likely. */
4642
4643 static comp_cost
4644 get_computation_cost (struct ivopts_data *data,
4645 struct iv_use *use, struct iv_cand *cand,
4646 bool address_p, bitmap *depends_on,
4647 bool *can_autoinc, int *inv_expr_id)
4648 {
4649 return get_computation_cost_at (data,
4650 use, cand, address_p, depends_on, use->stmt,
4651 can_autoinc, inv_expr_id);
4652 }
4653
4654 /* Determines cost of basing replacement of USE on CAND in a generic
4655 expression. */
4656
4657 static bool
4658 determine_use_iv_cost_generic (struct ivopts_data *data,
4659 struct iv_use *use, struct iv_cand *cand)
4660 {
4661 bitmap depends_on;
4662 comp_cost cost;
4663 int inv_expr_id = -1;
4664
4665 /* The simple case first -- if we need to express value of the preserved
4666 original biv, the cost is 0. This also prevents us from counting the
4667 cost of increment twice -- once at this use and once in the cost of
4668 the candidate. */
4669 if (cand->pos == IP_ORIGINAL
4670 && cand->incremented_at == use->stmt)
4671 {
4672 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4673 ERROR_MARK, -1);
4674 return true;
4675 }
4676
4677 cost = get_computation_cost (data, use, cand, false, &depends_on,
4678 NULL, &inv_expr_id);
4679
4680 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4681 inv_expr_id);
4682
4683 return !infinite_cost_p (cost);
4684 }
4685
4686 /* Determines cost of basing replacement of USE on CAND in an address. */
4687
4688 static bool
4689 determine_use_iv_cost_address (struct ivopts_data *data,
4690 struct iv_use *use, struct iv_cand *cand)
4691 {
4692 bitmap depends_on;
4693 bool can_autoinc;
4694 int inv_expr_id = -1;
4695 struct iv_use *sub_use;
4696 comp_cost sub_cost;
4697 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4698 &can_autoinc, &inv_expr_id);
4699
4700 if (cand->ainc_use == use)
4701 {
4702 if (can_autoinc)
4703 cost.cost -= cand->cost_step;
4704 /* If we generated the candidate solely for exploiting autoincrement
4705 opportunities, and it turns out it can't be used, set the cost to
4706 infinity to make sure we ignore it. */
4707 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4708 cost = infinite_cost;
4709 }
4710 for (sub_use = use->next;
4711 sub_use && !infinite_cost_p (cost);
4712 sub_use = sub_use->next)
4713 {
4714 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4715 &can_autoinc, &inv_expr_id);
4716 cost = add_costs (cost, sub_cost);
4717 }
4718
4719 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4720 inv_expr_id);
4721
4722 return !infinite_cost_p (cost);
4723 }
4724
4725 /* Computes value of candidate CAND at position AT in iteration NITER, and
4726 stores it to VAL. */
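/* The value computed below is BASE + STEP * NITER, with one extra STEP
   added when AT is after the increment of CAND; the step and iteration
   count are converted to an unsigned type so that the multiplication and
   addition cannot overflow. */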
4727
4728 static void
4729 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4730 aff_tree *val)
4731 {
4732 aff_tree step, delta, nit;
4733 struct iv *iv = cand->iv;
4734 tree type = TREE_TYPE (iv->base);
4735 tree steptype = type;
4736 if (POINTER_TYPE_P (type))
4737 steptype = sizetype;
4738 steptype = unsigned_type_for (type);
4739
4740 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4741 aff_combination_convert (&step, steptype);
4742 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4743 aff_combination_convert (&nit, steptype);
4744 aff_combination_mult (&nit, &step, &delta);
4745 if (stmt_after_increment (loop, cand, at))
4746 aff_combination_add (&delta, &step);
4747
4748 tree_to_aff_combination (iv->base, type, val);
4749 if (!POINTER_TYPE_P (type))
4750 aff_combination_convert (val, steptype);
4751 aff_combination_add (val, &delta);
4752 }
4753
4754 /* Returns period of induction variable iv. */
4755
4756 static tree
4757 iv_period (struct iv *iv)
4758 {
4759 tree step = iv->step, period, type;
4760 tree pow2div;
4761
4762 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4763
4764 type = unsigned_type_for (TREE_TYPE (step));
4765 /* The period of the iv is lcm (step, type_range)/step - 1,
4766 i.e., N*type_range/step - 1. Since the type range is a power
4767 of two, N == (step >> num_of_ending_zeros_binary (step)),
4768 so the final result is
4769
4770 (type_range >> num_of_ending_zeros_binary (step)) - 1
4771
4772 */
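/* For example, with a 32-bit unsigned type and step == 4 (two trailing
   zero bits), the period is (2^32 >> 2) - 1 == 0x3fffffff. */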
4773 pow2div = num_ending_zeros (step);
4774
4775 period = build_low_bits_mask (type,
4776 (TYPE_PRECISION (type)
4777 - tree_to_uhwi (pow2div)));
4778
4779 return period;
4780 }
4781
4782 /* Returns the comparison operator used when eliminating the iv USE. */
4783
4784 static enum tree_code
4785 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4786 {
4787 struct loop *loop = data->current_loop;
4788 basic_block ex_bb;
4789 edge exit;
4790
4791 ex_bb = gimple_bb (use->stmt);
4792 exit = EDGE_SUCC (ex_bb, 0);
4793 if (flow_bb_inside_loop_p (loop, exit->dest))
4794 exit = EDGE_SUCC (ex_bb, 1);
4795
4796 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4797 }
4798
4799 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4800 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4801 calculation is performed in a non-wrapping type.
4802
4803 TODO: More generally, we could test for the situation that
4804 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4805 This would require knowing the sign of OFFSET. */
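/* For example, if BASE is p == q + n and OFFSET is n, with the addition
   done in a non-wrapping type, then BASE - OFFSET is simply q and the
   subtraction cannot overflow. */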
4806
4807 static bool
4808 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4809 {
4810 enum tree_code code;
4811 tree e1, e2;
4812 aff_tree aff_e1, aff_e2, aff_offset;
4813
4814 if (!nowrap_type_p (TREE_TYPE (base)))
4815 return false;
4816
4817 base = expand_simple_operations (base);
4818
4819 if (TREE_CODE (base) == SSA_NAME)
4820 {
4821 gimple stmt = SSA_NAME_DEF_STMT (base);
4822
4823 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4824 return false;
4825
4826 code = gimple_assign_rhs_code (stmt);
4827 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4828 return false;
4829
4830 e1 = gimple_assign_rhs1 (stmt);
4831 e2 = gimple_assign_rhs2 (stmt);
4832 }
4833 else
4834 {
4835 code = TREE_CODE (base);
4836 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4837 return false;
4838 e1 = TREE_OPERAND (base, 0);
4839 e2 = TREE_OPERAND (base, 1);
4840 }
4841
4842 /* Use affine expansion for a deeper inspection to prove the equality. */
4843 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4844 &aff_e2, &data->name_expansion_cache);
4845 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4846 &aff_offset, &data->name_expansion_cache);
4847 aff_combination_scale (&aff_offset, -1);
4848 switch (code)
4849 {
4850 case PLUS_EXPR:
4851 aff_combination_add (&aff_e2, &aff_offset);
4852 if (aff_combination_zero_p (&aff_e2))
4853 return true;
4854
4855 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4856 &aff_e1, &data->name_expansion_cache);
4857 aff_combination_add (&aff_e1, &aff_offset);
4858 return aff_combination_zero_p (&aff_e1);
4859
4860 case POINTER_PLUS_EXPR:
4861 aff_combination_add (&aff_e2, &aff_offset);
4862 return aff_combination_zero_p (&aff_e2);
4863
4864 default:
4865 return false;
4866 }
4867 }
4868
4869 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4870 comparison with CAND. NITER describes the number of iterations of
4871 the loops. If successful, the comparison in COMP_P is altered accordingly.
4872
4873 We aim to handle the following situation:
4874
4875 sometype *base, *p;
4876 int a, b, i;
4877
4878 i = a;
4879 p = p_0 = base + a;
4880
4881 do
4882 {
4883 bla (*p);
4884 p++;
4885 i++;
4886 }
4887 while (i < b);
4888
4889 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4890 We aim to optimize this to
4891
4892 p = p_0 = base + a;
4893 do
4894 {
4895 bla (*p);
4896 p++;
4897 }
4898 while (p < p_0 - a + b);
4899
4900 This preserves correctness, since the pointer arithmetic does not
4901 overflow. More precisely:
4902
4903 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4904 overflow in computing it or the values of p.
4905 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4906 overflow. To prove this, we use the fact that p_0 = base + a. */
4907
4908 static bool
4909 iv_elimination_compare_lt (struct ivopts_data *data,
4910 struct iv_cand *cand, enum tree_code *comp_p,
4911 struct tree_niter_desc *niter)
4912 {
4913 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4914 struct aff_tree nit, tmpa, tmpb;
4915 enum tree_code comp;
4916 HOST_WIDE_INT step;
4917
4918 /* We need to know that the candidate induction variable does not overflow.
4919 While more complex analysis may be used to prove this, for now just
4920 check that the variable appears in the original program and that it
4921 is computed in a type that guarantees no overflows. */
4922 cand_type = TREE_TYPE (cand->iv->base);
4923 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4924 return false;
4925
4926 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4927 the calculation of the BOUND could overflow, making the comparison
4928 invalid. */
4929 if (!data->loop_single_exit_p)
4930 return false;
4931
4932 /* We need to be able to decide whether the candidate is increasing or
4933 decreasing in order to choose the right comparison operator. */
4934 if (!cst_and_fits_in_hwi (cand->iv->step))
4935 return false;
4936 step = int_cst_value (cand->iv->step);
4937
4938 /* Check that the number of iterations matches the expected pattern:
4939 a + 1 > b ? 0 : b - a - 1. */
4940 mbz = niter->may_be_zero;
4941 if (TREE_CODE (mbz) == GT_EXPR)
4942 {
4943 /* Handle a + 1 > b. */
4944 tree op0 = TREE_OPERAND (mbz, 0);
4945 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4946 {
4947 a = TREE_OPERAND (op0, 0);
4948 b = TREE_OPERAND (mbz, 1);
4949 }
4950 else
4951 return false;
4952 }
4953 else if (TREE_CODE (mbz) == LT_EXPR)
4954 {
4955 tree op1 = TREE_OPERAND (mbz, 1);
4956
4957 /* Handle b < a + 1. */
4958 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4959 {
4960 a = TREE_OPERAND (op1, 0);
4961 b = TREE_OPERAND (mbz, 0);
4962 }
4963 else
4964 return false;
4965 }
4966 else
4967 return false;
4968
4969 /* Expected number of iterations is B - A - 1. Check that it matches
4970 the actual number, i.e., that B - A - NITER = 1. */
4971 tree_to_aff_combination (niter->niter, nit_type, &nit);
4972 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4973 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4974 aff_combination_scale (&nit, -1);
4975 aff_combination_scale (&tmpa, -1);
4976 aff_combination_add (&tmpb, &tmpa);
4977 aff_combination_add (&tmpb, &nit);
4978 if (tmpb.n != 0 || tmpb.offset != 1)
4979 return false;
4980
4981 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4982 overflow. */
4983 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4984 cand->iv->step,
4985 fold_convert (TREE_TYPE (cand->iv->step), a));
4986 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4987 return false;
4988
4989 /* Determine the new comparison operator. */
4990 comp = step < 0 ? GT_EXPR : LT_EXPR;
4991 if (*comp_p == NE_EXPR)
4992 *comp_p = comp;
4993 else if (*comp_p == EQ_EXPR)
4994 *comp_p = invert_tree_comparison (comp, false);
4995 else
4996 gcc_unreachable ();
4997
4998 return true;
4999 }
5000
5001 /* Check whether it is possible to express the condition in USE by comparison
5002 of candidate CAND. If so, store the value compared with to BOUND, and the
5003 comparison operator to COMP. */
5004
5005 static bool
5006 may_eliminate_iv (struct ivopts_data *data,
5007 struct iv_use *use, struct iv_cand *cand, tree *bound,
5008 enum tree_code *comp)
5009 {
5010 basic_block ex_bb;
5011 edge exit;
5012 tree period;
5013 struct loop *loop = data->current_loop;
5014 aff_tree bnd;
5015 struct tree_niter_desc *desc = NULL;
5016
5017 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5018 return false;
5019
5020 /* For now, this works only for exits that dominate the loop latch.
5021 TODO: extend to other conditions inside the loop body. */
5022 ex_bb = gimple_bb (use->stmt);
5023 if (use->stmt != last_stmt (ex_bb)
5024 || gimple_code (use->stmt) != GIMPLE_COND
5025 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5026 return false;
5027
5028 exit = EDGE_SUCC (ex_bb, 0);
5029 if (flow_bb_inside_loop_p (loop, exit->dest))
5030 exit = EDGE_SUCC (ex_bb, 1);
5031 if (flow_bb_inside_loop_p (loop, exit->dest))
5032 return false;
5033
5034 desc = niter_for_exit (data, exit);
5035 if (!desc)
5036 return false;
5037
5038 /* Determine whether we can use the variable to test the exit condition.
5039 This is the case iff the period of the induction variable is greater
5040 than the number of iterations for which the exit condition is true. */
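/* For example, a candidate with a 32-bit unsigned step of 1 has period
   2^32 - 1, so it can be used as long as the exit condition is true for
   at most 2^32 - 1 iterations. */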
5041 period = iv_period (cand->iv);
5042
5043 /* If the number of iterations is constant, compare against it directly. */
5044 if (TREE_CODE (desc->niter) == INTEGER_CST)
5045 {
5046 /* See cand_value_at. */
5047 if (stmt_after_increment (loop, cand, use->stmt))
5048 {
5049 if (!tree_int_cst_lt (desc->niter, period))
5050 return false;
5051 }
5052 else
5053 {
5054 if (tree_int_cst_lt (period, desc->niter))
5055 return false;
5056 }
5057 }
5058
5059 /* If not, and if this is the only possible exit of the loop, see whether
5060 we can get a conservative estimate on the number of iterations of the
5061 entire loop and compare against that instead. */
5062 else
5063 {
5064 widest_int period_value, max_niter;
5065
5066 max_niter = desc->max;
5067 if (stmt_after_increment (loop, cand, use->stmt))
5068 max_niter += 1;
5069 period_value = wi::to_widest (period);
5070 if (wi::gtu_p (max_niter, period_value))
5071 {
5072 /* See if we can take advantage of inferred loop bound information. */
5073 if (data->loop_single_exit_p)
5074 {
5075 if (!max_loop_iterations (loop, &max_niter))
5076 return false;
5077 /* The loop bound is already adjusted by adding 1. */
5078 if (wi::gtu_p (max_niter, period_value))
5079 return false;
5080 }
5081 else
5082 return false;
5083 }
5084 }
5085
5086 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5087
5088 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5089 aff_combination_to_tree (&bnd));
5090 *comp = iv_elimination_compare (data, use);
5091
5092 /* It is unlikely that computing the number of iterations using division
5093 would be more profitable than keeping the original induction variable. */
5094 if (expression_expensive_p (*bound))
5095 return false;
5096
5097 /* Sometimes, it is possible to handle the situation that the number of
5098 iterations may be zero unless additional assumptions hold, by using <
5099 instead of != in the exit condition.
5100
5101 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5102 base the exit condition on it. However, that is often too
5103 expensive. */
5104 if (!integer_zerop (desc->may_be_zero))
5105 return iv_elimination_compare_lt (data, cand, comp, desc);
5106
5107 return true;
5108 }
5109
5110 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5111 be copied if it is used in the loop body and DATA->body_includes_call. */
5112
5113 static int
5114 parm_decl_cost (struct ivopts_data *data, tree bound)
5115 {
5116 tree sbound = bound;
5117 STRIP_NOPS (sbound);
5118
5119 if (TREE_CODE (sbound) == SSA_NAME
5120 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5121 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5122 && data->body_includes_call)
5123 return COSTS_N_INSNS (1);
5124
5125 return 0;
5126 }
5127
5128 /* Determines cost of basing replacement of USE on CAND in a condition. */
5129
5130 static bool
5131 determine_use_iv_cost_condition (struct ivopts_data *data,
5132 struct iv_use *use, struct iv_cand *cand)
5133 {
5134 tree bound = NULL_TREE;
5135 struct iv *cmp_iv;
5136 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5137 comp_cost elim_cost, express_cost, cost, bound_cost;
5138 bool ok;
5139 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5140 tree *control_var, *bound_cst;
5141 enum tree_code comp = ERROR_MARK;
5142
5143 /* Only consider real candidates. */
5144 if (!cand->iv)
5145 {
5146 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5147 ERROR_MARK, -1);
5148 return false;
5149 }
5150
5151 /* Try iv elimination. */
5152 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5153 {
5154 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5155 if (elim_cost.cost == 0)
5156 elim_cost.cost = parm_decl_cost (data, bound);
5157 else if (TREE_CODE (bound) == INTEGER_CST)
5158 elim_cost.cost = 0;
5159 /* If we replace a loop condition 'i < n' with 'p < base + n',
5160 depends_on_elim will have 'base' and 'n' set, which implies
5161 that both 'base' and 'n' will be live during the loop. More likely,
5162 'base + n' will be loop invariant, resulting in only one live value
5163 during the loop. So in that case we clear depends_on_elim and set
5164 elim_inv_expr_id instead. */
5165 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5166 {
5167 elim_inv_expr_id = get_expr_id (data, bound);
5168 bitmap_clear (depends_on_elim);
5169 }
5170 /* The bound is a loop invariant, so it will be only computed
5171 once. */
5172 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5173 }
5174 else
5175 elim_cost = infinite_cost;
5176
5177 /* Try expressing the original giv. If it is compared with an invariant,
5178 note that we cannot get rid of it. */
5179 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5180 NULL, &cmp_iv);
5181 gcc_assert (ok);
5182
5183 /* When the condition is a comparison of the candidate IV against
5184 zero, prefer this IV.
5185
5186 TODO: The constant that we're subtracting from the cost should
5187 be target-dependent. This information should be added to the
5188 target costs for each backend. */
5189 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5190 && integer_zerop (*bound_cst)
5191 && (operand_equal_p (*control_var, cand->var_after, 0)
5192 || operand_equal_p (*control_var, cand->var_before, 0)))
5193 elim_cost.cost -= 1;
5194
5195 express_cost = get_computation_cost (data, use, cand, false,
5196 &depends_on_express, NULL,
5197 &express_inv_expr_id);
5198 fd_ivopts_data = data;
5199 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5200
5201 /* Count the cost of the original bound as well. */
5202 bound_cost = force_var_cost (data, *bound_cst, NULL);
5203 if (bound_cost.cost == 0)
5204 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5205 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5206 bound_cost.cost = 0;
5207 express_cost.cost += bound_cost.cost;
5208
5209 /* Choose the better approach, preferring the eliminated IV. */
5210 if (compare_costs (elim_cost, express_cost) <= 0)
5211 {
5212 cost = elim_cost;
5213 depends_on = depends_on_elim;
5214 depends_on_elim = NULL;
5215 inv_expr_id = elim_inv_expr_id;
5216 }
5217 else
5218 {
5219 cost = express_cost;
5220 depends_on = depends_on_express;
5221 depends_on_express = NULL;
5222 bound = NULL_TREE;
5223 comp = ERROR_MARK;
5224 inv_expr_id = express_inv_expr_id;
5225 }
5226
5227 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5228
5229 if (depends_on_elim)
5230 BITMAP_FREE (depends_on_elim);
5231 if (depends_on_express)
5232 BITMAP_FREE (depends_on_express);
5233
5234 return !infinite_cost_p (cost);
5235 }
5236
5237 /* Determines cost of basing replacement of USE on CAND. Returns false
5238 if USE cannot be based on CAND. */
5239
5240 static bool
5241 determine_use_iv_cost (struct ivopts_data *data,
5242 struct iv_use *use, struct iv_cand *cand)
5243 {
5244 switch (use->type)
5245 {
5246 case USE_NONLINEAR_EXPR:
5247 return determine_use_iv_cost_generic (data, use, cand);
5248
5249 case USE_ADDRESS:
5250 return determine_use_iv_cost_address (data, use, cand);
5251
5252 case USE_COMPARE:
5253 return determine_use_iv_cost_condition (data, use, cand);
5254
5255 default:
5256 gcc_unreachable ();
5257 }
5258 }
5259
5260 /* Return true if get_computation_cost indicates that autoincrement is
5261 a possibility for the pair of USE and CAND, false otherwise. */
5262
5263 static bool
5264 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5265 struct iv_cand *cand)
5266 {
5267 bitmap depends_on;
5268 bool can_autoinc;
5269 comp_cost cost;
5270
5271 if (use->type != USE_ADDRESS)
5272 return false;
5273
5274 cost = get_computation_cost (data, use, cand, true, &depends_on,
5275 &can_autoinc, NULL);
5276
5277 BITMAP_FREE (depends_on);
5278
5279 return !infinite_cost_p (cost) && can_autoinc;
5280 }
5281
5282 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5283 use that allows autoincrement, and set their AINC_USE if possible. */
5284
5285 static void
5286 set_autoinc_for_original_candidates (struct ivopts_data *data)
5287 {
5288 unsigned i, j;
5289
5290 for (i = 0; i < n_iv_cands (data); i++)
5291 {
5292 struct iv_cand *cand = iv_cand (data, i);
5293 struct iv_use *closest_before = NULL;
5294 struct iv_use *closest_after = NULL;
5295 if (cand->pos != IP_ORIGINAL)
5296 continue;
5297
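/* Find the uses in the same basic block that are nearest to the candidate's
   increment, one before it and one after it; either of them may let the
   increment be folded into an auto-increment/decrement addressing mode. */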
5298 for (j = 0; j < n_iv_uses (data); j++)
5299 {
5300 struct iv_use *use = iv_use (data, j);
5301 unsigned uid = gimple_uid (use->stmt);
5302
5303 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5304 continue;
5305
5306 if (uid < gimple_uid (cand->incremented_at)
5307 && (closest_before == NULL
5308 || uid > gimple_uid (closest_before->stmt)))
5309 closest_before = use;
5310
5311 if (uid > gimple_uid (cand->incremented_at)
5312 && (closest_after == NULL
5313 || uid < gimple_uid (closest_after->stmt)))
5314 closest_after = use;
5315 }
5316
5317 if (closest_before != NULL
5318 && autoinc_possible_for_pair (data, closest_before, cand))
5319 cand->ainc_use = closest_before;
5320 else if (closest_after != NULL
5321 && autoinc_possible_for_pair (data, closest_after, cand))
5322 cand->ainc_use = closest_after;
5323 }
5324 }
5325
5326 /* Finds the candidates for the induction variables. */
5327
5328 static void
5329 find_iv_candidates (struct ivopts_data *data)
5330 {
5331 /* Add commonly used ivs. */
5332 add_standard_iv_candidates (data);
5333
5334 /* Add old induction variables. */
5335 add_old_ivs_candidates (data);
5336
5337 /* Add induction variables derived from uses. */
5338 add_derived_ivs_candidates (data);
5339
5340 set_autoinc_for_original_candidates (data);
5341
5342 /* Record the important candidates. */
5343 record_important_candidates (data);
5344 }
5345
5346 /* Determines costs of basing the use of the iv on an iv candidate. */
5347
5348 static void
5349 determine_use_iv_costs (struct ivopts_data *data)
5350 {
5351 unsigned i, j;
5352 struct iv_use *use;
5353 struct iv_cand *cand;
5354 bitmap to_clear = BITMAP_ALLOC (NULL);
5355
5356 alloc_use_cost_map (data);
5357
5358 for (i = 0; i < n_iv_uses (data); i++)
5359 {
5360 use = iv_use (data, i);
5361
5362 if (data->consider_all_candidates)
5363 {
5364 for (j = 0; j < n_iv_cands (data); j++)
5365 {
5366 cand = iv_cand (data, j);
5367 determine_use_iv_cost (data, use, cand);
5368 }
5369 }
5370 else
5371 {
5372 bitmap_iterator bi;
5373
5374 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5375 {
5376 cand = iv_cand (data, j);
5377 if (!determine_use_iv_cost (data, use, cand))
5378 bitmap_set_bit (to_clear, j);
5379 }
5380
5381 /* Remove the candidates for which the cost is infinite from
5382 the list of related candidates. */
5383 bitmap_and_compl_into (use->related_cands, to_clear);
5384 bitmap_clear (to_clear);
5385 }
5386 }
5387
5388 BITMAP_FREE (to_clear);
5389
5390 if (dump_file && (dump_flags & TDF_DETAILS))
5391 {
5392 fprintf (dump_file, "Use-candidate costs:\n");
5393
5394 for (i = 0; i < n_iv_uses (data); i++)
5395 {
5396 use = iv_use (data, i);
5397
5398 fprintf (dump_file, "Use %d:\n", i);
5399 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5400 for (j = 0; j < use->n_map_members; j++)
5401 {
5402 if (!use->cost_map[j].cand
5403 || infinite_cost_p (use->cost_map[j].cost))
5404 continue;
5405
5406 fprintf (dump_file, " %d\t%d\t%d\t",
5407 use->cost_map[j].cand->id,
5408 use->cost_map[j].cost.cost,
5409 use->cost_map[j].cost.complexity);
5410 if (use->cost_map[j].depends_on)
5411 bitmap_print (dump_file,
5412 use->cost_map[j].depends_on, "","");
5413 if (use->cost_map[j].inv_expr_id != -1)
5414 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5415 fprintf (dump_file, "\n");
5416 }
5417
5418 fprintf (dump_file, "\n");
5419 }
5420 fprintf (dump_file, "\n");
5421 }
5422 }
5423
5424 /* Determines cost of the candidate CAND. */
5425
5426 static void
5427 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5428 {
5429 comp_cost cost_base;
5430 unsigned cost, cost_step;
5431 tree base;
5432
5433 if (!cand->iv)
5434 {
5435 cand->cost = 0;
5436 return;
5437 }
5438
5439 /* There are two costs associated with the candidate -- its increment
5440 and its initialization. The second is almost negligible for any loop
5441 that rolls enough, so we take it into account only very slightly. */
5442
5443 base = cand->iv->base;
5444 cost_base = force_var_cost (data, base, NULL);
5445 /* It will be exceptional that the iv register happens to be initialized with
5446 the proper value at no cost. In general, there will at least be a regcopy
5447 or a const set. */
5448 if (cost_base.cost == 0)
5449 cost_base.cost = COSTS_N_INSNS (1);
5450 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5451
5452 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5453
5454 /* Prefer the original ivs unless we may gain something by replacing them;
5455 the reason is to make debugging simpler, so this is not relevant for
5456 artificial ivs created by other optimization passes. */
5457 if (cand->pos != IP_ORIGINAL
5458 || !SSA_NAME_VAR (cand->var_before)
5459 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5460 cost++;
5461
5462 /* Prefer not to insert statements into latch unless there are some
5463 already (so that we do not create unnecessary jumps). */
5464 if (cand->pos == IP_END
5465 && empty_block_p (ip_end_pos (data->current_loop)))
5466 cost++;
5467
5468 cand->cost = cost;
5469 cand->cost_step = cost_step;
5470 }
5471
5472 /* Determines costs of computation of the candidates. */
5473
5474 static void
5475 determine_iv_costs (struct ivopts_data *data)
5476 {
5477 unsigned i;
5478
5479 if (dump_file && (dump_flags & TDF_DETAILS))
5480 {
5481 fprintf (dump_file, "Candidate costs:\n");
5482 fprintf (dump_file, " cand\tcost\n");
5483 }
5484
5485 for (i = 0; i < n_iv_cands (data); i++)
5486 {
5487 struct iv_cand *cand = iv_cand (data, i);
5488
5489 determine_iv_cost (data, cand);
5490
5491 if (dump_file && (dump_flags & TDF_DETAILS))
5492 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5493 }
5494
5495 if (dump_file && (dump_flags & TDF_DETAILS))
5496 fprintf (dump_file, "\n");
5497 }
5498
5499 /* Calculates cost for having SIZE induction variables. */
5500
5501 static unsigned
5502 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5503 {
5504 /* We add size to the cost, so that we prefer eliminating ivs
5505 if possible. */
5506 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5507 data->body_includes_call);
5508 }
5509
5510 /* For each size of the induction variable set determine the penalty. */
5511
5512 static void
5513 determine_set_costs (struct ivopts_data *data)
5514 {
5515 unsigned j, n;
5516 gphi *phi;
5517 gphi_iterator psi;
5518 tree op;
5519 struct loop *loop = data->current_loop;
5520 bitmap_iterator bi;
5521
5522 if (dump_file && (dump_flags & TDF_DETAILS))
5523 {
5524 fprintf (dump_file, "Global costs:\n");
5525 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5526 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5527 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5528 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5529 }
5530
5531 n = 0;
5532 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5533 {
5534 phi = psi.phi ();
5535 op = PHI_RESULT (phi);
5536
5537 if (virtual_operand_p (op))
5538 continue;
5539
5540 if (get_iv (data, op))
5541 continue;
5542
5543 n++;
5544 }
5545
5546 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5547 {
5548 struct version_info *info = ver_info (data, j);
5549
5550 if (info->inv_id && info->has_nonlin_use)
5551 n++;
5552 }
5553
5554 data->regs_used = n;
5555 if (dump_file && (dump_flags & TDF_DETAILS))
5556 fprintf (dump_file, " regs_used %d\n", n);
5557
5558 if (dump_file && (dump_flags & TDF_DETAILS))
5559 {
5560 fprintf (dump_file, " cost for size:\n");
5561 fprintf (dump_file, " ivs\tcost\n");
5562 for (j = 0; j <= 2 * target_avail_regs; j++)
5563 fprintf (dump_file, " %d\t%d\n", j,
5564 ivopts_global_cost_for_size (data, j));
5565 fprintf (dump_file, "\n");
5566 }
5567 }
5568
5569 /* Returns true if A is a cheaper cost pair than B. */
5570
5571 static bool
5572 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5573 {
5574 int cmp;
5575
5576 if (!a)
5577 return false;
5578
5579 if (!b)
5580 return true;
5581
5582 cmp = compare_costs (a->cost, b->cost);
5583 if (cmp < 0)
5584 return true;
5585
5586 if (cmp > 0)
5587 return false;
5588
5589 /* In case the costs are the same, prefer the cheaper candidate. */
5590 if (a->cand->cost < b->cand->cost)
5591 return true;
5592
5593 return false;
5594 }
5595
5596
5597 /* Returns the candidate by which USE is expressed in IVS. */
5598
5599 static struct cost_pair *
5600 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5601 {
5602 return ivs->cand_for_use[use->id];
5603 }
5604
5605 /* Computes the cost field of IVS structure. */
5606
5607 static void
5608 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5609 {
5610 comp_cost cost = ivs->cand_use_cost;
5611
5612 cost.cost += ivs->cand_cost;
5613
5614 cost.cost += ivopts_global_cost_for_size (data,
5615 ivs->n_regs + ivs->num_used_inv_expr);
5616
5617 ivs->cost = cost;
5618 }
5619
5620 /* Remove invariants in set INVS from set IVS. */
5621
5622 static void
5623 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5624 {
5625 bitmap_iterator bi;
5626 unsigned iid;
5627
5628 if (!invs)
5629 return;
5630
5631 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5632 {
5633 ivs->n_invariant_uses[iid]--;
5634 if (ivs->n_invariant_uses[iid] == 0)
5635 ivs->n_regs--;
5636 }
5637 }
5638
5639 /* Set USE not to be expressed by any candidate in IVS. */
5640
5641 static void
5642 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5643 struct iv_use *use)
5644 {
5645 unsigned uid = use->id, cid;
5646 struct cost_pair *cp;
5647
5648 cp = ivs->cand_for_use[uid];
5649 if (!cp)
5650 return;
5651 cid = cp->cand->id;
5652
5653 ivs->bad_uses++;
5654 ivs->cand_for_use[uid] = NULL;
5655 ivs->n_cand_uses[cid]--;
5656
5657 if (ivs->n_cand_uses[cid] == 0)
5658 {
5659 bitmap_clear_bit (ivs->cands, cid);
5660 /* Do not count the pseudocandidates. */
5661 if (cp->cand->iv)
5662 ivs->n_regs--;
5663 ivs->n_cands--;
5664 ivs->cand_cost -= cp->cand->cost;
5665
5666 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5667 }
5668
5669 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5670
5671 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5672
5673 if (cp->inv_expr_id != -1)
5674 {
5675 ivs->used_inv_expr[cp->inv_expr_id]--;
5676 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5677 ivs->num_used_inv_expr--;
5678 }
5679 iv_ca_recount_cost (data, ivs);
5680 }
5681
5682 /* Add invariants in set INVS to set IVS. */
5683
5684 static void
5685 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5686 {
5687 bitmap_iterator bi;
5688 unsigned iid;
5689
5690 if (!invs)
5691 return;
5692
5693 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5694 {
5695 ivs->n_invariant_uses[iid]++;
5696 if (ivs->n_invariant_uses[iid] == 1)
5697 ivs->n_regs++;
5698 }
5699 }
5700
5701 /* Set cost pair for USE in set IVS to CP. */
5702
5703 static void
5704 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5705 struct iv_use *use, struct cost_pair *cp)
5706 {
5707 unsigned uid = use->id, cid;
5708
5709 if (ivs->cand_for_use[uid] == cp)
5710 return;
5711
5712 if (ivs->cand_for_use[uid])
5713 iv_ca_set_no_cp (data, ivs, use);
5714
5715 if (cp)
5716 {
5717 cid = cp->cand->id;
5718
5719 ivs->bad_uses--;
5720 ivs->cand_for_use[uid] = cp;
5721 ivs->n_cand_uses[cid]++;
5722 if (ivs->n_cand_uses[cid] == 1)
5723 {
5724 bitmap_set_bit (ivs->cands, cid);
5725 /* Do not count the pseudocandidates. */
5726 if (cp->cand->iv)
5727 ivs->n_regs++;
5728 ivs->n_cands++;
5729 ivs->cand_cost += cp->cand->cost;
5730
5731 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5732 }
5733
5734 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5735 iv_ca_set_add_invariants (ivs, cp->depends_on);
5736
5737 if (cp->inv_expr_id != -1)
5738 {
5739 ivs->used_inv_expr[cp->inv_expr_id]++;
5740 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5741 ivs->num_used_inv_expr++;
5742 }
5743 iv_ca_recount_cost (data, ivs);
5744 }
5745 }
5746
5747 /* Extend set IVS by expressing USE by some of the candidates in it
5748 if possible. Consider all important candidates if candidates in
5749 set IVS don't give any result. */
5750
5751 static void
5752 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5753 struct iv_use *use)
5754 {
5755 struct cost_pair *best_cp = NULL, *cp;
5756 bitmap_iterator bi;
5757 unsigned i;
5758 struct iv_cand *cand;
5759
5760 gcc_assert (ivs->upto >= use->id);
5761 ivs->upto++;
5762 ivs->bad_uses++;
5763
5764 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5765 {
5766 cand = iv_cand (data, i);
5767 cp = get_use_iv_cost (data, use, cand);
5768 if (cheaper_cost_pair (cp, best_cp))
5769 best_cp = cp;
5770 }
5771
5772 if (best_cp == NULL)
5773 {
5774 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5775 {
5776 cand = iv_cand (data, i);
5777 cp = get_use_iv_cost (data, use, cand);
5778 if (cheaper_cost_pair (cp, best_cp))
5779 best_cp = cp;
5780 }
5781 }
5782
5783 iv_ca_set_cp (data, ivs, use, best_cp);
5784 }
5785
5786 /* Get cost for assignment IVS. */
5787
5788 static comp_cost
5789 iv_ca_cost (struct iv_ca *ivs)
5790 {
5791 /* This was a conditional expression but it triggered a bug in
5792 Sun C 5.5. */
5793 if (ivs->bad_uses)
5794 return infinite_cost;
5795 else
5796 return ivs->cost;
5797 }
5798
5799 /* Returns true if all dependences of CP are among invariants in IVS. */
5800
5801 static bool
5802 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5803 {
5804 unsigned i;
5805 bitmap_iterator bi;
5806
5807 if (!cp->depends_on)
5808 return true;
5809
5810 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5811 {
5812 if (ivs->n_invariant_uses[i] == 0)
5813 return false;
5814 }
5815
5816 return true;
5817 }
5818
5819 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
5820 it before NEXT_CHANGE. */
5821
5822 static struct iv_ca_delta *
5823 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5824 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5825 {
5826 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5827
5828 change->use = use;
5829 change->old_cp = old_cp;
5830 change->new_cp = new_cp;
5831 change->next_change = next_change;
5832
5833 return change;
5834 }
5835
5836 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5837 are rewritten. */
5838
5839 static struct iv_ca_delta *
5840 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5841 {
5842 struct iv_ca_delta *last;
5843
5844 if (!l2)
5845 return l1;
5846
5847 if (!l1)
5848 return l2;
5849
5850 for (last = l1; last->next_change; last = last->next_change)
5851 continue;
5852 last->next_change = l2;
5853
5854 return l1;
5855 }
5856
5857 /* Reverse the list of changes DELTA, forming its inverse. */
5858
5859 static struct iv_ca_delta *
5860 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5861 {
5862 struct iv_ca_delta *act, *next, *prev = NULL;
5863
5864 for (act = delta; act; act = next)
5865 {
5866 next = act->next_change;
5867 act->next_change = prev;
5868 prev = act;
5869
5870 std::swap (act->old_cp, act->new_cp);
5871 }
5872
5873 return prev;
5874 }
5875
5876 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5877 reverted instead. */
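/* Reverting works by reversing the list with iv_ca_delta_reverse, which also
swaps each old and new cost pair, committing the reversed list, and then
reversing it back so DELTA is left unchanged for the caller. */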
5878
5879 static void
5880 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5881 struct iv_ca_delta *delta, bool forward)
5882 {
5883 struct cost_pair *from, *to;
5884 struct iv_ca_delta *act;
5885
5886 if (!forward)
5887 delta = iv_ca_delta_reverse (delta);
5888
5889 for (act = delta; act; act = act->next_change)
5890 {
5891 from = act->old_cp;
5892 to = act->new_cp;
5893 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5894 iv_ca_set_cp (data, ivs, act->use, to);
5895 }
5896
5897 if (!forward)
5898 iv_ca_delta_reverse (delta);
5899 }
5900
5901 /* Returns true if CAND is used in IVS. */
5902
5903 static bool
5904 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5905 {
5906 return ivs->n_cand_uses[cand->id] > 0;
5907 }
5908
5909 /* Returns number of induction variable candidates in the set IVS. */
5910
5911 static unsigned
5912 iv_ca_n_cands (struct iv_ca *ivs)
5913 {
5914 return ivs->n_cands;
5915 }
5916
5917 /* Free the list of changes DELTA. */
5918
5919 static void
5920 iv_ca_delta_free (struct iv_ca_delta **delta)
5921 {
5922 struct iv_ca_delta *act, *next;
5923
5924 for (act = *delta; act; act = next)
5925 {
5926 next = act->next_change;
5927 free (act);
5928 }
5929
5930 *delta = NULL;
5931 }
5932
5933 /* Allocates a new iv candidate assignment. */
5934
5935 static struct iv_ca *
5936 iv_ca_new (struct ivopts_data *data)
5937 {
5938 struct iv_ca *nw = XNEW (struct iv_ca);
5939
5940 nw->upto = 0;
5941 nw->bad_uses = 0;
5942 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5943 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5944 nw->cands = BITMAP_ALLOC (NULL);
5945 nw->n_cands = 0;
5946 nw->n_regs = 0;
5947 nw->cand_use_cost = no_cost;
5948 nw->cand_cost = 0;
5949 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5950 nw->cost = no_cost;
5951 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5952 nw->num_used_inv_expr = 0;
5953
5954 return nw;
5955 }
5956
5957 /* Free memory occupied by the set IVS. */
5958
5959 static void
5960 iv_ca_free (struct iv_ca **ivs)
5961 {
5962 free ((*ivs)->cand_for_use);
5963 free ((*ivs)->n_cand_uses);
5964 BITMAP_FREE ((*ivs)->cands);
5965 free ((*ivs)->n_invariant_uses);
5966 free ((*ivs)->used_inv_expr);
5967 free (*ivs);
5968 *ivs = NULL;
5969 }
5970
5971 /* Dumps IVS to FILE. */
5972
5973 static void
5974 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5975 {
5976 const char *pref = " invariants ";
5977 unsigned i;
5978 comp_cost cost = iv_ca_cost (ivs);
5979
5980 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5981 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5982 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5983 bitmap_print (file, ivs->cands, " candidates: ","\n");
5984
5985 for (i = 0; i < ivs->upto; i++)
5986 {
5987 struct iv_use *use = iv_use (data, i);
5988 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5989 if (cp)
5990 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5991 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5992 else
5993 fprintf (file, " use:%d --> ??\n", use->id);
5994 }
5995
5996 for (i = 1; i <= data->max_inv_id; i++)
5997 if (ivs->n_invariant_uses[i])
5998 {
5999 fprintf (file, "%s%d", pref, i);
6000 pref = ", ";
6001 }
6002 fprintf (file, "\n\n");
6003 }
6004
6005 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6006 new set, and store differences in DELTA. Number of induction variables
6007 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6008 the function tries to find a solution with a minimal number of iv candidates. */
6009
6010 static comp_cost
6011 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6012 struct iv_cand *cand, struct iv_ca_delta **delta,
6013 unsigned *n_ivs, bool min_ncand)
6014 {
6015 unsigned i;
6016 comp_cost cost;
6017 struct iv_use *use;
6018 struct cost_pair *old_cp, *new_cp;
6019
6020 *delta = NULL;
6021 for (i = 0; i < ivs->upto; i++)
6022 {
6023 use = iv_use (data, i);
6024 old_cp = iv_ca_cand_for_use (ivs, use);
6025
6026 if (old_cp
6027 && old_cp->cand == cand)
6028 continue;
6029
6030 new_cp = get_use_iv_cost (data, use, cand);
6031 if (!new_cp)
6032 continue;
6033
6034 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6035 continue;
6036
6037 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6038 continue;
6039
6040 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6041 }
6042
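/* Temporarily commit DELTA to measure the cost of the extended set, then
revert it; the caller decides whether to apply DELTA for real. */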
6043 iv_ca_delta_commit (data, ivs, *delta, true);
6044 cost = iv_ca_cost (ivs);
6045 if (n_ivs)
6046 *n_ivs = iv_ca_n_cands (ivs);
6047 iv_ca_delta_commit (data, ivs, *delta, false);
6048
6049 return cost;
6050 }
6051
6052 /* Try narrowing set IVS by removing CAND. Return the cost of
6053 the new set and store the differences in DELTA. START is
6054 the candidate with which we start narrowing. */
6055
6056 static comp_cost
6057 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6058 struct iv_cand *cand, struct iv_cand *start,
6059 struct iv_ca_delta **delta)
6060 {
6061 unsigned i, ci;
6062 struct iv_use *use;
6063 struct cost_pair *old_cp, *new_cp, *cp;
6064 bitmap_iterator bi;
6065 struct iv_cand *cnd;
6066 comp_cost cost, best_cost, acost;
6067
6068 *delta = NULL;
6069 for (i = 0; i < n_iv_uses (data); i++)
6070 {
6071 use = iv_use (data, i);
6072
6073 old_cp = iv_ca_cand_for_use (ivs, use);
6074 if (old_cp->cand != cand)
6075 continue;
6076
6077 best_cost = iv_ca_cost (ivs);
6078 /* Start narrowing with START. */
6079 new_cp = get_use_iv_cost (data, use, start);
6080
6081 if (data->consider_all_candidates)
6082 {
6083 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6084 {
6085 if (ci == cand->id || (start && ci == start->id))
6086 continue;
6087
6088 cnd = iv_cand (data, ci);
6089
6090 cp = get_use_iv_cost (data, use, cnd);
6091 if (!cp)
6092 continue;
6093
6094 iv_ca_set_cp (data, ivs, use, cp);
6095 acost = iv_ca_cost (ivs);
6096
6097 if (compare_costs (acost, best_cost) < 0)
6098 {
6099 best_cost = acost;
6100 new_cp = cp;
6101 }
6102 }
6103 }
6104 else
6105 {
6106 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6107 {
6108 if (ci == cand->id || (start && ci == start->id))
6109 continue;
6110
6111 cnd = iv_cand (data, ci);
6112
6113 cp = get_use_iv_cost (data, use, cnd);
6114 if (!cp)
6115 continue;
6116
6117 iv_ca_set_cp (data, ivs, use, cp);
6118 acost = iv_ca_cost (ivs);
6119
6120 if (compare_costs (acost, best_cost) < 0)
6121 {
6122 best_cost = acost;
6123 new_cp = cp;
6124 }
6125 }
6126 }
6127 /* Restore the old cp for this use. */
6128 iv_ca_set_cp (data, ivs, use, old_cp);
6129
6130 if (!new_cp)
6131 {
6132 iv_ca_delta_free (delta);
6133 return infinite_cost;
6134 }
6135
6136 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6137 }
6138
6139 iv_ca_delta_commit (data, ivs, *delta, true);
6140 cost = iv_ca_cost (ivs);
6141 iv_ca_delta_commit (data, ivs, *delta, false);
6142
6143 return cost;
6144 }
6145
6146 /* Try optimizing the set of candidates IVS by removing candidates other
6147 than EXCEPT_CAND from it. Return cost of the new set, and store
6148 differences in DELTA. */
6149
6150 static comp_cost
6151 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6152 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6153 {
6154 bitmap_iterator bi;
6155 struct iv_ca_delta *act_delta, *best_delta;
6156 unsigned i;
6157 comp_cost best_cost, acost;
6158 struct iv_cand *cand;
6159
6160 best_delta = NULL;
6161 best_cost = iv_ca_cost (ivs);
6162
6163 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6164 {
6165 cand = iv_cand (data, i);
6166
6167 if (cand == except_cand)
6168 continue;
6169
6170 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6171
6172 if (compare_costs (acost, best_cost) < 0)
6173 {
6174 best_cost = acost;
6175 iv_ca_delta_free (&best_delta);
6176 best_delta = act_delta;
6177 }
6178 else
6179 iv_ca_delta_free (&act_delta);
6180 }
6181
6182 if (!best_delta)
6183 {
6184 *delta = NULL;
6185 return best_cost;
6186 }
6187
6188 /* Recurse to possibly remove other unnecessary ivs. */
6189 iv_ca_delta_commit (data, ivs, best_delta, true);
6190 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6191 iv_ca_delta_commit (data, ivs, best_delta, false);
6192 *delta = iv_ca_delta_join (best_delta, *delta);
6193 return best_cost;
6194 }
6195
6196 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6197 a cheaper local cost for USE than BEST_CP. If so, return a pointer to
6198 the corresponding cost_pair; otherwise just return BEST_CP. */
6199
6200 static struct cost_pair*
6201 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6202 unsigned int cand_idx, struct iv_cand *old_cand,
6203 struct cost_pair *best_cp)
6204 {
6205 struct iv_cand *cand;
6206 struct cost_pair *cp;
6207
6208 gcc_assert (old_cand != NULL && best_cp != NULL);
6209 if (cand_idx == old_cand->id)
6210 return best_cp;
6211
6212 cand = iv_cand (data, cand_idx);
6213 cp = get_use_iv_cost (data, use, cand);
6214 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6215 return cp;
6216
6217 return best_cp;
6218 }
6219
6220 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6221 that are used by more than one iv use. For each such candidate, this
6222 function tries to represent the iv uses under that candidate using
6223 other candidates with lower local cost, then tries to prune the new set.
6224 If the new set has a lower cost, it returns the new cost after recording
6225 the candidate replacement in the list DELTA. */
6226
6227 static comp_cost
6228 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6229 struct iv_ca_delta **delta)
6230 {
6231 bitmap_iterator bi, bj;
6232 unsigned int i, j, k;
6233 struct iv_use *use;
6234 struct iv_cand *cand;
6235 comp_cost orig_cost, acost;
6236 struct iv_ca_delta *act_delta, *tmp_delta;
6237 struct cost_pair *old_cp, *best_cp = NULL;
6238
6239 *delta = NULL;
6240 orig_cost = iv_ca_cost (ivs);
6241
6242 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6243 {
6244 if (ivs->n_cand_uses[i] == 1
6245 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6246 continue;
6247
6248 cand = iv_cand (data, i);
6249
6250 act_delta = NULL;
6251 /* Represent uses under current candidate using other ones with
6252 lower local cost. */
6253 for (j = 0; j < ivs->upto; j++)
6254 {
6255 use = iv_use (data, j);
6256 old_cp = iv_ca_cand_for_use (ivs, use);
6257
6258 if (old_cp->cand != cand)
6259 continue;
6260
6261 best_cp = old_cp;
6262 if (data->consider_all_candidates)
6263 for (k = 0; k < n_iv_cands (data); k++)
6264 best_cp = cheaper_cost_with_cand (data, use, k,
6265 old_cp->cand, best_cp);
6266 else
6267 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6268 best_cp = cheaper_cost_with_cand (data, use, k,
6269 old_cp->cand, best_cp);
6270
6271 if (best_cp == old_cp)
6272 continue;
6273
6274 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6275 }
6276 /* No need for further pruning. */
6277 if (!act_delta)
6278 continue;
6279
6280 /* Prune the new candidate set. */
6281 iv_ca_delta_commit (data, ivs, act_delta, true);
6282 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6283 iv_ca_delta_commit (data, ivs, act_delta, false);
6284 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6285
6286 if (compare_costs (acost, orig_cost) < 0)
6287 {
6288 *delta = act_delta;
6289 return acost;
6290 }
6291 else
6292 iv_ca_delta_free (&act_delta);
6293 }
6294
6295 return orig_cost;
6296 }
6297
6298 /* Tries to extend the set IVS in the best possible way in order
6299 to express USE. If ORIGINALP is true, prefer candidates from
6300 the original set of IVs, otherwise favor important candidates not
6301 based on any memory object. */
6302
6303 static bool
6304 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6305 struct iv_use *use, bool originalp)
6306 {
6307 comp_cost best_cost, act_cost;
6308 unsigned i;
6309 bitmap_iterator bi;
6310 struct iv_cand *cand;
6311 struct iv_ca_delta *best_delta = NULL, *act_delta;
6312 struct cost_pair *cp;
6313
6314 iv_ca_add_use (data, ivs, use);
6315 best_cost = iv_ca_cost (ivs);
6316 cp = iv_ca_cand_for_use (ivs, use);
6317 if (cp)
6318 {
6319 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6320 iv_ca_set_no_cp (data, ivs, use);
6321 }
6322
6323 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6324 first try important candidates not based on any memory object. Only if
6325 this fails do we try the specific ones. Rationale -- in loops with many
6326 variables the best choice is often to use just one generic biv. If we
6327 added many ivs specific to the uses here, the optimization algorithm would
6328 be likely to get stuck in a local minimum later, thus causing us to create
6329 too many ivs. The approach of going from few ivs to more seems more likely
6330 to be successful -- starting from few ivs, replacing an expensive use by a
6331 specific iv should always be a win. */
6332 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6333 {
6334 cand = iv_cand (data, i);
6335
6336 if (originalp && cand->pos != IP_ORIGINAL)
6337 continue;
6338
6339 if (!originalp && cand->iv->base_object != NULL_TREE)
6340 continue;
6341
6342 if (iv_ca_cand_used_p (ivs, cand))
6343 continue;
6344
6345 cp = get_use_iv_cost (data, use, cand);
6346 if (!cp)
6347 continue;
6348
6349 iv_ca_set_cp (data, ivs, use, cp);
6350 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6351 true);
6352 iv_ca_set_no_cp (data, ivs, use);
6353 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6354
6355 if (compare_costs (act_cost, best_cost) < 0)
6356 {
6357 best_cost = act_cost;
6358
6359 iv_ca_delta_free (&best_delta);
6360 best_delta = act_delta;
6361 }
6362 else
6363 iv_ca_delta_free (&act_delta);
6364 }
6365
6366 if (infinite_cost_p (best_cost))
6367 {
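/* No important candidate gave a finite cost; consider every candidate
recorded in USE's cost map. */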
6368 for (i = 0; i < use->n_map_members; i++)
6369 {
6370 cp = use->cost_map + i;
6371 cand = cp->cand;
6372 if (!cand)
6373 continue;
6374
6375 /* Already tried this. */
6376 if (cand->important)
6377 {
6378 if (originalp && cand->pos == IP_ORIGINAL)
6379 continue;
6380 if (!originalp && cand->iv->base_object == NULL_TREE)
6381 continue;
6382 }
6383
6384 if (iv_ca_cand_used_p (ivs, cand))
6385 continue;
6386
6387 act_delta = NULL;
6388 iv_ca_set_cp (data, ivs, use, cp);
6389 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6390 iv_ca_set_no_cp (data, ivs, use);
6391 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6392 cp, act_delta);
6393
6394 if (compare_costs (act_cost, best_cost) < 0)
6395 {
6396 best_cost = act_cost;
6397
6398 if (best_delta)
6399 iv_ca_delta_free (&best_delta);
6400 best_delta = act_delta;
6401 }
6402 else
6403 iv_ca_delta_free (&act_delta);
6404 }
6405 }
6406
6407 iv_ca_delta_commit (data, ivs, best_delta, true);
6408 iv_ca_delta_free (&best_delta);
6409
6410 return !infinite_cost_p (best_cost);
6411 }
6412
6413 /* Finds an initial assignment of candidates to uses. */
6414
6415 static struct iv_ca *
6416 get_initial_solution (struct ivopts_data *data, bool originalp)
6417 {
6418 struct iv_ca *ivs = iv_ca_new (data);
6419 unsigned i;
6420
6421 for (i = 0; i < n_iv_uses (data); i++)
6422 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6423 {
6424 iv_ca_free (&ivs);
6425 return NULL;
6426 }
6427
6428 return ivs;
6429 }
6430
6431 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6432 points to a bool variable; if it is true, this function tries to break
6433 the local optimal fixed-point by replacing candidates in IVS. */
6434
6435 static bool
6436 try_improve_iv_set (struct ivopts_data *data,
6437 struct iv_ca *ivs, bool *try_replace_p)
6438 {
6439 unsigned i, n_ivs;
6440 comp_cost acost, best_cost = iv_ca_cost (ivs);
6441 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6442 struct iv_cand *cand;
6443
6444 /* Try extending the set of induction variables by one. */
6445 for (i = 0; i < n_iv_cands (data); i++)
6446 {
6447 cand = iv_cand (data, i);
6448
6449 if (iv_ca_cand_used_p (ivs, cand))
6450 continue;
6451
6452 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6453 if (!act_delta)
6454 continue;
6455
6456 /* If we successfully added the candidate and the set is small enough,
6457 try optimizing it by removing other candidates. */
6458 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6459 {
6460 iv_ca_delta_commit (data, ivs, act_delta, true);
6461 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6462 iv_ca_delta_commit (data, ivs, act_delta, false);
6463 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6464 }
6465
6466 if (compare_costs (acost, best_cost) < 0)
6467 {
6468 best_cost = acost;
6469 iv_ca_delta_free (&best_delta);
6470 best_delta = act_delta;
6471 }
6472 else
6473 iv_ca_delta_free (&act_delta);
6474 }
6475
6476 if (!best_delta)
6477 {
6478 /* Try removing the candidates from the set instead. */
6479 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6480
6481 if (!best_delta && *try_replace_p)
6482 {
6483 *try_replace_p = false;
6484 /* So far the candidate-selecting algorithm tends to choose fewer IVs,
6485 so that it can handle cases in which loops have many variables
6486 but the best choice is often to use only one general biv. One
6487 weakness is that it can't handle the opposite case, in which different
6488 candidates should be chosen with respect to each use. To solve
6489 the problem, we replace candidates in the manner described in the
6490 comments of iv_ca_replace, thus giving the general algorithm a chance
6491 to break the local optimal fixed-point in these cases. */
6492 best_cost = iv_ca_replace (data, ivs, &best_delta);
6493 }
6494
6495 if (!best_delta)
6496 return false;
6497 }
6498
6499 iv_ca_delta_commit (data, ivs, best_delta, true);
6500 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6501 iv_ca_delta_free (&best_delta);
6502 return true;
6503 }
6504
6505 /* Attempts to find the optimal set of induction variables. We use a simple
6506 greedy heuristic -- we try to replace at most one candidate in the selected
6507 solution and remove the unused ivs while this improves the cost. */
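/* The solution is built by get_initial_solution, which assigns a candidate
to every use, and then refined by repeated calls to try_improve_iv_set
until no further change lowers the cost. */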
6508
6509 static struct iv_ca *
6510 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6511 {
6512 struct iv_ca *set;
6513 bool try_replace_p = true;
6514
6515 /* Get the initial solution. */
6516 set = get_initial_solution (data, originalp);
6517 if (!set)
6518 {
6519 if (dump_file && (dump_flags & TDF_DETAILS))
6520 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6521 return NULL;
6522 }
6523
6524 if (dump_file && (dump_flags & TDF_DETAILS))
6525 {
6526 fprintf (dump_file, "Initial set of candidates:\n");
6527 iv_ca_dump (data, dump_file, set);
6528 }
6529
6530 while (try_improve_iv_set (data, set, &try_replace_p))
6531 {
6532 if (dump_file && (dump_flags & TDF_DETAILS))
6533 {
6534 fprintf (dump_file, "Improved to:\n");
6535 iv_ca_dump (data, dump_file, set);
6536 }
6537 }
6538
6539 return set;
6540 }
6541
6542 static struct iv_ca *
6543 find_optimal_iv_set (struct ivopts_data *data)
6544 {
6545 unsigned i;
6546 struct iv_ca *set, *origset;
6547 struct iv_use *use;
6548 comp_cost cost, origcost;
6549
6550 /* Determine the cost based on a strategy that starts with the original IVs,
6551 then try again using a strategy that prefers candidates not based
6552 on any memory object. */
6553 origset = find_optimal_iv_set_1 (data, true);
6554 set = find_optimal_iv_set_1 (data, false);
6555
6556 if (!origset && !set)
6557 return NULL;
6558
6559 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6560 cost = set ? iv_ca_cost (set) : infinite_cost;
6561
6562 if (dump_file && (dump_flags & TDF_DETAILS))
6563 {
6564 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6565 origcost.cost, origcost.complexity);
6566 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6567 cost.cost, cost.complexity);
6568 }
6569
6570 /* Choose the one with the best cost. */
6571 if (compare_costs (origcost, cost) <= 0)
6572 {
6573 if (set)
6574 iv_ca_free (&set);
6575 set = origset;
6576 }
6577 else if (origset)
6578 iv_ca_free (&origset);
6579
6580 for (i = 0; i < n_iv_uses (data); i++)
6581 {
6582 use = iv_use (data, i);
6583 use->selected = iv_ca_cand_for_use (set, use)->cand;
6584 }
6585
6586 return set;
6587 }
6588
6589 /* Creates a new induction variable corresponding to CAND. */
6590
6591 static void
6592 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6593 {
6594 gimple_stmt_iterator incr_pos;
6595 tree base;
6596 bool after = false;
6597
6598 if (!cand->iv)
6599 return;
6600
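/* Determine where the increment of the new variable is to be inserted,
according to the position class chosen for the candidate. */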
6601 switch (cand->pos)
6602 {
6603 case IP_NORMAL:
6604 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6605 break;
6606
6607 case IP_END:
6608 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6609 after = true;
6610 break;
6611
6612 case IP_AFTER_USE:
6613 after = true;
6614 /* fall through */
6615 case IP_BEFORE_USE:
6616 incr_pos = gsi_for_stmt (cand->incremented_at);
6617 break;
6618
6619 case IP_ORIGINAL:
6620 /* Mark that the iv is preserved. */
6621 name_info (data, cand->var_before)->preserve_biv = true;
6622 name_info (data, cand->var_after)->preserve_biv = true;
6623
6624 /* Rewrite the increment so that it uses var_before directly. */
6625 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6626 return;
6627 }
6628
6629 gimple_add_tmp_var (cand->var_before);
6630
6631 base = unshare_expr (cand->iv->base);
6632
6633 create_iv (base, unshare_expr (cand->iv->step),
6634 cand->var_before, data->current_loop,
6635 &incr_pos, after, &cand->var_before, &cand->var_after);
6636 }
6637
6638 /* Creates new induction variables described in SET. */
6639
6640 static void
6641 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6642 {
6643 unsigned i;
6644 struct iv_cand *cand;
6645 bitmap_iterator bi;
6646
6647 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6648 {
6649 cand = iv_cand (data, i);
6650 create_new_iv (data, cand);
6651 }
6652
6653 if (dump_file && (dump_flags & TDF_DETAILS))
6654 {
6655 fprintf (dump_file, "Selected IV set for loop %d",
6656 data->current_loop->num);
6657 if (data->loop_loc != UNKNOWN_LOCATION)
6658 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6659 LOCATION_LINE (data->loop_loc));
6660 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6661 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6662 {
6663 cand = iv_cand (data, i);
6664 dump_cand (dump_file, cand);
6665 }
6666 fprintf (dump_file, "\n");
6667 }
6668 }
6669
6670 /* Rewrites USE (definition of iv used in a nonlinear expression)
6671 using candidate CAND. */
6672
6673 static void
6674 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6675 struct iv_use *use, struct iv_cand *cand)
6676 {
6677 tree comp;
6678 tree op, tgt;
6679 gassign *ass;
6680 gimple_stmt_iterator bsi;
6681
6682 /* An important special case -- if we are asked to express the value of
6683 the original iv by itself, just exit; there is no need to
6684 introduce a new computation (that might also need casting the
6685 variable to unsigned and back). */
6686 if (cand->pos == IP_ORIGINAL
6687 && cand->incremented_at == use->stmt)
6688 {
6689 enum tree_code stmt_code;
6690
6691 gcc_assert (is_gimple_assign (use->stmt));
6692 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6693
6694 /* Check whether we may leave the computation unchanged.
6695 This is the case only if it does not rely on other
6696 computations in the loop -- otherwise, the computation
6697 we rely upon may be removed in remove_unused_ivs,
6698 thus leading to an ICE. */
6699 stmt_code = gimple_assign_rhs_code (use->stmt);
6700 if (stmt_code == PLUS_EXPR
6701 || stmt_code == MINUS_EXPR
6702 || stmt_code == POINTER_PLUS_EXPR)
6703 {
6704 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6705 op = gimple_assign_rhs2 (use->stmt);
6706 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6707 op = gimple_assign_rhs1 (use->stmt);
6708 else
6709 op = NULL_TREE;
6710 }
6711 else
6712 op = NULL_TREE;
6713
6714 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6715 return;
6716 }
6717
6718 comp = get_computation (data->current_loop, use, cand);
6719 gcc_assert (comp != NULL_TREE);
6720
6721 switch (gimple_code (use->stmt))
6722 {
6723 case GIMPLE_PHI:
6724 tgt = PHI_RESULT (use->stmt);
6725
6726 /* If we should keep the biv, do not replace it. */
6727 if (name_info (data, tgt)->preserve_biv)
6728 return;
6729
6730 bsi = gsi_after_labels (gimple_bb (use->stmt));
6731 break;
6732
6733 case GIMPLE_ASSIGN:
6734 tgt = gimple_assign_lhs (use->stmt);
6735 bsi = gsi_for_stmt (use->stmt);
6736 break;
6737
6738 default:
6739 gcc_unreachable ();
6740 }
6741
6742 if (!valid_gimple_rhs_p (comp)
6743 || (gimple_code (use->stmt) != GIMPLE_PHI
6744 /* We can't allow re-allocating the stmt, as it might still be
6745 pointed to. */
6746 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6747 >= gimple_num_ops (gsi_stmt (bsi)))))
6748 {
6749 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6750 true, GSI_SAME_STMT);
6751 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6752 {
6753 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6754 /* As this isn't a plain copy we have to reset alignment
6755 information. */
6756 if (SSA_NAME_PTR_INFO (comp))
6757 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6758 }
6759 }
6760
6761 if (gimple_code (use->stmt) == GIMPLE_PHI)
6762 {
6763 ass = gimple_build_assign (tgt, comp);
6764 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6765
6766 bsi = gsi_for_stmt (use->stmt);
6767 remove_phi_node (&bsi, false);
6768 }
6769 else
6770 {
6771 gimple_assign_set_rhs_from_tree (&bsi, comp);
6772 use->stmt = gsi_stmt (bsi);
6773 }
6774 }
6775
6776 /* Performs a peephole optimization to reorder the iv update statement with
6777 a mem ref to enable instruction combining in later phases. The mem ref uses
6778 the iv value before the update, so the reordering transformation requires
6779 adjustment of the offset. CAND is the selected IV_CAND.
6780
6781 Example:
6782
6783 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6784 iv2 = iv1 + 1;
6785
6786 if (t < val) (1)
6787 goto L;
6788 goto Head;
6789
6790
6791 directly propagating t over to (1) will introduce an overlapping live range,
6792 thus increasing register pressure. This peephole transforms the code into:
6793
6794
6795 iv2 = iv1 + 1;
6796 t = MEM_REF (base, iv2, 8, 8);
6797 if (t < val)
6798 goto L;
6799 goto Head;
6800 */
6801
6802 static void
6803 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6804 {
6805 tree var_after;
6806 gimple iv_update, stmt;
6807 basic_block bb;
6808 gimple_stmt_iterator gsi, gsi_iv;
6809
6810 if (cand->pos != IP_NORMAL)
6811 return;
6812
6813 var_after = cand->var_after;
6814 iv_update = SSA_NAME_DEF_STMT (var_after);
6815
6816 bb = gimple_bb (iv_update);
6817 gsi = gsi_last_nondebug_bb (bb);
6818 stmt = gsi_stmt (gsi);
6819
6820 /* Only handle a conditional statement for now. */
6821 if (gimple_code (stmt) != GIMPLE_COND)
6822 return;
6823
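/* Punt unless, ignoring debug statements, the iv update is the statement
immediately before the exit condition and the memory reference (USE->stmt)
is the statement immediately before that. */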
6824 gsi_prev_nondebug (&gsi);
6825 stmt = gsi_stmt (gsi);
6826 if (stmt != iv_update)
6827 return;
6828
6829 gsi_prev_nondebug (&gsi);
6830 if (gsi_end_p (gsi))
6831 return;
6832
6833 stmt = gsi_stmt (gsi);
6834 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6835 return;
6836
6837 if (stmt != use->stmt)
6838 return;
6839
6840 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6841 return;
6842
6843 if (dump_file && (dump_flags & TDF_DETAILS))
6844 {
6845 fprintf (dump_file, "Reordering \n");
6846 print_gimple_stmt (dump_file, iv_update, 0, 0);
6847 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6848 fprintf (dump_file, "\n");
6849 }
6850
6851 gsi = gsi_for_stmt (use->stmt);
6852 gsi_iv = gsi_for_stmt (iv_update);
6853 gsi_move_before (&gsi_iv, &gsi);
6854
6855 cand->pos = IP_BEFORE_USE;
6856 cand->incremented_at = use->stmt;
6857 }
6858
6859 /* Rewrites USE (address that is an iv) using candidate CAND. */
6860
6861 static void
6862 rewrite_use_address_1 (struct ivopts_data *data,
6863 struct iv_use *use, struct iv_cand *cand)
6864 {
6865 aff_tree aff;
6866 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6867 tree base_hint = NULL_TREE;
6868 tree ref, iv;
6869 bool ok;
6870
6871 adjust_iv_update_pos (cand, use);
6872 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6873 gcc_assert (ok);
6874 unshare_aff_combination (&aff);
6875
6876 /* To avoid undefined overflow problems, all IV candidates use unsigned
6877 integer types. The drawback is that this makes it impossible for
6878 create_mem_ref to distinguish an IV that is based on a memory object
6879 from one that represents simply an offset.
6880
6881 To work around this problem, we pass a hint to create_mem_ref that
6882 indicates which variable (if any) in aff is an IV based on a memory
6883 object. Note that we only consider the candidate. If this is not
6884 based on an object, the base of the reference is in some subexpression
6885 of the use -- but these will use pointer types, so they are recognized
6886 by the create_mem_ref heuristics anyway. */
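/* For example, when the selected candidate is a pointer iv walking an array,
passing its value here lets create_mem_ref use it as the base of the new
memory reference instead of treating it as a plain offset. */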
6887 if (cand->iv->base_object)
6888 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6889
6890 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6891 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6892 reference_alias_ptr_type (*use->op_p),
6893 iv, base_hint, data->speed);
6894 copy_ref_info (ref, *use->op_p);
6895 *use->op_p = ref;
6896 }
6897
6898 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6899 first use of a group, rewrites sub uses in the group too. */
6900
6901 static void
6902 rewrite_use_address (struct ivopts_data *data,
6903 struct iv_use *use, struct iv_cand *cand)
6904 {
6905 struct iv_use *next;
6906
6907 gcc_assert (use->sub_id == 0);
6908 rewrite_use_address_1 (data, use, cand);
6909 update_stmt (use->stmt);
6910
6911 for (next = use->next; next != NULL; next = next->next)
6912 {
6913 rewrite_use_address_1 (data, next, cand);
6914 update_stmt (next->stmt);
6915 }
6916
6917 return;
6918 }
6919
6920 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6921 candidate CAND. */
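/* If induction variable elimination recorded a bound for this use (CP->value
below), the exit test is rewritten as a comparison of the candidate's
variable against that bound, with any code needed to compute the bound
inserted on the preheader edge; e.g. 'if (i < n)' may become
'if (ivtmp_5 != _7)'. Otherwise the compared operand is simply
re-expressed in terms of CAND. */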
6922
6923 static void
6924 rewrite_use_compare (struct ivopts_data *data,
6925 struct iv_use *use, struct iv_cand *cand)
6926 {
6927 tree comp, *var_p, op, bound;
6928 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6929 enum tree_code compare;
6930 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6931 bool ok;
6932
6933 bound = cp->value;
6934 if (bound)
6935 {
6936 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6937 tree var_type = TREE_TYPE (var);
6938 gimple_seq stmts;
6939
6940 if (dump_file && (dump_flags & TDF_DETAILS))
6941 {
6942 fprintf (dump_file, "Replacing exit test: ");
6943 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6944 }
6945 compare = cp->comp;
6946 bound = unshare_expr (fold_convert (var_type, bound));
6947 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6948 if (stmts)
6949 gsi_insert_seq_on_edge_immediate (
6950 loop_preheader_edge (data->current_loop),
6951 stmts);
6952
6953 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6954 gimple_cond_set_lhs (cond_stmt, var);
6955 gimple_cond_set_code (cond_stmt, compare);
6956 gimple_cond_set_rhs (cond_stmt, op);
6957 return;
6958 }
6959
6960 /* The induction variable elimination failed; just express the original
6961 giv. */
6962 comp = get_computation (data->current_loop, use, cand);
6963 gcc_assert (comp != NULL_TREE);
6964
6965 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6966 gcc_assert (ok);
6967
6968 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6969 true, GSI_SAME_STMT);
6970 }
6971
6972 /* Rewrites USE using candidate CAND. */
6973
6974 static void
6975 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6976 {
6977 switch (use->type)
6978 {
6979 case USE_NONLINEAR_EXPR:
6980 rewrite_use_nonlinear_expr (data, use, cand);
6981 break;
6982
6983 case USE_ADDRESS:
6984 rewrite_use_address (data, use, cand);
6985 break;
6986
6987 case USE_COMPARE:
6988 rewrite_use_compare (data, use, cand);
6989 break;
6990
6991 default:
6992 gcc_unreachable ();
6993 }
6994
6995 update_stmt (use->stmt);
6996 }
6997
6998 /* Rewrite the uses using the selected induction variables. */
6999
7000 static void
7001 rewrite_uses (struct ivopts_data *data)
7002 {
7003 unsigned i;
7004 struct iv_cand *cand;
7005 struct iv_use *use;
7006
7007 for (i = 0; i < n_iv_uses (data); i++)
7008 {
7009 use = iv_use (data, i);
7010 cand = use->selected;
7011 gcc_assert (cand);
7012
7013 rewrite_use (data, use, cand);
7014 }
7015 }
7016
7017 /* Removes the ivs that are not used after rewriting. */
7018
7019 static void
7020 remove_unused_ivs (struct ivopts_data *data)
7021 {
7022 unsigned j;
7023 bitmap_iterator bi;
7024 bitmap toremove = BITMAP_ALLOC (NULL);
7025
7026 /* Figure out an order in which to release SSA DEFs so that we don't
7027 release something that we'd have to propagate into a debug stmt
7028 afterwards. */
7029 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7030 {
7031 struct version_info *info;
7032
7033 info = ver_info (data, j);
7034 if (info->iv
7035 && !integer_zerop (info->iv->step)
7036 && !info->inv_id
7037 && !info->iv->have_use_for
7038 && !info->preserve_biv)
7039 {
7040 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7041
7042 tree def = info->iv->ssa_name;
7043
7044 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7045 {
7046 imm_use_iterator imm_iter;
7047 use_operand_p use_p;
7048 gimple stmt;
7049 int count = 0;
7050
7051 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7052 {
7053 if (!gimple_debug_bind_p (stmt))
7054 continue;
7055
7056 /* We just want to determine whether to do nothing
7057 (count == 0), to substitute the computed
7058 expression into a single use of the SSA DEF by
7059 itself (count == 1), or to use a debug temp
7060 because the SSA DEF is used multiple times or as
7061 part of a larger expression (count > 1). */
7062 count++;
7063 if (gimple_debug_bind_get_value (stmt) != def)
7064 count++;
7065
7066 if (count > 1)
7067 BREAK_FROM_IMM_USE_STMT (imm_iter);
7068 }
7069
7070 if (!count)
7071 continue;
7072
7073 struct iv_use dummy_use;
7074 struct iv_cand *best_cand = NULL, *cand;
7075 unsigned i, best_pref = 0, cand_pref;
7076
7077 memset (&dummy_use, 0, sizeof (dummy_use));
7078 dummy_use.iv = info->iv;
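/* Prefer the candidate whose iv most closely matches the removed one: an
identical step is weighted highest, then a matching mode of the base type,
then a constant base. */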
7079 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7080 {
7081 cand = iv_use (data, i)->selected;
7082 if (cand == best_cand)
7083 continue;
7084 cand_pref = operand_equal_p (cand->iv->step,
7085 info->iv->step, 0)
7086 ? 4 : 0;
7087 cand_pref
7088 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7089 == TYPE_MODE (TREE_TYPE (info->iv->base))
7090 ? 2 : 0;
7091 cand_pref
7092 += TREE_CODE (cand->iv->base) == INTEGER_CST
7093 ? 1 : 0;
7094 if (best_cand == NULL || best_pref < cand_pref)
7095 {
7096 best_cand = cand;
7097 best_pref = cand_pref;
7098 }
7099 }
7100
7101 if (!best_cand)
7102 continue;
7103
7104 tree comp = get_computation_at (data->current_loop,
7105 &dummy_use, best_cand,
7106 SSA_NAME_DEF_STMT (def));
7107 if (!comp)
7108 continue;
7109
7110 if (count > 1)
7111 {
7112 tree vexpr = make_node (DEBUG_EXPR_DECL);
7113 DECL_ARTIFICIAL (vexpr) = 1;
7114 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7115 if (SSA_NAME_VAR (def))
7116 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7117 else
7118 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7119 gdebug *def_temp
7120 = gimple_build_debug_bind (vexpr, comp, NULL);
7121 gimple_stmt_iterator gsi;
7122
7123 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7124 gsi = gsi_after_labels (gimple_bb
7125 (SSA_NAME_DEF_STMT (def)));
7126 else
7127 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7128
7129 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7130 comp = vexpr;
7131 }
7132
7133 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7134 {
7135 if (!gimple_debug_bind_p (stmt))
7136 continue;
7137
7138 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7139 SET_USE (use_p, comp);
7140
7141 update_stmt (stmt);
7142 }
7143 }
7144 }
7145 }
7146
7147 release_defs_bitset (toremove);
7148
7149 BITMAP_FREE (toremove);
7150 }
7151
7152 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7153 for hash_map::traverse. */
7154
7155 bool
7156 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7157 {
7158 free (value);
7159 return true;
7160 }
7161
7162 /* Frees data allocated by the optimization of a single loop. */
7163
7164 static void
7165 free_loop_data (struct ivopts_data *data)
7166 {
7167 unsigned i, j;
7168 bitmap_iterator bi;
7169 tree obj;
7170
7171 if (data->niters)
7172 {
7173 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7174 delete data->niters;
7175 data->niters = NULL;
7176 }
7177
7178 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7179 {
7180 struct version_info *info;
7181
7182 info = ver_info (data, i);
7183 info->iv = NULL;
7184 info->has_nonlin_use = false;
7185 info->preserve_biv = false;
7186 info->inv_id = 0;
7187 }
7188 bitmap_clear (data->relevant);
7189 bitmap_clear (data->important_candidates);
7190
7191 for (i = 0; i < n_iv_uses (data); i++)
7192 {
7193 struct iv_use *use = iv_use (data, i);
7194 struct iv_use *pre = use, *sub = use->next;
7195
7196 while (sub)
7197 {
7198 gcc_assert (sub->related_cands == NULL);
7199 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7200
7201 pre = sub;
7202 sub = sub->next;
7203 free (pre);
7204 }
7205
7206 BITMAP_FREE (use->related_cands);
7207 for (j = 0; j < use->n_map_members; j++)
7208 if (use->cost_map[j].depends_on)
7209 BITMAP_FREE (use->cost_map[j].depends_on);
7210 free (use->cost_map);
7211 free (use);
7212 }
7213 data->iv_uses.truncate (0);
7214
7215 for (i = 0; i < n_iv_cands (data); i++)
7216 {
7217 struct iv_cand *cand = iv_cand (data, i);
7218
7219 if (cand->depends_on)
7220 BITMAP_FREE (cand->depends_on);
7221 free (cand);
7222 }
7223 data->iv_candidates.truncate (0);
7224
7225 if (data->version_info_size < num_ssa_names)
7226 {
7227 data->version_info_size = 2 * num_ssa_names;
7228 free (data->version_info);
7229 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7230 }
7231
7232 data->max_inv_id = 0;
7233
7234 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7235 SET_DECL_RTL (obj, NULL_RTX);
7236
7237 decl_rtl_to_reset.truncate (0);
7238
7239 data->inv_expr_tab->empty ();
7240 data->inv_expr_id = 0;
7241 }
7242
7243 /* Finalizes data structures used by the iv optimization pass, freeing the
7244 memory allocated for the whole pass in DATA. */
7245
7246 static void
7247 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7248 {
7249 free_loop_data (data);
7250 free (data->version_info);
7251 BITMAP_FREE (data->relevant);
7252 BITMAP_FREE (data->important_candidates);
7253
7254 decl_rtl_to_reset.release ();
7255 data->iv_uses.release ();
7256 data->iv_candidates.release ();
7257 delete data->inv_expr_tab;
7258 data->inv_expr_tab = NULL;
7259 free_affine_expand_cache (&data->name_expansion_cache);
7260 obstack_free (&data->iv_obstack, NULL);
7261 }
7262
7263 /* Returns true if the loop body BODY includes any function calls. */
7264
7265 static bool
7266 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7267 {
7268 gimple_stmt_iterator gsi;
7269 unsigned i;
7270
7271 for (i = 0; i < num_nodes; i++)
7272 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7273 {
7274 gimple stmt = gsi_stmt (gsi);
7275 if (is_gimple_call (stmt)
7276 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7277 return true;
7278 }
7279 return false;
7280 }
7281
7282 /* Optimizes the LOOP. Returns true if anything changed. */
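/* As a rough illustration (on a hypothetical input), for a loop like

for (i = 0; i < n; i++)
a[i] = 0;

the pass may select a single pointer candidate initialized to &a[0] and
stepped by the element size, rewrite the store to go through that pointer,
replace the exit test by a comparison of the pointer against a bound
computed in the preheader, and finally remove the original counter if
nothing else uses it. */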
7283
7284 static bool
7285 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7286 {
7287 bool changed = false;
7288 struct iv_ca *iv_ca;
7289 edge exit = single_dom_exit (loop);
7290 basic_block *body;
7291
7292 gcc_assert (!data->niters);
7293 data->current_loop = loop;
7294 data->loop_loc = find_loop_location (loop);
7295 data->speed = optimize_loop_for_speed_p (loop);
7296
7297 if (dump_file && (dump_flags & TDF_DETAILS))
7298 {
7299 fprintf (dump_file, "Processing loop %d", loop->num);
7300 if (data->loop_loc != UNKNOWN_LOCATION)
7301 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7302 LOCATION_LINE (data->loop_loc));
7303 fprintf (dump_file, "\n");
7304
7305 if (exit)
7306 {
7307 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7308 exit->src->index, exit->dest->index);
7309 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7310 fprintf (dump_file, "\n");
7311 }
7312
7313 fprintf (dump_file, "\n");
7314 }
7315
7316 body = get_loop_body (loop);
7317 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7318 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7319 free (body);
7320
7321 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7322
7323 /* For each ssa name determines whether it behaves as an induction variable
7324 in some loop. */
7325 if (!find_induction_variables (data))
7326 goto finish;
7327
7328 /* Finds interesting uses (item 1). */
7329 find_interesting_uses (data);
7330 group_address_uses (data);
7331 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7332 goto finish;
7333
7334 /* Finds candidates for the induction variables (item 2). */
7335 find_iv_candidates (data);
7336
7337 /* Calculates the costs (item 3, part 1). */
7338 determine_iv_costs (data);
7339 determine_use_iv_costs (data);
7340 determine_set_costs (data);
7341
7342 /* Find the optimal set of induction variables (item 3, part 2). */
7343 iv_ca = find_optimal_iv_set (data);
7344 if (!iv_ca)
7345 goto finish;
7346 changed = true;
7347
7348 /* Create the new induction variables (item 4, part 1). */
7349 create_new_ivs (data, iv_ca);
7350 iv_ca_free (&iv_ca);
7351
7352 /* Rewrite the uses (item 4, part 2). */
7353 rewrite_uses (data);
7354
7355 /* Remove the ivs that are unused after rewriting. */
7356 remove_unused_ivs (data);
7357
7358 /* We have changed the structure of induction variables; it might happen
7359 that definitions in the scev database refer to some of them that were
7360 eliminated. */
7361 scev_reset ();
7362
7363 finish:
7364 free_loop_data (data);
7365
7366 return changed;
7367 }
7368
7369 /* Main entry point. Optimizes induction variables in loops. */
7370
7371 void
7372 tree_ssa_iv_optimize (void)
7373 {
7374 struct loop *loop;
7375 struct ivopts_data data;
7376
7377 tree_ssa_iv_optimize_init (&data);
7378
7379 /* Optimize the loops starting with the innermost ones. */
7380 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7381 {
7382 if (dump_file && (dump_flags & TDF_DETAILS))
7383 flow_loop_dump (loop, dump_file, NULL, 1);
7384
7385 tree_ssa_iv_optimize_loop (&data, loop);
7386 }
7387
7388 tree_ssa_iv_optimize_finalize (&data);
7389 }