/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
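
/* Editorial example (a sketch, not part of the original sources; the
   variable names are made up): given a C loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address 'a[i]' and the exit test 'i < n' are the interesting uses,
   'i' is the old induction variable, and the pass may decide to introduce
   a pointer candidate 'p' with base '&a[0]' and step 'sizeof (*a)', so
   that the address use becomes '*p' (strength reduction) and the exit
   test is rewritten to compare 'p' against '&a[n]' (elimination of 'i').  */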

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  unsigned use_id;      /* The id of the use this iv was recorded for, valid
                           when have_use_for is true.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
};
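
/* Editorial example (a sketch): for the loop 'while (n--) *p++ = 0;'
   the iv describing the pointer 'p' would have BASE equal to the value
   of 'p' on loop entry, STEP equal to 'sizeof (*p)', BASE_OBJECT the
   object 'p' points to as computed by determine_base_object below, and
   BIV_P set, since 'p' is incremented directly by the loop.  */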

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned sub_id;      /* The id of the sub use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */

  struct iv_use *next;  /* The next sub use.  */
  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
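
/* Editorial note (a sketch of how the fields above combine; the precise
   accounting lives in the iv_ca_* helpers later in this file):

     cost ~ cand_use_cost        (sum over uses of their selected cost_pair)
          + cand_cost            (sum of the costs of the used candidates)
          + register pressure    (estimated from n_regs and friends)

   so adding a candidate pays off when the drop in use costs outweighs
   the added candidate and register-pressure costs.  */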

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   the ssa name if DUMP_NAME is false.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
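
/* Editorial example (illustrative, hand-written): for a biv 'i_1' of type
   'int' with base 0 and step 1, the output of dump_iv with DUMP_NAME set
   looks roughly like

     ssa name i_1
       type int
       base 0
       step 1
       is a biv  */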

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
        {
          dump_use (file, use);
          use = use->next;
        }
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
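
/* Editorial examples (a sketch): for '&a[i]' the ADDR_EXPR case strips
   the ARRAY_REF via get_base_address and returns '&a' converted to
   'void *'; for 'p + 4' the POINTER_PLUS_EXPR case recurses into 'p';
   and for a plain integer constant the result is NULL_TREE, since a
   constant pointer does not point into any tracked object.  */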

/* Return true if an address expression with a non-DECL_P operand
   appears in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP.
   NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}
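
/* Editorial example (a sketch): with BASE = '&a[0]', whose ADDR_EXPR
   operand is an ARRAY_REF rather than a DECL, the lowering above runs
   the base through the tree-affine machinery so it ends up in the same
   canonical shape as a base written '&a'; without it, '&a[0]' and '&a'
   would produce duplicate candidates.  */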

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
        bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
        continue;

      if (integer_zerop (iv.step))
        continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
         iv step.  Ideally we would stop at any ssa var, but doing so is
         expensive and such cases are unusual, so we only do it for the
         first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
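
/* Editorial example (a sketch of the shape find_bivs looks for): a biv
   shows up as a cycle through a loop-header PHI node, e.g.

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   for which simple_iv reports base 0 and step 1, and set_iv records
   the result for i_1.  */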

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we would stop at any ssa var, but doing so is
     expensive and such cases are unusual, so we only do it for the
     first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
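
/* Editorial example (a sketch): with the biv i_1 above, an assignment
   in the loop body such as

     a_3 = i_1 * 4;

   defines a giv with base 0 and step 4; simple_iv recognizes it because
   its value is an affine function of the number of iterations.  */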

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv, true);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type, tree addr_base = NULL,
            unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
                struct iv *iv, gimple stmt, enum use_type use_type,
                tree addr_base, unsigned HOST_WIDE_INT addr_offset,
                unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
        {
          pre = group;
          group = group->next;
        }
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Reset the ssa name so that it does not show up in the dumps, if it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
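
/* Editorial example (a sketch): for the exit test 'if (i_1 < n_7)' with
   'n_7' defined outside the loop, extract_cond_operands returns true
   with *CONTROL_VAR pointing at i_1 (nonzero step) and *BOUND at the
   invariant n_7; if the source had written 'n_7 > i_1', the swap above
   canonicalizes the iv to the first position.  */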

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
   obviously invariant, i.e. the outermost loop such that all of EXPR's
   operands are defined outside of it.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
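
/* Editorial worked examples (a sketch): for TOP = (i_1 + 3) * 8 and
   BOT = i_1 + 3, the MULT_EXPR case recurses on the first operand
   (operand_equal_p to BOT, so the inner multiple is 1) and scales by 8,
   giving *MUL = 8.  For TOP = 24 and BOT = 8 the INTEGER_CST case
   divides and yields *MUL = 3, while TOP = 25 and BOT = 8 fails because
   the remainder is nonzero.  */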

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use to that; otherwise it
   records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
                  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
        continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
          && operand_equal_p (iv->step, use->iv->step, 0)
          && operand_equal_p (addr_base, use->addr_base, 0))
        break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
                       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
                           use_type, addr_base, addr_offset, i);
}
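
/* Editorial example (a sketch): the address uses 'a[i]' and 'a[i + 1]'
   in one loop share the base object, the step and the stripped base
   '&a', differing only in the constant offset, so the second is recorded
   by record_sub_use under the first; keeping such uses grouped lets the
   pass consider one candidate (plus constant offsets) for the whole
   group.  */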

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2153 {
2154 op = USE_FROM_PTR (use_p);
2155
2156 if (TREE_CODE (op) != SSA_NAME)
2157 continue;
2158
2159 iv = get_iv (data, op);
2160 if (!iv)
2161 continue;
2162
2163 find_interesting_uses_op (data, op);
2164 }
2165 }
2166
2167 /* Finds interesting uses of induction variables outside of loops
2168 on loop exit edge EXIT. */
2169
2170 static void
2171 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2172 {
2173 gphi *phi;
2174 gphi_iterator psi;
2175 tree def;
2176
2177 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2178 {
2179 phi = psi.phi ();
2180 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2181 if (!virtual_operand_p (def))
2182 find_interesting_uses_op (data, def);
2183 }
2184 }
2185
2186 /* Finds uses of the induction variables that are interesting. */
2187
2188 static void
2189 find_interesting_uses (struct ivopts_data *data)
2190 {
2191 basic_block bb;
2192 gimple_stmt_iterator bsi;
2193 basic_block *body = get_loop_body (data->current_loop);
2194 unsigned i;
2195 struct version_info *info;
2196 edge e;
2197
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 fprintf (dump_file, "Uses:\n\n");
2200
2201 for (i = 0; i < data->current_loop->num_nodes; i++)
2202 {
2203 edge_iterator ei;
2204 bb = body[i];
2205
2206 FOR_EACH_EDGE (e, ei, bb->succs)
2207 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2208 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2209 find_interesting_uses_outside (data, e);
2210
2211 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2212 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2213 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2214 if (!is_gimple_debug (gsi_stmt (bsi)))
2215 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2216 }
2217
2218 if (dump_file && (dump_flags & TDF_DETAILS))
2219 {
2220 bitmap_iterator bi;
2221
2222 fprintf (dump_file, "\n");
2223
2224 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2225 {
2226 info = ver_info (data, i);
2227 if (info->inv_id)
2228 {
2229 fprintf (dump_file, " ");
2230 print_generic_expr (dump_file, info->name, TDF_SLIM);
2231 fprintf (dump_file, " is invariant (%d)%s\n",
2232 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2233 }
2234 }
2235
2236 fprintf (dump_file, "\n");
2237 }
2238
2239 free (body);
2240 }
2241
2242 /* Compute the maximum offset supported by the [base + offset] addressing
2243 mode for the memory reference represented by USE. */
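/* (Sketch of the search below: offsets of the form (1 << i) - 1, and on
   strict-alignment targets also (1 << i) - GET_MODE_SIZE (mem_mode), are
   probed from the widest downwards, and the first offset the target
   accepts is cached per address space and memory mode.)  */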
2244
2245 static HOST_WIDE_INT
2246 compute_max_addr_offset (struct iv_use *use)
2247 {
2248 int width;
2249 rtx reg, addr;
2250 HOST_WIDE_INT i, off;
2251 unsigned list_index, num;
2252 addr_space_t as;
2253 machine_mode mem_mode, addr_mode;
2254 static vec<HOST_WIDE_INT> max_offset_list;
2255
2256 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2257 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2258
2259 num = max_offset_list.length ();
2260 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2261 if (list_index >= num)
2262 {
2263 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2264 for (; num < max_offset_list.length (); num++)
2265 max_offset_list[num] = -1;
2266 }
2267
2268 off = max_offset_list[list_index];
2269 if (off != -1)
2270 return off;
2271
2272 addr_mode = targetm.addr_space.address_mode (as);
2273 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2274 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2275
2276 width = GET_MODE_BITSIZE (addr_mode) - 1;
2277 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2278 width = HOST_BITS_PER_WIDE_INT - 1;
2279
2280 for (i = width; i > 0; i--)
2281 {
2282 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2283 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2284 if (memory_address_addr_space_p (mem_mode, addr, as))
2285 break;
2286
2287 /* For some strict-alignment targets, the offset must be naturally
2288 aligned. Try an aligned offset if mem_mode is not QImode. */
2289 off = ((unsigned HOST_WIDE_INT) 1 << i);
2290 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2291 {
2292 off -= GET_MODE_SIZE (mem_mode);
2293 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2294 if (memory_address_addr_space_p (mem_mode, addr, as))
2295 break;
2296 }
2297 }
2298 if (i == 0)
2299 off = 0;
2300
2301 max_offset_list[list_index] = off;
2302 return off;
2303 }
2304
2305 /* Check if all small groups should be split. Return true if and
2306 only if:
2307
2308 1) At least one group contains two uses with different offsets.
2309 2) No group contains more than two uses with different offsets.
2310
2311 Return false otherwise. We want to split such groups because:
2312
2313 1) Small groups don't have much benefit and may interfere with
2314 general candidate selection.
2315 2) The problem size with only small groups is usually small, and
2316 the general algorithm can handle it well.
2317
2318 TODO -- The above claim may not hold when auto increment is supported. */
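/* (For example, a group accessing a[i] and a[i + 1] has two distinct
   offsets and so votes for splitting, while a group that also accesses
   a[i + 2] has three distinct offsets and suppresses splitting
   altogether.)  */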
2319
2320 static bool
2321 split_all_small_groups (struct ivopts_data *data)
2322 {
2323 bool split_p = false;
2324 unsigned int i, n, distinct;
2325 struct iv_use *pre, *use;
2326
2327 n = n_iv_uses (data);
2328 for (i = 0; i < n; i++)
2329 {
2330 use = iv_use (data, i);
2331 if (!use->next)
2332 continue;
2333
2334 distinct = 1;
2335 gcc_assert (use->type == USE_ADDRESS);
2336 for (pre = use, use = use->next; use; pre = use, use = use->next)
2337 {
2338 if (pre->addr_offset != use->addr_offset)
2339 distinct++;
2340
2341 if (distinct > 2)
2342 return false;
2343 }
2344 if (distinct == 2)
2345 split_p = true;
2346 }
2347
2348 return split_p;
2349 }
2350
2351 /* For each group of address type uses, this function further groups
2352 these uses according to the maximum offset supported by the target's
2353 [base + offset] addressing mode. */
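/* (Illustration: with a maximum supported offset of, say, 255, uses at
   offsets 0, 16 and 400 from the same stripped base end up as two
   groups, {0, 16} and {400}, because 400 does not fit in the offset
   field relative to the first use.)  */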
2354
2355 static void
2356 group_address_uses (struct ivopts_data *data)
2357 {
2358 HOST_WIDE_INT max_offset = -1;
2359 unsigned int i, n, sub_id;
2360 struct iv_use *pre, *use;
2361 unsigned HOST_WIDE_INT addr_offset_first;
2362
2363 /* Reset max offset to split all small groups. */
2364 if (split_all_small_groups (data))
2365 max_offset = 0;
2366
2367 n = n_iv_uses (data);
2368 for (i = 0; i < n; i++)
2369 {
2370 use = iv_use (data, i);
2371 if (!use->next)
2372 continue;
2373
2374 gcc_assert (use->type == USE_ADDRESS);
2375 if (max_offset != 0)
2376 max_offset = compute_max_addr_offset (use);
2377
2378 while (use)
2379 {
2380 sub_id = 0;
2381 addr_offset_first = use->addr_offset;
2382 /* Only uses whose offset relative to the first use fits in the
2383 offset part of the addressing mode can be grouped together. */
2384 for (pre = use, use = use->next;
2385 use && (use->addr_offset - addr_offset_first
2386 <= (unsigned HOST_WIDE_INT) max_offset);
2387 pre = use, use = use->next)
2388 {
2389 use->id = pre->id;
2390 use->sub_id = ++sub_id;
2391 }
2392
2393 /* Break the list and create a new group. */
2394 if (use)
2395 {
2396 pre->next = NULL;
2397 use->id = n_iv_uses (data);
2398 use->related_cands = BITMAP_ALLOC (NULL);
2399 data->iv_uses.safe_push (use);
2400 }
2401 }
2402 }
2403
2404 if (dump_file && (dump_flags & TDF_DETAILS))
2405 dump_uses (dump_file, data);
2406 }
2407
2408 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2409 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2410 we are at the top-level of the processed address. */
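/* (For instance, applied to p + 20 this returns P and sets *OFFSET to 20;
   applied to &a[i + 4] with 4-byte elements it returns &a[i] and sets
   *OFFSET to 16.)  */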
2411
2412 static tree
2413 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2414 HOST_WIDE_INT *offset)
2415 {
2416 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2417 enum tree_code code;
2418 tree type, orig_type = TREE_TYPE (expr);
2419 HOST_WIDE_INT off0, off1, st;
2420 tree orig_expr = expr;
2421
2422 STRIP_NOPS (expr);
2423
2424 type = TREE_TYPE (expr);
2425 code = TREE_CODE (expr);
2426 *offset = 0;
2427
2428 switch (code)
2429 {
2430 case INTEGER_CST:
2431 if (!cst_and_fits_in_hwi (expr)
2432 || integer_zerop (expr))
2433 return orig_expr;
2434
2435 *offset = int_cst_value (expr);
2436 return build_int_cst (orig_type, 0);
2437
2438 case POINTER_PLUS_EXPR:
2439 case PLUS_EXPR:
2440 case MINUS_EXPR:
2441 op0 = TREE_OPERAND (expr, 0);
2442 op1 = TREE_OPERAND (expr, 1);
2443
2444 op0 = strip_offset_1 (op0, false, false, &off0);
2445 op1 = strip_offset_1 (op1, false, false, &off1);
2446
2447 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2448 if (op0 == TREE_OPERAND (expr, 0)
2449 && op1 == TREE_OPERAND (expr, 1))
2450 return orig_expr;
2451
2452 if (integer_zerop (op1))
2453 expr = op0;
2454 else if (integer_zerop (op0))
2455 {
2456 if (code == MINUS_EXPR)
2457 expr = fold_build1 (NEGATE_EXPR, type, op1);
2458 else
2459 expr = op1;
2460 }
2461 else
2462 expr = fold_build2 (code, type, op0, op1);
2463
2464 return fold_convert (orig_type, expr);
2465
2466 case MULT_EXPR:
2467 op1 = TREE_OPERAND (expr, 1);
2468 if (!cst_and_fits_in_hwi (op1))
2469 return orig_expr;
2470
2471 op0 = TREE_OPERAND (expr, 0);
2472 op0 = strip_offset_1 (op0, false, false, &off0);
2473 if (op0 == TREE_OPERAND (expr, 0))
2474 return orig_expr;
2475
2476 *offset = off0 * int_cst_value (op1);
2477 if (integer_zerop (op0))
2478 expr = op0;
2479 else
2480 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2481
2482 return fold_convert (orig_type, expr);
2483
2484 case ARRAY_REF:
2485 case ARRAY_RANGE_REF:
2486 if (!inside_addr)
2487 return orig_expr;
2488
2489 step = array_ref_element_size (expr);
2490 if (!cst_and_fits_in_hwi (step))
2491 break;
2492
2493 st = int_cst_value (step);
2494 op1 = TREE_OPERAND (expr, 1);
2495 op1 = strip_offset_1 (op1, false, false, &off1);
2496 *offset = off1 * st;
2497
2498 if (top_compref
2499 && integer_zerop (op1))
2500 {
2501 /* Strip the component reference completely. */
2502 op0 = TREE_OPERAND (expr, 0);
2503 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2504 *offset += off0;
2505 return op0;
2506 }
2507 break;
2508
2509 case COMPONENT_REF:
2510 {
2511 tree field;
2512
2513 if (!inside_addr)
2514 return orig_expr;
2515
2516 tmp = component_ref_field_offset (expr);
2517 field = TREE_OPERAND (expr, 1);
2518 if (top_compref
2519 && cst_and_fits_in_hwi (tmp)
2520 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2521 {
2522 HOST_WIDE_INT boffset, abs_off;
2523
2524 /* Strip the component reference completely. */
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2527 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2528 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2529 if (boffset < 0)
2530 abs_off = -abs_off;
2531
2532 *offset = off0 + int_cst_value (tmp) + abs_off;
2533 return op0;
2534 }
2535 }
2536 break;
2537
2538 case ADDR_EXPR:
2539 op0 = TREE_OPERAND (expr, 0);
2540 op0 = strip_offset_1 (op0, true, true, &off0);
2541 *offset += off0;
2542
2543 if (op0 == TREE_OPERAND (expr, 0))
2544 return orig_expr;
2545
2546 expr = build_fold_addr_expr (op0);
2547 return fold_convert (orig_type, expr);
2548
2549 case MEM_REF:
2550 /* ??? Offset operand? */
2551 inside_addr = false;
2552 break;
2553
2554 default:
2555 return orig_expr;
2556 }
2557
2558 /* Default handling of expressions for which we want to recurse into
2559 the first operand. */
2560 op0 = TREE_OPERAND (expr, 0);
2561 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2562 *offset += off0;
2563
2564 if (op0 == TREE_OPERAND (expr, 0)
2565 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2566 return orig_expr;
2567
2568 expr = copy_node (expr);
2569 TREE_OPERAND (expr, 0) = op0;
2570 if (op1)
2571 TREE_OPERAND (expr, 1) = op1;
2572
2573 /* Inside an address, we might strip the top level component references,
2574 thus changing the type of the expression. Handling of ADDR_EXPR
2575 will fix that. */
2576 expr = fold_convert (orig_type, expr);
2577
2578 return expr;
2579 }
2580
2581 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2582
2583 static tree
2584 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2585 {
2586 HOST_WIDE_INT off;
2587 tree core = strip_offset_1 (expr, false, false, &off);
2588 *offset = off;
2589 return core;
2590 }
2591
2592 /* Returns a variant of TYPE that can be used as a base for different uses.
2593 We return an unsigned type with the same precision, which avoids problems
2594 with overflows. */
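/* (E.g. 'int' becomes 'unsigned int', and a pointer type becomes the
   unsigned integer type of the same precision.)  */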
2595
2596 static tree
2597 generic_type_for (tree type)
2598 {
2599 if (POINTER_TYPE_P (type))
2600 return unsigned_type_for (type);
2601
2602 if (TYPE_UNSIGNED (type))
2603 return type;
2604
2605 return unsigned_type_for (type);
2606 }
2607
2608 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2609 the bitmap in which we should store them. */
2610
2611 static struct ivopts_data *fd_ivopts_data;
2612 static tree
2613 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2614 {
2615 bitmap *depends_on = (bitmap *) data;
2616 struct version_info *info;
2617
2618 if (TREE_CODE (*expr_p) != SSA_NAME)
2619 return NULL_TREE;
2620 info = name_info (fd_ivopts_data, *expr_p);
2621
2622 if (!info->inv_id || info->has_nonlin_use)
2623 return NULL_TREE;
2624
2625 if (!*depends_on)
2626 *depends_on = BITMAP_ALLOC (NULL);
2627 bitmap_set_bit (*depends_on, info->inv_id);
2628
2629 return NULL_TREE;
2630 }
2631
2632 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2633 position to POS. If USE is not NULL, the candidate is set as related to
2634 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2635 replacement of the final value of the iv by a direct computation. */
2636
2637 static struct iv_cand *
2638 add_candidate_1 (struct ivopts_data *data,
2639 tree base, tree step, bool important, enum iv_position pos,
2640 struct iv_use *use, gimple incremented_at)
2641 {
2642 unsigned i;
2643 struct iv_cand *cand = NULL;
2644 tree type, orig_type;
2645
2646 /* For non-original variables, make sure their values are computed in a type
2647 that does not invoke undefined behavior on overflows (since in general,
2648 we cannot prove that these induction variables are non-wrapping). */
2649 if (pos != IP_ORIGINAL)
2650 {
2651 orig_type = TREE_TYPE (base);
2652 type = generic_type_for (orig_type);
2653 if (type != orig_type)
2654 {
2655 base = fold_convert (type, base);
2656 step = fold_convert (type, step);
2657 }
2658 }
2659
2660 for (i = 0; i < n_iv_cands (data); i++)
2661 {
2662 cand = iv_cand (data, i);
2663
2664 if (cand->pos != pos)
2665 continue;
2666
2667 if (cand->incremented_at != incremented_at
2668 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2669 && cand->ainc_use != use))
2670 continue;
2671
2672 if (!cand->iv)
2673 {
2674 if (!base && !step)
2675 break;
2676
2677 continue;
2678 }
2679
2680 if (!base && !step)
2681 continue;
2682
2683 if (operand_equal_p (base, cand->iv->base, 0)
2684 && operand_equal_p (step, cand->iv->step, 0)
2685 && (TYPE_PRECISION (TREE_TYPE (base))
2686 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2687 break;
2688 }
2689
2690 if (i == n_iv_cands (data))
2691 {
2692 cand = XCNEW (struct iv_cand);
2693 cand->id = i;
2694
2695 if (!base && !step)
2696 cand->iv = NULL;
2697 else
2698 cand->iv = alloc_iv (base, step);
2699
2700 cand->pos = pos;
2701 if (pos != IP_ORIGINAL && cand->iv)
2702 {
2703 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2704 cand->var_after = cand->var_before;
2705 }
2706 cand->important = important;
2707 cand->incremented_at = incremented_at;
2708 data->iv_candidates.safe_push (cand);
2709
2710 if (step
2711 && TREE_CODE (step) != INTEGER_CST)
2712 {
2713 fd_ivopts_data = data;
2714 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2715 }
2716
2717 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2718 cand->ainc_use = use;
2719 else
2720 cand->ainc_use = NULL;
2721
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 dump_cand (dump_file, cand);
2724 }
2725
2726 if (important && !cand->important)
2727 {
2728 cand->important = true;
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2731 }
2732
2733 if (use)
2734 {
2735 bitmap_set_bit (use->related_cands, i);
2736 if (dump_file && (dump_flags & TDF_DETAILS))
2737 fprintf (dump_file, "Candidate %d is related to use %d\n",
2738 cand->id, use->id);
2739 }
2740
2741 return cand;
2742 }
2743
2744 /* Returns true if incrementing the induction variable at the end of the LOOP
2745 is allowed.
2746
2747 The purpose is to avoid splitting the latch edge with a biv increment, thus
2748 creating a jump, possibly confusing other optimization passes and leaving
2749 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2750 is not available (so we do not have a better alternative), or if the latch
2751 edge is already nonempty. */
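/* (In other words: if the latch block already contains statements, adding
   the increment there creates no new jump; and if there is no IP_NORMAL
   position at all, IP_END is the only choice anyway.)  */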
2752
2753 static bool
2754 allow_ip_end_pos_p (struct loop *loop)
2755 {
2756 if (!ip_normal_pos (loop))
2757 return true;
2758
2759 if (!empty_block_p (ip_end_pos (loop)))
2760 return true;
2761
2762 return false;
2763 }
2764
2765 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2766 Important field is set to IMPORTANT. */
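/* (Sketch: for a use *p with step 4 and a 4-byte MEM_MODE on a target
   with post-increment addressing, this adds a candidate incremented
   right after the use, i.e. the *p++ idiom; the pre-increment variants
   use BASE - STEP and place the increment before the use.)  */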
2767
2768 static void
2769 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2770 bool important, struct iv_use *use)
2771 {
2772 basic_block use_bb = gimple_bb (use->stmt);
2773 machine_mode mem_mode;
2774 unsigned HOST_WIDE_INT cstepi;
2775
2776 /* If we insert the increment in any position other than the standard
2777 ones, we must ensure that it is incremented once per iteration.
2778 It must not be in an inner nested loop, or one side of an if
2779 statement. */
2780 if (use_bb->loop_father != data->current_loop
2781 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2782 || stmt_could_throw_p (use->stmt)
2783 || !cst_and_fits_in_hwi (step))
2784 return;
2785
2786 cstepi = int_cst_value (step);
2787
2788 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2789 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2790 || USE_STORE_PRE_INCREMENT (mem_mode))
2791 && GET_MODE_SIZE (mem_mode) == cstepi)
2792 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2793 || USE_STORE_PRE_DECREMENT (mem_mode))
2794 && GET_MODE_SIZE (mem_mode) == -cstepi))
2795 {
2796 enum tree_code code = MINUS_EXPR;
2797 tree new_base;
2798 tree new_step = step;
2799
2800 if (POINTER_TYPE_P (TREE_TYPE (base)))
2801 {
2802 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2803 code = POINTER_PLUS_EXPR;
2804 }
2805 else
2806 new_step = fold_convert (TREE_TYPE (base), new_step);
2807 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2808 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2809 use->stmt);
2810 }
2811 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2812 || USE_STORE_POST_INCREMENT (mem_mode))
2813 && GET_MODE_SIZE (mem_mode) == cstepi)
2814 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2815 || USE_STORE_POST_DECREMENT (mem_mode))
2816 && GET_MODE_SIZE (mem_mode) == -cstepi))
2817 {
2818 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2819 use->stmt);
2820 }
2821 }
2822
2823 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2824 position to POS. If USE is not NULL, the candidate is set as related to
2825 it. The candidate computation is scheduled on all available positions. */
2826
2827 static void
2828 add_candidate (struct ivopts_data *data,
2829 tree base, tree step, bool important, struct iv_use *use)
2830 {
2831 gcc_assert (use == NULL || use->sub_id == 0);
2832
2833 if (ip_normal_pos (data->current_loop))
2834 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2835 if (ip_end_pos (data->current_loop)
2836 && allow_ip_end_pos_p (data->current_loop))
2837 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2838
2839 if (use != NULL && use->type == USE_ADDRESS)
2840 add_autoinc_candidates (data, base, step, important, use);
2841 }
2842
2843 /* Adds standard iv candidates. */
2844
2845 static void
2846 add_standard_iv_candidates (struct ivopts_data *data)
2847 {
2848 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2849
2850 /* The same for the 'long' type if it is still fast enough. */
2851 if (TYPE_PRECISION
2852 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2853 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2854 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2855 build_int_cst (long_integer_type_node, 1), true, NULL);
2856
2857 /* The same for the 'long long' type if it is still fast enough. */
2858 if (TYPE_PRECISION
2859 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2860 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2861 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2862 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2863 }
2864
2866 /* Adds candidates based on the old induction variable IV. */
2867
2868 static void
2869 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2870 {
2871 gimple phi;
2872 tree def;
2873 struct iv_cand *cand;
2874
2875 add_candidate (data, iv->base, iv->step, true, NULL);
2876
2877 /* The same, but with initial value zero. */
2878 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2879 add_candidate (data, size_int (0), iv->step, true, NULL);
2880 else
2881 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2882 iv->step, true, NULL);
2883
2884 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2885 if (gimple_code (phi) == GIMPLE_PHI)
2886 {
2887 /* Additionally record the possibility of leaving the original iv
2888 untouched. */
2889 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2890 /* Don't add candidate if it's from another PHI node because
2891 it's an affine iv appearing in the form of PEELED_CHREC. */
2892 phi = SSA_NAME_DEF_STMT (def);
2893 if (gimple_code (phi) != GIMPLE_PHI)
2894 {
2895 cand = add_candidate_1 (data,
2896 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2897 SSA_NAME_DEF_STMT (def));
2898 cand->var_before = iv->ssa_name;
2899 cand->var_after = def;
2900 }
2901 else
2902 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2903 }
2904 }
2905
2906 /* Adds candidates based on the old induction variables. */
2907
2908 static void
2909 add_old_ivs_candidates (struct ivopts_data *data)
2910 {
2911 unsigned i;
2912 struct iv *iv;
2913 bitmap_iterator bi;
2914
2915 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2916 {
2917 iv = ver_info (data, i)->iv;
2918 if (iv && iv->biv_p && !integer_zerop (iv->step))
2919 add_old_iv_candidates (data, iv);
2920 }
2921 }
2922
2923 /* Adds candidates based on the value of the induction variable IV and USE. */
2924
2925 static void
2926 add_iv_value_candidates (struct ivopts_data *data,
2927 struct iv *iv, struct iv_use *use)
2928 {
2929 unsigned HOST_WIDE_INT offset;
2930 tree base;
2931 tree basetype;
2932
2933 add_candidate (data, iv->base, iv->step, false, use);
2934
2935 /* The same, but with initial value zero. Make such a variable important,
2936 since it is generic enough that possibly many uses may be based
2937 on it. */
2938 basetype = TREE_TYPE (iv->base);
2939 if (POINTER_TYPE_P (basetype))
2940 basetype = sizetype;
2941 add_candidate (data, build_int_cst (basetype, 0),
2942 iv->step, true, use);
2943
2944 /* Third, try removing the constant offset. Make sure to even
2945 add a candidate for &a[0] vs. (T *)&a. */
2946 base = strip_offset (iv->base, &offset);
2947 if (offset
2948 || base != iv->base)
2949 add_candidate (data, base, iv->step, false, use);
2950 }
2951
2952 /* Adds candidates based on the uses. */
2953
2954 static void
2955 add_derived_ivs_candidates (struct ivopts_data *data)
2956 {
2957 unsigned i;
2958
2959 for (i = 0; i < n_iv_uses (data); i++)
2960 {
2961 struct iv_use *use = iv_use (data, i);
2962
2963 if (!use)
2964 continue;
2965
2966 switch (use->type)
2967 {
2968 case USE_NONLINEAR_EXPR:
2969 case USE_COMPARE:
2970 case USE_ADDRESS:
2971 /* Just add the ivs based on the value of the iv used here. */
2972 add_iv_value_candidates (data, use->iv, use);
2973 break;
2974
2975 default:
2976 gcc_unreachable ();
2977 }
2978 }
2979 }
2980
2981 /* Record important candidates and add them to related_cands bitmaps
2982 if needed. */
2983
2984 static void
2985 record_important_candidates (struct ivopts_data *data)
2986 {
2987 unsigned i;
2988 struct iv_use *use;
2989
2990 for (i = 0; i < n_iv_cands (data); i++)
2991 {
2992 struct iv_cand *cand = iv_cand (data, i);
2993
2994 if (cand->important)
2995 bitmap_set_bit (data->important_candidates, i);
2996 }
2997
2998 data->consider_all_candidates = (n_iv_cands (data)
2999 <= CONSIDER_ALL_CANDIDATES_BOUND);
3000
3001 if (data->consider_all_candidates)
3002 {
3003 /* We will not need "related_cands" bitmaps in this case,
3004 so release them to decrease peak memory consumption. */
3005 for (i = 0; i < n_iv_uses (data); i++)
3006 {
3007 use = iv_use (data, i);
3008 BITMAP_FREE (use->related_cands);
3009 }
3010 }
3011 else
3012 {
3013 /* Add important candidates to the related_cands bitmaps. */
3014 for (i = 0; i < n_iv_uses (data); i++)
3015 bitmap_ior_into (iv_use (data, i)->related_cands,
3016 data->important_candidates);
3017 }
3018 }
3019
3020 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3021 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3022 we allocate a small hash table for every use. */
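/* (Sizing sketch: a use with 5 related candidates gets a table of 8
   entries, so the (cand->id & 7) probe used by set_use_iv_cost and
   get_use_iv_cost below is a cheap modulo.)  */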
3023
3024 static void
3025 alloc_use_cost_map (struct ivopts_data *data)
3026 {
3027 unsigned i, size, s;
3028
3029 for (i = 0; i < n_iv_uses (data); i++)
3030 {
3031 struct iv_use *use = iv_use (data, i);
3032
3033 if (data->consider_all_candidates)
3034 size = n_iv_cands (data);
3035 else
3036 {
3037 s = bitmap_count_bits (use->related_cands);
3038
3039 /* Round up to the nearest power of two, so that taking the modulus is fast. */
3040 size = s ? (1 << ceil_log2 (s)) : 1;
3041 }
3042
3043 use->n_map_members = size;
3044 use->cost_map = XCNEWVEC (struct cost_pair, size);
3045 }
3046 }
3047
3048 /* Returns a description of the computation cost of an expression whose
3049 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3050
3051 static comp_cost
3052 new_cost (unsigned runtime, unsigned complexity)
3053 {
3054 comp_cost cost;
3055
3056 cost.cost = runtime;
3057 cost.complexity = complexity;
3058
3059 return cost;
3060 }
3061
3062 /* Returns true if COST is infinite. */
3063
3064 static bool
3065 infinite_cost_p (comp_cost cost)
3066 {
3067 return cost.cost == INFTY;
3068 }
3069
3070 /* Adds costs COST1 and COST2. */
3071
3072 static comp_cost
3073 add_costs (comp_cost cost1, comp_cost cost2)
3074 {
3075 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3076 return infinite_cost;
3077
3078 cost1.cost += cost2.cost;
3079 cost1.complexity += cost2.complexity;
3080
3081 return cost1;
3082 }
3083 /* Subtracts cost COST2 from COST1. */
3084
3085 static comp_cost
3086 sub_costs (comp_cost cost1, comp_cost cost2)
3087 {
3088 cost1.cost -= cost2.cost;
3089 cost1.complexity -= cost2.complexity;
3090
3091 return cost1;
3092 }
3093
3094 /* Returns a negative number if COST1 < COST2, a positive number if
3095 COST1 > COST2, and 0 if COST1 = COST2. */
3096
3097 static int
3098 compare_costs (comp_cost cost1, comp_cost cost2)
3099 {
3100 if (cost1.cost == cost2.cost)
3101 return cost1.complexity - cost2.complexity;
3102
3103 return cost1.cost - cost2.cost;
3104 }
3105
3106 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3107 depends on invariants DEPENDS_ON, that the value used in expressing it
3108 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3109
3110 static void
3111 set_use_iv_cost (struct ivopts_data *data,
3112 struct iv_use *use, struct iv_cand *cand,
3113 comp_cost cost, bitmap depends_on, tree value,
3114 enum tree_code comp, int inv_expr_id)
3115 {
3116 unsigned i, s;
3117
3118 if (infinite_cost_p (cost))
3119 {
3120 BITMAP_FREE (depends_on);
3121 return;
3122 }
3123
3124 if (data->consider_all_candidates)
3125 {
3126 use->cost_map[cand->id].cand = cand;
3127 use->cost_map[cand->id].cost = cost;
3128 use->cost_map[cand->id].depends_on = depends_on;
3129 use->cost_map[cand->id].value = value;
3130 use->cost_map[cand->id].comp = comp;
3131 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3132 return;
3133 }
3134
3135 /* n_map_members is a power of two, so this computes modulo. */
3136 s = cand->id & (use->n_map_members - 1);
3137 for (i = s; i < use->n_map_members; i++)
3138 if (!use->cost_map[i].cand)
3139 goto found;
3140 for (i = 0; i < s; i++)
3141 if (!use->cost_map[i].cand)
3142 goto found;
3143
3144 gcc_unreachable ();
3145
3146 found:
3147 use->cost_map[i].cand = cand;
3148 use->cost_map[i].cost = cost;
3149 use->cost_map[i].depends_on = depends_on;
3150 use->cost_map[i].value = value;
3151 use->cost_map[i].comp = comp;
3152 use->cost_map[i].inv_expr_id = inv_expr_id;
3153 }
3154
3155 /* Gets cost of (USE, CANDIDATE) pair. */
3156
3157 static struct cost_pair *
3158 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3159 struct iv_cand *cand)
3160 {
3161 unsigned i, s;
3162 struct cost_pair *ret;
3163
3164 if (!cand)
3165 return NULL;
3166
3167 if (data->consider_all_candidates)
3168 {
3169 ret = use->cost_map + cand->id;
3170 if (!ret->cand)
3171 return NULL;
3172
3173 return ret;
3174 }
3175
3176 /* n_map_members is a power of two, so this computes modulo. */
3177 s = cand->id & (use->n_map_members - 1);
3178 for (i = s; i < use->n_map_members; i++)
3179 if (use->cost_map[i].cand == cand)
3180 return use->cost_map + i;
3181 else if (use->cost_map[i].cand == NULL)
3182 return NULL;
3183 for (i = 0; i < s; i++)
3184 if (use->cost_map[i].cand == cand)
3185 return use->cost_map + i;
3186 else if (use->cost_map[i].cand == NULL)
3187 return NULL;
3188
3189 return NULL;
3190 }
3191
3192 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3193 static rtx
3194 produce_memory_decl_rtl (tree obj, int *regno)
3195 {
3196 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3197 machine_mode address_mode = targetm.addr_space.address_mode (as);
3198 rtx x;
3199
3200 gcc_assert (obj);
3201 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3202 {
3203 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3204 x = gen_rtx_SYMBOL_REF (address_mode, name);
3205 SET_SYMBOL_REF_DECL (x, obj);
3206 x = gen_rtx_MEM (DECL_MODE (obj), x);
3207 set_mem_addr_space (x, as);
3208 targetm.encode_section_info (obj, x, true);
3209 }
3210 else
3211 {
3212 x = gen_raw_REG (address_mode, (*regno)++);
3213 x = gen_rtx_MEM (DECL_MODE (obj), x);
3214 set_mem_addr_space (x, as);
3215 }
3216
3217 return x;
3218 }
3219
3220 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3221 walk_tree. DATA contains the actual fake register number. */
3222
3223 static tree
3224 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3225 {
3226 tree obj = NULL_TREE;
3227 rtx x = NULL_RTX;
3228 int *regno = (int *) data;
3229
3230 switch (TREE_CODE (*expr_p))
3231 {
3232 case ADDR_EXPR:
3233 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3234 handled_component_p (*expr_p);
3235 expr_p = &TREE_OPERAND (*expr_p, 0))
3236 continue;
3237 obj = *expr_p;
3238 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3239 x = produce_memory_decl_rtl (obj, regno);
3240 break;
3241
3242 case SSA_NAME:
3243 *ws = 0;
3244 obj = SSA_NAME_VAR (*expr_p);
3245 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3246 if (!obj)
3247 return NULL_TREE;
3248 if (!DECL_RTL_SET_P (obj))
3249 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3250 break;
3251
3252 case VAR_DECL:
3253 case PARM_DECL:
3254 case RESULT_DECL:
3255 *ws = 0;
3256 obj = *expr_p;
3257
3258 if (DECL_RTL_SET_P (obj))
3259 break;
3260
3261 if (DECL_MODE (obj) == BLKmode)
3262 x = produce_memory_decl_rtl (obj, regno);
3263 else
3264 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3265
3266 break;
3267
3268 default:
3269 break;
3270 }
3271
3272 if (x)
3273 {
3274 decl_rtl_to_reset.safe_push (obj);
3275 SET_DECL_RTL (obj, x);
3276 }
3277
3278 return NULL_TREE;
3279 }
3280
3281 /* Determines cost of the computation of EXPR. */
3282
3283 static unsigned
3284 computation_cost (tree expr, bool speed)
3285 {
3286 rtx_insn *seq;
3287 rtx rslt;
3288 tree type = TREE_TYPE (expr);
3289 unsigned cost;
3290 /* Avoid using hard regs in ways which may be unsupported. */
3291 int regno = LAST_VIRTUAL_REGISTER + 1;
3292 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3293 enum node_frequency real_frequency = node->frequency;
3294
3295 node->frequency = NODE_FREQUENCY_NORMAL;
3296 crtl->maybe_hot_insn_p = speed;
3297 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3298 start_sequence ();
3299 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3300 seq = get_insns ();
3301 end_sequence ();
3302 default_rtl_profile ();
3303 node->frequency = real_frequency;
3304
3305 cost = seq_cost (seq, speed);
3306 if (MEM_P (rslt))
3307 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3308 TYPE_ADDR_SPACE (type), speed);
3309 else if (!REG_P (rslt))
3310 cost += set_src_cost (rslt, speed);
3311
3312 return cost;
3313 }
3314
3315 /* Returns variable containing the value of candidate CAND at statement AT. */
3316
3317 static tree
3318 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3319 {
3320 if (stmt_after_increment (loop, cand, stmt))
3321 return cand->var_after;
3322 else
3323 return cand->var_before;
3324 }
3325
3326 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3327 same precision, at least as wide as the precision of TYPE, stores
3328 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3329 type of A and B. */
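/* (Illustration: for A = (int) la and B = (int) lb, with 'long' LA and LB
   wider than 'int', this stores LA and LB back through the pointers and
   returns 'long', so the caller can fold their difference in the wider
   type.)  */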
3330
3331 static tree
3332 determine_common_wider_type (tree *a, tree *b)
3333 {
3334 tree wider_type = NULL;
3335 tree suba, subb;
3336 tree atype = TREE_TYPE (*a);
3337
3338 if (CONVERT_EXPR_P (*a))
3339 {
3340 suba = TREE_OPERAND (*a, 0);
3341 wider_type = TREE_TYPE (suba);
3342 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3343 return atype;
3344 }
3345 else
3346 return atype;
3347
3348 if (CONVERT_EXPR_P (*b))
3349 {
3350 subb = TREE_OPERAND (*b, 0);
3351 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3352 return atype;
3353 }
3354 else
3355 return atype;
3356
3357 *a = suba;
3358 *b = subb;
3359 return wider_type;
3360 }
3361
3362 /* Determines the expression by which USE is expressed from induction variable
3363 CAND at statement AT in LOOP. The expression is stored in a decomposed
3364 form into AFF. Returns false if USE cannot be expressed using CAND. */
3365
3366 static bool
3367 get_computation_aff (struct loop *loop,
3368 struct iv_use *use, struct iv_cand *cand, gimple at,
3369 struct aff_tree *aff)
3370 {
3371 tree ubase = use->iv->base;
3372 tree ustep = use->iv->step;
3373 tree cbase = cand->iv->base;
3374 tree cstep = cand->iv->step, cstep_common;
3375 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3376 tree common_type, var;
3377 tree uutype;
3378 aff_tree cbase_aff, var_aff;
3379 widest_int rat;
3380
3381 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3382 {
3383 /* We do not have enough precision to express the values of the use. */
3384 return false;
3385 }
3386
3387 var = var_at_stmt (loop, cand, at);
3388 uutype = unsigned_type_for (utype);
3389
3390 /* If the conversion is not a noop, perform it. */
3391 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3392 {
3393 cstep = fold_convert (uutype, cstep);
3394 cbase = fold_convert (uutype, cbase);
3395 var = fold_convert (uutype, var);
3396 }
3397
3398 if (!constant_multiple_of (ustep, cstep, &rat))
3399 return false;
3400
3401 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3402 type, we achieve better folding by computing their difference in this
3403 wider type, and casting the result to UUTYPE. We do not need to worry about
3404 overflows, as all the arithmetic will in the end be performed in UUTYPE
3405 anyway. */
3406 common_type = determine_common_wider_type (&ubase, &cbase);
3407
3408 /* use = ubase - ratio * cbase + ratio * var. */
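/* (Concrete instance, assuming ustep == cstep so that RAT is 1: for
   use->iv = {UB, S} and cand->iv = {CB, S} the combination is simply
   UB - CB + VAR, i.e. the use is recovered from the candidate by
   adding the constant difference of the two bases.)  */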
3409 tree_to_aff_combination (ubase, common_type, aff);
3410 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3411 tree_to_aff_combination (var, uutype, &var_aff);
3412
3413 /* We need to shift the value if we are after the increment. */
3414 if (stmt_after_increment (loop, cand, at))
3415 {
3416 aff_tree cstep_aff;
3417
3418 if (common_type != uutype)
3419 cstep_common = fold_convert (common_type, cstep);
3420 else
3421 cstep_common = cstep;
3422
3423 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3424 aff_combination_add (&cbase_aff, &cstep_aff);
3425 }
3426
3427 aff_combination_scale (&cbase_aff, -rat);
3428 aff_combination_add (aff, &cbase_aff);
3429 if (common_type != uutype)
3430 aff_combination_convert (aff, uutype);
3431
3432 aff_combination_scale (&var_aff, rat);
3433 aff_combination_add (aff, &var_aff);
3434
3435 return true;
3436 }
3437
3438 /* Return the type of USE. */
3439
3440 static tree
3441 get_use_type (struct iv_use *use)
3442 {
3443 tree base_type = TREE_TYPE (use->iv->base);
3444 tree type;
3445
3446 if (use->type == USE_ADDRESS)
3447 {
3448 /* The base_type may be a void pointer. Create a pointer type based on
3449 the mem_ref instead. */
3450 type = build_pointer_type (TREE_TYPE (*use->op_p));
3451 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3452 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3453 }
3454 else
3455 type = base_type;
3456
3457 return type;
3458 }
3459
3460 /* Determines the expression by which USE is expressed from induction variable
3461 CAND at statement AT in LOOP. The computation is unshared. */
3462
3463 static tree
3464 get_computation_at (struct loop *loop,
3465 struct iv_use *use, struct iv_cand *cand, gimple at)
3466 {
3467 aff_tree aff;
3468 tree type = get_use_type (use);
3469
3470 if (!get_computation_aff (loop, use, cand, at, &aff))
3471 return NULL_TREE;
3472 unshare_aff_combination (&aff);
3473 return fold_convert (type, aff_combination_to_tree (&aff));
3474 }
3475
3476 /* Determines the expression by which USE is expressed from induction variable
3477 CAND in LOOP. The computation is unshared. */
3478
3479 static tree
3480 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3481 {
3482 return get_computation_at (loop, use, cand, use->stmt);
3483 }
3484
3485 /* Adjust the cost COST for being in loop setup rather than loop body.
3486 If we're optimizing for space, the loop setup overhead is constant;
3487 if we're optimizing for speed, amortize it over the per-iteration cost. */
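/* (E.g. a setup cost of 10 in a loop averaging 5 iterations counts as 2
   per iteration when optimizing for speed, but stays 10 when optimizing
   for size.)  */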
3488 static unsigned
3489 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3490 {
3491 if (cost == INFTY)
3492 return cost;
3493 else if (optimize_loop_for_speed_p (data->current_loop))
3494 return cost / avg_loop_niter (data->current_loop);
3495 else
3496 return cost;
3497 }
3498
3499 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3500 validity for a memory reference accessing memory of mode MODE in
3501 address space AS. */
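/* (On an x86-like target, for instance, only the ratios 1, 2, 4 and 8
   would be recorded in the bitmap built below; the answers are cached
   per address space and memory mode.)  */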
3502
3504 bool
3505 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3506 addr_space_t as)
3507 {
3508 #define MAX_RATIO 128
3509 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3510 static vec<sbitmap> valid_mult_list;
3511 sbitmap valid_mult;
3512
3513 if (data_index >= valid_mult_list.length ())
3514 valid_mult_list.safe_grow_cleared (data_index + 1);
3515
3516 valid_mult = valid_mult_list[data_index];
3517 if (!valid_mult)
3518 {
3519 machine_mode address_mode = targetm.addr_space.address_mode (as);
3520 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3521 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3522 rtx addr, scaled;
3523 HOST_WIDE_INT i;
3524
3525 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3526 bitmap_clear (valid_mult);
3527 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3528 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3529 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3530 {
3531 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3532 if (memory_address_addr_space_p (mode, addr, as)
3533 || memory_address_addr_space_p (mode, scaled, as))
3534 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3535 }
3536
3537 if (dump_file && (dump_flags & TDF_DETAILS))
3538 {
3539 fprintf (dump_file, " allowed multipliers:");
3540 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3541 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3542 fprintf (dump_file, " %d", (int) i);
3543 fprintf (dump_file, "\n");
3544 fprintf (dump_file, "\n");
3545 }
3546
3547 valid_mult_list[data_index] = valid_mult;
3548 }
3549
3550 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3551 return false;
3552
3553 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3554 }
3555
3556 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3557 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3558 variable is omitted. Compute the cost for a memory reference that accesses
3559 a memory location of mode MEM_MODE in address space AS.
3560
3561 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3562 size of MEM_MODE / RATIO) is available. To make this determination, we
3563 look at the size of the increment to be made, which is given in CSTEP.
3564 CSTEP may be zero if the step is unknown.
3565 STMT_AFTER_INC is true iff the statement we're looking at is after the
3566 increment of the original biv.
3567
3568 TODO -- there must be some better way. This all is quite crude. */
3569
3570 enum ainc_type
3571 {
3572 AINC_PRE_INC, /* Pre increment. */
3573 AINC_PRE_DEC, /* Pre decrement. */
3574 AINC_POST_INC, /* Post increment. */
3575 AINC_POST_DEC, /* Post decrement. */
3576 AINC_NONE /* Also the number of auto increment types. */
3577 };
3578
3579 typedef struct address_cost_data_s
3580 {
3581 HOST_WIDE_INT min_offset, max_offset;
3582 unsigned costs[2][2][2][2];
3583 unsigned ainc_costs[AINC_NONE];
3584 } *address_cost_data;
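/* (Indexing sketch: costs[sym][var][off][rat] is the cost of an address of
   shape symbol + var + offset + ratio * index with each part present or
   absent; e.g. costs[0][1][1][0] is the cost of var + offset.)  */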
3585
3586
3587 static comp_cost
3588 get_address_cost (bool symbol_present, bool var_present,
3589 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3590 HOST_WIDE_INT cstep, machine_mode mem_mode,
3591 addr_space_t as, bool speed,
3592 bool stmt_after_inc, bool *may_autoinc)
3593 {
3594 machine_mode address_mode = targetm.addr_space.address_mode (as);
3595 static vec<address_cost_data> address_cost_data_list;
3596 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3597 address_cost_data data;
3598 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3599 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3600 unsigned cost, acost, complexity;
3601 enum ainc_type autoinc_type;
3602 bool offset_p, ratio_p, autoinc;
3603 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3604 unsigned HOST_WIDE_INT mask;
3605 unsigned bits;
3606
3607 if (data_index >= address_cost_data_list.length ())
3608 address_cost_data_list.safe_grow_cleared (data_index + 1);
3609
3610 data = address_cost_data_list[data_index];
3611 if (!data)
3612 {
3613 HOST_WIDE_INT i;
3614 HOST_WIDE_INT rat, off = 0;
3615 int old_cse_not_expected, width;
3616 unsigned sym_p, var_p, off_p, rat_p, add_c;
3617 rtx_insn *seq;
3618 rtx addr, base;
3619 rtx reg0, reg1;
3620
3621 data = (address_cost_data) xcalloc (1, sizeof (*data));
3622
3623 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3624
3625 width = GET_MODE_BITSIZE (address_mode) - 1;
3626 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3627 width = HOST_BITS_PER_WIDE_INT - 1;
3628 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3629
3630 for (i = width; i >= 0; i--)
3631 {
3632 off = -((unsigned HOST_WIDE_INT) 1 << i);
3633 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3634 if (memory_address_addr_space_p (mem_mode, addr, as))
3635 break;
3636 }
3637 data->min_offset = (i == -1? 0 : off);
3638
3639 for (i = width; i >= 0; i--)
3640 {
3641 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3642 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3643 if (memory_address_addr_space_p (mem_mode, addr, as))
3644 break;
3645 /* For some strict-alignment targets, the offset must be naturally
3646 aligned. Try an aligned offset if mem_mode is not QImode. */
3647 off = mem_mode != QImode
3648 ? ((unsigned HOST_WIDE_INT) 1 << i)
3649 - GET_MODE_SIZE (mem_mode)
3650 : 0;
3651 if (off > 0)
3652 {
3653 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3654 if (memory_address_addr_space_p (mem_mode, addr, as))
3655 break;
3656 }
3657 }
3658 if (i == -1)
3659 off = 0;
3660 data->max_offset = off;
3661
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3663 {
3664 fprintf (dump_file, "get_address_cost:\n");
3665 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3666 GET_MODE_NAME (mem_mode),
3667 data->min_offset);
3668 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3669 GET_MODE_NAME (mem_mode),
3670 data->max_offset);
3671 }
3672
3673 rat = 1;
3674 for (i = 2; i <= MAX_RATIO; i++)
3675 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3676 {
3677 rat = i;
3678 break;
3679 }
3680
3681 /* Compute the cost of various addressing modes. */
3682 acost = 0;
3683 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3684 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3685
3686 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3687 || USE_STORE_PRE_DECREMENT (mem_mode))
3688 {
3689 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3690 has_predec[mem_mode]
3691 = memory_address_addr_space_p (mem_mode, addr, as);
3692
3693 if (has_predec[mem_mode])
3694 data->ainc_costs[AINC_PRE_DEC]
3695 = address_cost (addr, mem_mode, as, speed);
3696 }
3697 if (USE_LOAD_POST_DECREMENT (mem_mode)
3698 || USE_STORE_POST_DECREMENT (mem_mode))
3699 {
3700 addr = gen_rtx_POST_DEC (address_mode, reg0);
3701 has_postdec[mem_mode]
3702 = memory_address_addr_space_p (mem_mode, addr, as);
3703
3704 if (has_postdec[mem_mode])
3705 data->ainc_costs[AINC_POST_DEC]
3706 = address_cost (addr, mem_mode, as, speed);
3707 }
3708 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3709 || USE_STORE_PRE_INCREMENT (mem_mode))
3710 {
3711 addr = gen_rtx_PRE_INC (address_mode, reg0);
3712 has_preinc[mem_mode]
3713 = memory_address_addr_space_p (mem_mode, addr, as);
3714
3715 if (has_preinc[mem_mode])
3716 data->ainc_costs[AINC_PRE_INC]
3717 = address_cost (addr, mem_mode, as, speed);
3718 }
3719 if (USE_LOAD_POST_INCREMENT (mem_mode)
3720 || USE_STORE_POST_INCREMENT (mem_mode))
3721 {
3722 addr = gen_rtx_POST_INC (address_mode, reg0);
3723 has_postinc[mem_mode]
3724 = memory_address_addr_space_p (mem_mode, addr, as);
3725
3726 if (has_postinc[mem_mode])
3727 data->ainc_costs[AINC_POST_INC]
3728 = address_cost (addr, mem_mode, as, speed);
3729 }
3730 for (i = 0; i < 16; i++)
3731 {
3732 sym_p = i & 1;
3733 var_p = (i >> 1) & 1;
3734 off_p = (i >> 2) & 1;
3735 rat_p = (i >> 3) & 1;
3736
3737 addr = reg0;
3738 if (rat_p)
3739 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3740 gen_int_mode (rat, address_mode));
3741
3742 if (var_p)
3743 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3744
3745 if (sym_p)
3746 {
3747 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3748 /* ??? We can run into trouble with some backends by presenting
3749 it with symbols which haven't been properly passed through
3750 targetm.encode_section_info. By setting the local bit, we
3751 enhance the probability of things working. */
3752 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3753
3754 if (off_p)
3755 base = gen_rtx_fmt_e (CONST, address_mode,
3756 gen_rtx_fmt_ee
3757 (PLUS, address_mode, base,
3758 gen_int_mode (off, address_mode)));
3759 }
3760 else if (off_p)
3761 base = gen_int_mode (off, address_mode);
3762 else
3763 base = NULL_RTX;
3764
3765 if (base)
3766 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3767
3768 start_sequence ();
3769 /* To avoid splitting addressing modes, pretend that no cse will
3770 follow. */
3771 old_cse_not_expected = cse_not_expected;
3772 cse_not_expected = true;
3773 addr = memory_address_addr_space (mem_mode, addr, as);
3774 cse_not_expected = old_cse_not_expected;
3775 seq = get_insns ();
3776 end_sequence ();
3777
3778 acost = seq_cost (seq, speed);
3779 acost += address_cost (addr, mem_mode, as, speed);
3780
3781 if (!acost)
3782 acost = 1;
3783 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3784 }
3785
3786 /* On some targets, it is quite expensive to load a symbol into a register,
3787 which makes addresses that contain symbols look much more expensive.
3788 However, the symbol will have to be loaded in any case before the
3789 loop (and quite likely we have it in a register already), so it does not
3790 make much sense to penalize them too heavily. Hence make some final
3791 tweaks for the SYMBOL_PRESENT modes:
3792
3793 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3794 var is cheaper, use this mode with small penalty.
3795 If VAR_PRESENT is true, try whether the mode with
3796 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3797 if this is the case, use it. */
3798 add_c = add_cost (speed, address_mode);
3799 for (i = 0; i < 8; i++)
3800 {
3801 var_p = i & 1;
3802 off_p = (i >> 1) & 1;
3803 rat_p = (i >> 2) & 1;
3804
3805 acost = data->costs[0][1][off_p][rat_p] + 1;
3806 if (var_p)
3807 acost += add_c;
3808
3809 if (acost < data->costs[1][var_p][off_p][rat_p])
3810 data->costs[1][var_p][off_p][rat_p] = acost;
3811 }
3812
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3814 {
3815 fprintf (dump_file, "Address costs:\n");
3816
3817 for (i = 0; i < 16; i++)
3818 {
3819 sym_p = i & 1;
3820 var_p = (i >> 1) & 1;
3821 off_p = (i >> 2) & 1;
3822 rat_p = (i >> 3) & 1;
3823
3824 fprintf (dump_file, " ");
3825 if (sym_p)
3826 fprintf (dump_file, "sym + ");
3827 if (var_p)
3828 fprintf (dump_file, "var + ");
3829 if (off_p)
3830 fprintf (dump_file, "cst + ");
3831 if (rat_p)
3832 fprintf (dump_file, "rat * ");
3833
3834 acost = data->costs[sym_p][var_p][off_p][rat_p];
3835 fprintf (dump_file, "index costs %d\n", acost);
3836 }
3837 if (has_predec[mem_mode] || has_postdec[mem_mode]
3838 || has_preinc[mem_mode] || has_postinc[mem_mode])
3839 fprintf (dump_file, " May include autoinc/dec\n");
3840 fprintf (dump_file, "\n");
3841 }
3842
3843 address_cost_data_list[data_index] = data;
3844 }
3845
3846 bits = GET_MODE_BITSIZE (address_mode);
3847 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3848 offset &= mask;
3849 if ((offset >> (bits - 1) & 1))
3850 offset |= ~mask;
3851 s_offset = offset;
3852
3853 autoinc = false;
3854 autoinc_type = AINC_NONE;
3855 msize = GET_MODE_SIZE (mem_mode);
3856 autoinc_offset = offset;
3857 if (stmt_after_inc)
3858 autoinc_offset += ratio * cstep;
3859 if (symbol_present || var_present || ratio != 1)
3860 autoinc = false;
3861 else
3862 {
3863 if (has_postinc[mem_mode] && autoinc_offset == 0
3864 && msize == cstep)
3865 autoinc_type = AINC_POST_INC;
3866 else if (has_postdec[mem_mode] && autoinc_offset == 0
3867 && msize == -cstep)
3868 autoinc_type = AINC_POST_DEC;
3869 else if (has_preinc[mem_mode] && autoinc_offset == msize
3870 && msize == cstep)
3871 autoinc_type = AINC_PRE_INC;
3872 else if (has_predec[mem_mode] && autoinc_offset == -msize
3873 && msize == -cstep)
3874 autoinc_type = AINC_PRE_DEC;
3875
3876 if (autoinc_type != AINC_NONE)
3877 autoinc = true;
3878 }
3879
3880 cost = 0;
3881 offset_p = (s_offset != 0
3882 && data->min_offset <= s_offset
3883 && s_offset <= data->max_offset);
3884 ratio_p = (ratio != 1
3885 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3886
3887 if (ratio != 1 && !ratio_p)
3888 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3889
3890 if (s_offset && !offset_p && !symbol_present)
3891 cost += add_cost (speed, address_mode);
3892
3893 if (may_autoinc)
3894 *may_autoinc = autoinc;
3895 if (autoinc)
3896 acost = data->ainc_costs[autoinc_type];
3897 else
3898 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3899 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3900 return new_cost (cost + acost, complexity);
3901 }
3902
3903 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3904 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3905 calculating the operands of EXPR. Returns true if successful, and returns
3906 the cost in COST. */
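/* (Example: for EXPR = b + a * 4, MULT is a * 4 and so m == 2; the result
   is the cheaper of "shift, then add" and a single shift-add instruction,
   plus the cost of computing the other operand.)  */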
3907
3908 static bool
3909 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3910 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3911 {
3912 comp_cost res;
3913 tree op1 = TREE_OPERAND (expr, 1);
3914 tree cst = TREE_OPERAND (mult, 1);
3915 tree multop = TREE_OPERAND (mult, 0);
3916 int m = exact_log2 (int_cst_value (cst));
3917 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3918 int as_cost, sa_cost;
3919 bool mult_in_op1;
3920
3921 if (!(m >= 0 && m < maxm))
3922 return false;
3923
3924 mult_in_op1 = operand_equal_p (op1, mult, 0);
3925
3926 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3927
3928 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3929 use that in preference to a shift insn followed by an add insn. */
3930 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3931 ? shiftadd_cost (speed, mode, m)
3932 : (mult_in_op1
3933 ? shiftsub1_cost (speed, mode, m)
3934 : shiftsub0_cost (speed, mode, m)));
3935
3936 res = new_cost (MIN (as_cost, sa_cost), 0);
3937 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3938
3939 STRIP_NOPS (multop);
3940 if (!is_gimple_val (multop))
3941 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3942
3943 *cost = res;
3944 return true;
3945 }
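
/* Illustrative example (costs are target-dependent): for EXPR "a + b * 8"
   the constant 8 gives m == 3, so the price of a shift followed by an add
   (as_cost) is compared with that of a single shift-and-add instruction
   (sa_cost) and the cheaper one is used; for "a - b * 8" the
   shiftsub0/shiftsub1 variants are consulted instead, depending on which
   operand holds the multiplication.  */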
3946
3947 /* Estimates cost of forcing expression EXPR into a variable. */
3948
3949 static comp_cost
3950 force_expr_to_var_cost (tree expr, bool speed)
3951 {
3952 static bool costs_initialized = false;
3953 static unsigned integer_cost [2];
3954 static unsigned symbol_cost [2];
3955 static unsigned address_cost [2];
3956 tree op0, op1;
3957 comp_cost cost0, cost1, cost;
3958 machine_mode mode;
3959
3960 if (!costs_initialized)
3961 {
3962 tree type = build_pointer_type (integer_type_node);
3963 tree var, addr;
3964 rtx x;
3965 int i;
3966
3967 var = create_tmp_var_raw (integer_type_node, "test_var");
3968 TREE_STATIC (var) = 1;
3969 x = produce_memory_decl_rtl (var, NULL);
3970 SET_DECL_RTL (var, x);
3971
3972 addr = build1 (ADDR_EXPR, type, var);
3973
3974
3975 for (i = 0; i < 2; i++)
3976 {
3977 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3978 2000), i);
3979
3980 symbol_cost[i] = computation_cost (addr, i) + 1;
3981
3982 address_cost[i]
3983 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3985 {
3986 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3987 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3988 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3989 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3990 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3991 fprintf (dump_file, "\n");
3992 }
3993 }
3994
3995 costs_initialized = true;
3996 }
3997
3998 STRIP_NOPS (expr);
3999
4000 if (SSA_VAR_P (expr))
4001 return no_cost;
4002
4003 if (is_gimple_min_invariant (expr))
4004 {
4005 if (TREE_CODE (expr) == INTEGER_CST)
4006 return new_cost (integer_cost [speed], 0);
4007
4008 if (TREE_CODE (expr) == ADDR_EXPR)
4009 {
4010 tree obj = TREE_OPERAND (expr, 0);
4011
4012 if (TREE_CODE (obj) == VAR_DECL
4013 || TREE_CODE (obj) == PARM_DECL
4014 || TREE_CODE (obj) == RESULT_DECL)
4015 return new_cost (symbol_cost [speed], 0);
4016 }
4017
4018 return new_cost (address_cost [speed], 0);
4019 }
4020
4021 switch (TREE_CODE (expr))
4022 {
4023 case POINTER_PLUS_EXPR:
4024 case PLUS_EXPR:
4025 case MINUS_EXPR:
4026 case MULT_EXPR:
4027 op0 = TREE_OPERAND (expr, 0);
4028 op1 = TREE_OPERAND (expr, 1);
4029 STRIP_NOPS (op0);
4030 STRIP_NOPS (op1);
4031 break;
4032
4033 CASE_CONVERT:
4034 case NEGATE_EXPR:
4035 op0 = TREE_OPERAND (expr, 0);
4036 STRIP_NOPS (op0);
4037 op1 = NULL_TREE;
4038 break;
4039
4040 default:
4041 /* Just an arbitrary value, FIXME. */
4042 return new_cost (target_spill_cost[speed], 0);
4043 }
4044
4045 if (op0 == NULL_TREE
4046 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4047 cost0 = no_cost;
4048 else
4049 cost0 = force_expr_to_var_cost (op0, speed);
4050
4051 if (op1 == NULL_TREE
4052 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4053 cost1 = no_cost;
4054 else
4055 cost1 = force_expr_to_var_cost (op1, speed);
4056
4057 mode = TYPE_MODE (TREE_TYPE (expr));
4058 switch (TREE_CODE (expr))
4059 {
4060 case POINTER_PLUS_EXPR:
4061 case PLUS_EXPR:
4062 case MINUS_EXPR:
4063 case NEGATE_EXPR:
4064 cost = new_cost (add_cost (speed, mode), 0);
4065 if (TREE_CODE (expr) != NEGATE_EXPR)
4066 {
4067 tree mult = NULL_TREE;
4068 comp_cost sa_cost;
4069 if (TREE_CODE (op1) == MULT_EXPR)
4070 mult = op1;
4071 else if (TREE_CODE (op0) == MULT_EXPR)
4072 mult = op0;
4073
4074 if (mult != NULL_TREE
4075 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4076 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4077 speed, &sa_cost))
4078 return sa_cost;
4079 }
4080 break;
4081
4082 CASE_CONVERT:
4083 {
4084 tree inner_type, outer_type;
4085 outer_type = TREE_TYPE (expr);
4086 inner_type = TREE_TYPE (op0);
4087 cost = new_cost (convert_cost (TYPE_MODE (outer_type),
4088 TYPE_MODE (inner_type), speed), 0);
4089 }
4090 break;
4091
4092 case MULT_EXPR:
4093 if (cst_and_fits_in_hwi (op0))
4094 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4095 mode, speed), 0);
4096 else if (cst_and_fits_in_hwi (op1))
4097 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4098 mode, speed), 0);
4099 else
4100 return new_cost (target_spill_cost [speed], 0);
4101 break;
4102
4103 default:
4104 gcc_unreachable ();
4105 }
4106
4107 cost = add_costs (cost, cost0);
4108 cost = add_costs (cost, cost1);
4109
4110 /* Bound the cost by target_spill_cost. The parts of complicated
4111 computations often are either loop invariant or at least can
4112 be shared between several iv uses, so letting this grow without
4113 limits would not give reasonable results. */
4114 if (cost.cost > (int) target_spill_cost [speed])
4115 cost.cost = target_spill_cost [speed];
4116
4117 return cost;
4118 }
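
/* A few worked cases of the recursion above (illustrative): an SSA name
   is free; an INTEGER_CST is priced at the precomputed integer_cost;
   "x + y" costs add_cost plus the cost of forcing each non-trivial
   operand; "x + y * 4" may be priced more cheaply through
   get_shiftadd_cost; and "(x + 8) * y", having no constant coefficient
   that fits a HWI, falls back to target_spill_cost, which also bounds
   every result from above.  */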
4119
4120 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4121 invariants the computation depends on. */
4122
4123 static comp_cost
4124 force_var_cost (struct ivopts_data *data,
4125 tree expr, bitmap *depends_on)
4126 {
4127 if (depends_on)
4128 {
4129 fd_ivopts_data = data;
4130 walk_tree (&expr, find_depends, depends_on, NULL);
4131 }
4132
4133 return force_expr_to_var_cost (expr, data->speed);
4134 }
4135
4136 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4137 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4138 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4139 invariants the computation depends on. */
4140
4141 static comp_cost
4142 split_address_cost (struct ivopts_data *data,
4143 tree addr, bool *symbol_present, bool *var_present,
4144 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4145 {
4146 tree core;
4147 HOST_WIDE_INT bitsize;
4148 HOST_WIDE_INT bitpos;
4149 tree toffset;
4150 machine_mode mode;
4151 int unsignedp, volatilep;
4152
4153 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4154 &unsignedp, &volatilep, false);
4155
4156 if (toffset != 0
4157 || bitpos % BITS_PER_UNIT != 0
4158 || TREE_CODE (core) != VAR_DECL)
4159 {
4160 *symbol_present = false;
4161 *var_present = true;
4162 fd_ivopts_data = data;
4163 walk_tree (&addr, find_depends, depends_on, NULL);
4164 return new_cost (target_spill_cost[data->speed], 0);
4165 }
4166
4167 *offset += bitpos / BITS_PER_UNIT;
4168 if (TREE_STATIC (core)
4169 || DECL_EXTERNAL (core))
4170 {
4171 *symbol_present = true;
4172 *var_present = false;
4173 return no_cost;
4174 }
4175
4176 *symbol_present = false;
4177 *var_present = true;
4178 return no_cost;
4179 }
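
/* Example (illustrative): given "static int a[100];", splitting the
   address of a[2] finds core == a, a VAR_DECL with TREE_STATIC set, so
   *SYMBOL_PRESENT becomes true and 2 * sizeof (int) is accumulated into
   *OFFSET at no cost.  For an automatic variable the same split sets
   *VAR_PRESENT instead, still at no cost.  */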
4180
4181 /* Estimates cost of expressing difference of addresses E1 - E2 as
4182 var + symbol + offset. The value of offset is added to OFFSET,
4183 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4184 part is missing. DEPENDS_ON is a set of the invariants the computation
4185 depends on. */
4186
4187 static comp_cost
4188 ptr_difference_cost (struct ivopts_data *data,
4189 tree e1, tree e2, bool *symbol_present, bool *var_present,
4190 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4191 {
4192 HOST_WIDE_INT diff = 0;
4193 aff_tree aff_e1, aff_e2;
4194 tree type;
4195
4196 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4197
4198 if (ptr_difference_const (e1, e2, &diff))
4199 {
4200 *offset += diff;
4201 *symbol_present = false;
4202 *var_present = false;
4203 return no_cost;
4204 }
4205
4206 if (integer_zerop (e2))
4207 return split_address_cost (data, TREE_OPERAND (e1, 0),
4208 symbol_present, var_present, offset, depends_on);
4209
4210 *symbol_present = false;
4211 *var_present = true;
4212
4213 type = signed_type_for (TREE_TYPE (e1));
4214 tree_to_aff_combination (e1, type, &aff_e1);
4215 tree_to_aff_combination (e2, type, &aff_e2);
4216 aff_combination_scale (&aff_e2, -1);
4217 aff_combination_add (&aff_e1, &aff_e2);
4218
4219 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4220 }
4221
4222 /* Estimates cost of expressing difference E1 - E2 as
4223 var + symbol + offset. The value of offset is added to OFFSET,
4224 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4225 part is missing. DEPENDS_ON is a set of the invariants the computation
4226 depends on. */
4227
4228 static comp_cost
4229 difference_cost (struct ivopts_data *data,
4230 tree e1, tree e2, bool *symbol_present, bool *var_present,
4231 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4232 {
4233 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4234 unsigned HOST_WIDE_INT off1, off2;
4235 aff_tree aff_e1, aff_e2;
4236 tree type;
4237
4238 e1 = strip_offset (e1, &off1);
4239 e2 = strip_offset (e2, &off2);
4240 *offset += off1 - off2;
4241
4242 STRIP_NOPS (e1);
4243 STRIP_NOPS (e2);
4244
4245 if (TREE_CODE (e1) == ADDR_EXPR)
4246 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4247 offset, depends_on);
4248 *symbol_present = false;
4249
4250 if (operand_equal_p (e1, e2, 0))
4251 {
4252 *var_present = false;
4253 return no_cost;
4254 }
4255
4256 *var_present = true;
4257
4258 if (integer_zerop (e2))
4259 return force_var_cost (data, e1, depends_on);
4260
4261 if (integer_zerop (e1))
4262 {
4263 comp_cost cost = force_var_cost (data, e2, depends_on);
4264 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4265 return cost;
4266 }
4267
4268 type = signed_type_for (TREE_TYPE (e1));
4269 tree_to_aff_combination (e1, type, &aff_e1);
4270 tree_to_aff_combination (e2, type, &aff_e2);
4271 aff_combination_scale (&aff_e2, -1);
4272 aff_combination_add (&aff_e1, &aff_e2);
4273
4274 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4275 }
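
/* Examples (illustrative): "&a[10] - &a[2]" is recognized by
   ptr_difference_cost as a compile-time constant and only adjusts
   *OFFSET; "x - x" is detected by operand_equal_p and is free; and
   "0 - y" costs forcing y plus a multiplication by -1.  */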
4276
4277 /* Returns true if AFF1 and AFF2 are identical. */
4278
4279 static bool
4280 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4281 {
4282 unsigned i;
4283
4284 if (aff1->n != aff2->n)
4285 return false;
4286
4287 for (i = 0; i < aff1->n; i++)
4288 {
4289 if (aff1->elts[i].coef != aff2->elts[i].coef)
4290 return false;
4291
4292 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4293 return false;
4294 }
4295 return true;
4296 }
4297
4298 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4299
4300 static int
4301 get_expr_id (struct ivopts_data *data, tree expr)
4302 {
4303 struct iv_inv_expr_ent ent;
4304 struct iv_inv_expr_ent **slot;
4305
4306 ent.expr = expr;
4307 ent.hash = iterative_hash_expr (expr, 0);
4308 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4309 if (*slot)
4310 return (*slot)->id;
4311
4312 *slot = XNEW (struct iv_inv_expr_ent);
4313 (*slot)->expr = expr;
4314 (*slot)->hash = ent.hash;
4315 (*slot)->id = data->inv_expr_id++;
4316 return (*slot)->id;
4317 }
4318
4319 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4320 requires a new compiler-generated temporary. Returns -1 otherwise.
4321 ADDRESS_P is a flag indicating if the expression is for address
4322 computation. */
4323
4324 static int
4325 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4326 tree cbase, HOST_WIDE_INT ratio,
4327 bool address_p)
4328 {
4329 aff_tree ubase_aff, cbase_aff;
4330 tree expr, ub, cb;
4331
4332 STRIP_NOPS (ubase);
4333 STRIP_NOPS (cbase);
4334 ub = ubase;
4335 cb = cbase;
4336
4337 if ((TREE_CODE (ubase) == INTEGER_CST)
4338 && (TREE_CODE (cbase) == INTEGER_CST))
4339 return -1;
4340
4341 /* Strips the constant part. */
4342 if (TREE_CODE (ubase) == PLUS_EXPR
4343 || TREE_CODE (ubase) == MINUS_EXPR
4344 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4345 {
4346 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4347 ubase = TREE_OPERAND (ubase, 0);
4348 }
4349
4350 /* Strips the constant part. */
4351 if (TREE_CODE (cbase) == PLUS_EXPR
4352 || TREE_CODE (cbase) == MINUS_EXPR
4353 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4354 {
4355 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4356 cbase = TREE_OPERAND (cbase, 0);
4357 }
4358
4359 if (address_p)
4360 {
4361 if (((TREE_CODE (ubase) == SSA_NAME)
4362 || (TREE_CODE (ubase) == ADDR_EXPR
4363 && is_gimple_min_invariant (ubase)))
4364 && (TREE_CODE (cbase) == INTEGER_CST))
4365 return -1;
4366
4367 if (((TREE_CODE (cbase) == SSA_NAME)
4368 || (TREE_CODE (cbase) == ADDR_EXPR
4369 && is_gimple_min_invariant (cbase)))
4370 && (TREE_CODE (ubase) == INTEGER_CST))
4371 return -1;
4372 }
4373
4374 if (ratio == 1)
4375 {
4376 if (operand_equal_p (ubase, cbase, 0))
4377 return -1;
4378
4379 if (TREE_CODE (ubase) == ADDR_EXPR
4380 && TREE_CODE (cbase) == ADDR_EXPR)
4381 {
4382 tree usym, csym;
4383
4384 usym = TREE_OPERAND (ubase, 0);
4385 csym = TREE_OPERAND (cbase, 0);
4386 if (TREE_CODE (usym) == ARRAY_REF)
4387 {
4388 tree ind = TREE_OPERAND (usym, 1);
4389 if (TREE_CODE (ind) == INTEGER_CST
4390 && tree_fits_shwi_p (ind)
4391 && tree_to_shwi (ind) == 0)
4392 usym = TREE_OPERAND (usym, 0);
4393 }
4394 if (TREE_CODE (csym) == ARRAY_REF)
4395 {
4396 tree ind = TREE_OPERAND (csym, 1);
4397 if (TREE_CODE (ind) == INTEGER_CST
4398 && tree_fits_shwi_p (ind)
4399 && tree_to_shwi (ind) == 0)
4400 csym = TREE_OPERAND (csym, 0);
4401 }
4402 if (operand_equal_p (usym, csym, 0))
4403 return -1;
4404 }
4405 /* Now do a more complex comparison. */
4406 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4407 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4408 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4409 return -1;
4410 }
4411
4412 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4413 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4414
4415 aff_combination_scale (&cbase_aff, -1 * ratio);
4416 aff_combination_add (&ubase_aff, &cbase_aff);
4417 expr = aff_combination_to_tree (&ubase_aff);
4418 return get_expr_id (data, expr);
4419 }
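
/* Example (illustrative): with RATIO 1, UBASE "p + 4" and CBASE "p",
   stripping the constant parts leaves equal bases, so -1 is returned;
   the difference is compile-time computable and needs no temporary.
   With UBASE "p" and CBASE "q" the affine combination p - 1 * q is
   built instead and assigned an id, since a compiler-generated
   loop-invariant temporary would have to hold it.  */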
4420
4421
4422
4423 /* Determines the cost of the computation by which USE is expressed
4424 from induction variable CAND. If ADDRESS_P is true, we just need
4425 to create an address from it, otherwise we want to get it into
4426 a register. A set of invariants we depend on is stored in
4427 DEPENDS_ON. AT is the statement at which the value is computed.
4428 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4429 addressing is likely. */
4430
4431 static comp_cost
4432 get_computation_cost_at (struct ivopts_data *data,
4433 struct iv_use *use, struct iv_cand *cand,
4434 bool address_p, bitmap *depends_on, gimple at,
4435 bool *can_autoinc,
4436 int *inv_expr_id)
4437 {
4438 tree ubase = use->iv->base, ustep = use->iv->step;
4439 tree cbase, cstep;
4440 tree utype = TREE_TYPE (ubase), ctype;
4441 unsigned HOST_WIDE_INT cstepi, offset = 0;
4442 HOST_WIDE_INT ratio, aratio;
4443 bool var_present, symbol_present, stmt_is_after_inc;
4444 comp_cost cost;
4445 widest_int rat;
4446 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4447 machine_mode mem_mode = (address_p
4448 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4449 : VOIDmode);
4450
4451 *depends_on = NULL;
4452
4453 /* Only consider real candidates. */
4454 if (!cand->iv)
4455 return infinite_cost;
4456
4457 cbase = cand->iv->base;
4458 cstep = cand->iv->step;
4459 ctype = TREE_TYPE (cbase);
4460
4461 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4462 {
4463 /* We do not have a precision to express the values of use. */
4464 return infinite_cost;
4465 }
4466
4467 if (address_p
4468 || (use->iv->base_object
4469 && cand->iv->base_object
4470 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4471 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4472 {
4473 /* Do not try to express the address of an object with a computation
4474 based on the address of a different object. This may cause problems
4475 in rtl-level alias analysis (which does not expect this to happen,
4476 as it is invalid in C), and would be unlikely to be useful
4477 anyway. */
4478 if (use->iv->base_object
4479 && cand->iv->base_object
4480 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4481 return infinite_cost;
4482 }
4483
4484 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4485 {
4486 /* TODO -- add direct handling of this case. */
4487 goto fallback;
4488 }
4489
4490 /* CSTEPI is removed from the offset in case the statement is after the
4491 increment. If the step is not constant, we use zero instead.
4492 This is a bit imprecise (there is the extra addition), but
4493 redundancy elimination is likely to transform the code so that
4494 it uses the value of the variable before the increment anyway,
4495 so this is not too unrealistic. */
4496 if (cst_and_fits_in_hwi (cstep))
4497 cstepi = int_cst_value (cstep);
4498 else
4499 cstepi = 0;
4500
4501 if (!constant_multiple_of (ustep, cstep, &rat))
4502 return infinite_cost;
4503
4504 if (wi::fits_shwi_p (rat))
4505 ratio = rat.to_shwi ();
4506 else
4507 return infinite_cost;
4508
4509 STRIP_NOPS (cbase);
4510 ctype = TREE_TYPE (cbase);
4511
4512 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4513
4514 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4515 or ratio == 1, it is better to handle this like
4516
4517 ubase - ratio * cbase + ratio * var
4518
4519 (this also holds in the case ratio == -1, TODO). */
4520
4521 if (cst_and_fits_in_hwi (cbase))
4522 {
4523 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4524 cost = difference_cost (data,
4525 ubase, build_int_cst (utype, 0),
4526 &symbol_present, &var_present, &offset,
4527 depends_on);
4528 cost.cost /= avg_loop_niter (data->current_loop);
4529 }
4530 else if (ratio == 1)
4531 {
4532 tree real_cbase = cbase;
4533
4534 /* Check to see if any adjustment is needed. */
4535 if (cstepi == 0 && stmt_is_after_inc)
4536 {
4537 aff_tree real_cbase_aff;
4538 aff_tree cstep_aff;
4539
4540 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4541 &real_cbase_aff);
4542 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4543
4544 aff_combination_add (&real_cbase_aff, &cstep_aff);
4545 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4546 }
4547
4548 cost = difference_cost (data,
4549 ubase, real_cbase,
4550 &symbol_present, &var_present, &offset,
4551 depends_on);
4552 cost.cost /= avg_loop_niter (data->current_loop);
4553 }
4554 else if (address_p
4555 && !POINTER_TYPE_P (ctype)
4556 && multiplier_allowed_in_address_p
4557 (ratio, mem_mode,
4558 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4559 {
4560 cbase
4561 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4562 cost = difference_cost (data,
4563 ubase, cbase,
4564 &symbol_present, &var_present, &offset,
4565 depends_on);
4566 cost.cost /= avg_loop_niter (data->current_loop);
4567 }
4568 else
4569 {
4570 cost = force_var_cost (data, cbase, depends_on);
4571 cost = add_costs (cost,
4572 difference_cost (data,
4573 ubase, build_int_cst (utype, 0),
4574 &symbol_present, &var_present,
4575 &offset, depends_on));
4576 cost.cost /= avg_loop_niter (data->current_loop);
4577 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4578 }
4579
4580 /* The set of invariants depended on by the sub-use has already been
4581 computed for the first use in the group. */
4582 if (use->sub_id)
4583 {
4584 cost.cost = 0;
4585 if (depends_on && *depends_on)
4586 bitmap_clear (*depends_on);
4587 }
4588 else if (inv_expr_id)
4589 {
4590 *inv_expr_id =
4591 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4592 /* Clear depends on. */
4593 if (*inv_expr_id != -1 && depends_on && *depends_on)
4594 bitmap_clear (*depends_on);
4595 }
4596
4597 /* If we are after the increment, the value of the candidate is higher by
4598 one iteration. */
4599 if (stmt_is_after_inc)
4600 offset -= ratio * cstepi;
4601
4602 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4603 (symbol/var1/const parts may be omitted). If we are looking for an
4604 address, find the cost of addressing this. */
4605 if (address_p)
4606 return add_costs (cost,
4607 get_address_cost (symbol_present, var_present,
4608 offset, ratio, cstepi,
4609 mem_mode,
4610 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4611 speed, stmt_is_after_inc,
4612 can_autoinc));
4613
4614 /* Otherwise estimate the costs for computing the expression. */
4615 if (!symbol_present && !var_present && !offset)
4616 {
4617 if (ratio != 1)
4618 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4619 return cost;
4620 }
4621
4622 /* Symbol + offset should be compile-time computable so consider that they
4623 are added once to the variable, if present. */
4624 if (var_present && (symbol_present || offset))
4625 cost.cost += adjust_setup_cost (data,
4626 add_cost (speed, TYPE_MODE (ctype)));
4627
4628 /* Having offset does not affect runtime cost in case it is added to
4629 symbol, but it increases complexity. */
4630 if (offset)
4631 cost.complexity++;
4632
4633 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4634
4635 aratio = ratio > 0 ? ratio : -ratio;
4636 if (aratio != 1)
4637 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4638 return cost;
4639
4640 fallback:
4641 if (can_autoinc)
4642 *can_autoinc = false;
4643
4644 {
4645 /* Just get the expression, expand it and measure the cost. */
4646 tree comp = get_computation_at (data->current_loop, use, cand, at);
4647
4648 if (!comp)
4649 return infinite_cost;
4650
4651 if (address_p)
4652 comp = build_simple_mem_ref (comp);
4653
4654 return new_cost (computation_cost (comp, speed), 0);
4655 }
4656 }
4657
4658 /* Determines the cost of the computation by which USE is expressed
4659 from induction variable CAND. If ADDRESS_P is true, we just need
4660 to create an address from it, otherwise we want to get it into
4661 a register. A set of invariants we depend on is stored in
4662 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4663 autoinc addressing is likely. */
4664
4665 static comp_cost
4666 get_computation_cost (struct ivopts_data *data,
4667 struct iv_use *use, struct iv_cand *cand,
4668 bool address_p, bitmap *depends_on,
4669 bool *can_autoinc, int *inv_expr_id)
4670 {
4671 return get_computation_cost_at (data,
4672 use, cand, address_p, depends_on, use->stmt,
4673 can_autoinc, inv_expr_id);
4674 }
4675
4676 /* Determines cost of basing replacement of USE on CAND in a generic
4677 expression. */
4678
4679 static bool
4680 determine_use_iv_cost_generic (struct ivopts_data *data,
4681 struct iv_use *use, struct iv_cand *cand)
4682 {
4683 bitmap depends_on;
4684 comp_cost cost;
4685 int inv_expr_id = -1;
4686
4687 /* The simple case first -- if we need to express value of the preserved
4688 original biv, the cost is 0. This also prevents us from counting the
4689 cost of increment twice -- once at this use and once in the cost of
4690 the candidate. */
4691 if (cand->pos == IP_ORIGINAL
4692 && cand->incremented_at == use->stmt)
4693 {
4694 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4695 ERROR_MARK, -1);
4696 return true;
4697 }
4698
4699 cost = get_computation_cost (data, use, cand, false, &depends_on,
4700 NULL, &inv_expr_id);
4701
4702 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4703 inv_expr_id);
4704
4705 return !infinite_cost_p (cost);
4706 }
4707
4708 /* Determines cost of basing replacement of USE on CAND in an address. */
4709
4710 static bool
4711 determine_use_iv_cost_address (struct ivopts_data *data,
4712 struct iv_use *use, struct iv_cand *cand)
4713 {
4714 bitmap depends_on;
4715 bool can_autoinc;
4716 int inv_expr_id = -1;
4717 struct iv_use *sub_use;
4718 comp_cost sub_cost;
4719 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4720 &can_autoinc, &inv_expr_id);
4721
4722 if (cand->ainc_use == use)
4723 {
4724 if (can_autoinc)
4725 cost.cost -= cand->cost_step;
4726 /* If we generated the candidate solely for exploiting autoincrement
4727 opportunities, and it turns out it can't be used, set the cost to
4728 infinity to make sure we ignore it. */
4729 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4730 cost = infinite_cost;
4731 }
4732 for (sub_use = use->next;
4733 sub_use && !infinite_cost_p (cost);
4734 sub_use = sub_use->next)
4735 {
4736 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4737 &can_autoinc, &inv_expr_id);
4738 cost = add_costs (cost, sub_cost);
4739 }
4740
4741 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4742 inv_expr_id);
4743
4744 return !infinite_cost_p (cost);
4745 }
4746
4747 /* Computes value of candidate CAND at position AT in iteration NITER, and
4748 stores it to VAL. */
4749
4750 static void
4751 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4752 aff_tree *val)
4753 {
4754 aff_tree step, delta, nit;
4755 struct iv *iv = cand->iv;
4756 tree type = TREE_TYPE (iv->base);
4757 tree steptype = unsigned_type_for (type);
4761
4762 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4763 aff_combination_convert (&step, steptype);
4764 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4765 aff_combination_convert (&nit, steptype);
4766 aff_combination_mult (&nit, &step, &delta);
4767 if (stmt_after_increment (loop, cand, at))
4768 aff_combination_add (&delta, &step);
4769
4770 tree_to_aff_combination (iv->base, type, val);
4771 if (!POINTER_TYPE_P (type))
4772 aff_combination_convert (val, steptype);
4773 aff_combination_add (val, &delta);
4774 }
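
/* In effect (sketch) the value computed above is

     BASE + NITER * STEP

   or BASE + (NITER + 1) * STEP when AT is after the increment, with the
   arithmetic performed in an unsigned type so that any wrap-around is
   well defined.  */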
4775
4776 /* Returns period of induction variable iv. */
4777
4778 static tree
4779 iv_period (struct iv *iv)
4780 {
4781 tree step = iv->step, period, type;
4782 tree pow2div;
4783
4784 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4785
4786 type = unsigned_type_for (TREE_TYPE (step));
4787 /* The period of the iv is lcm (step, type_range) / step - 1,
4788 i.e., N * type_range / step - 1. Since the type range is a power
4789 of two, N == step >> num_of_ending_zeros_binary (step),
4790 so the final result is
4791
4792 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4795 pow2div = num_ending_zeros (step);
4796
4797 period = build_low_bits_mask (type,
4798 (TYPE_PRECISION (type)
4799 - tree_to_uhwi (pow2div)));
4800
4801 return period;
4802 }
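
/* Worked example: for a 32-bit unsigned iv with step 4 we have
   num_ending_zeros (4) == 2, so the period is a mask of 32 - 2 == 30
   low bits, i.e. (1 << 30) - 1 == 0x3fffffff -- the number of times the
   iv can be stepped before its value repeats.  */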
4803
4804 /* Returns the comparison operator used when eliminating the iv USE. */
4805
4806 static enum tree_code
4807 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4808 {
4809 struct loop *loop = data->current_loop;
4810 basic_block ex_bb;
4811 edge exit;
4812
4813 ex_bb = gimple_bb (use->stmt);
4814 exit = EDGE_SUCC (ex_bb, 0);
4815 if (flow_bb_inside_loop_p (loop, exit->dest))
4816 exit = EDGE_SUCC (ex_bb, 1);
4817
4818 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4819 }
4820
4821 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4822 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4823 calculation is performed in non-wrapping type.
4824
4825 TODO: More generally, we could test for the situation that
4826 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4827 This would require knowing the sign of OFFSET. */
4828
4829 static bool
4830 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4831 {
4832 enum tree_code code;
4833 tree e1, e2;
4834 aff_tree aff_e1, aff_e2, aff_offset;
4835
4836 if (!nowrap_type_p (TREE_TYPE (base)))
4837 return false;
4838
4839 base = expand_simple_operations (base);
4840
4841 if (TREE_CODE (base) == SSA_NAME)
4842 {
4843 gimple stmt = SSA_NAME_DEF_STMT (base);
4844
4845 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4846 return false;
4847
4848 code = gimple_assign_rhs_code (stmt);
4849 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4850 return false;
4851
4852 e1 = gimple_assign_rhs1 (stmt);
4853 e2 = gimple_assign_rhs2 (stmt);
4854 }
4855 else
4856 {
4857 code = TREE_CODE (base);
4858 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4859 return false;
4860 e1 = TREE_OPERAND (base, 0);
4861 e2 = TREE_OPERAND (base, 1);
4862 }
4863
4864 /* Use affine expansion as a deeper inspection to prove the equality. */
4865 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4866 &aff_e2, &data->name_expansion_cache);
4867 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4868 &aff_offset, &data->name_expansion_cache);
4869 aff_combination_scale (&aff_offset, -1);
4870 switch (code)
4871 {
4872 case PLUS_EXPR:
4873 aff_combination_add (&aff_e2, &aff_offset);
4874 if (aff_combination_zero_p (&aff_e2))
4875 return true;
4876
4877 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4878 &aff_e1, &data->name_expansion_cache);
4879 aff_combination_add (&aff_e1, &aff_offset);
4880 return aff_combination_zero_p (&aff_e1);
4881
4882 case POINTER_PLUS_EXPR:
4883 aff_combination_add (&aff_e2, &aff_offset);
4884 return aff_combination_zero_p (&aff_e2);
4885
4886 default:
4887 return false;
4888 }
4889 }
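
/* Illustrative case of the PLUS_EXPR handling above: if BASE is defined
   as "e1 + e2" in a non-wrapping type and the affine expansion of
   OFFSET equals that of e2 (or of e1), then BASE - OFFSET is simply the
   other operand, so the subtraction cannot overflow.  */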
4890
4891 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4892 comparison with CAND. NITER describes the number of iterations of
4893 the loop. If successful, the comparison in COMP_P is altered accordingly.
4894
4895 We aim to handle the following situation:
4896
4897 sometype *base, *p;
4898 int a, b, i;
4899
4900 i = a;
4901 p = p_0 = base + a;
4902
4903 do
4904 {
4905 bla (*p);
4906 p++;
4907 i++;
4908 }
4909 while (i < b);
4910
4911 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4912 We aim to optimize this to
4913
4914 p = p_0 = base + a;
4915 do
4916 {
4917 bla (*p);
4918 p++;
4919 }
4920 while (p < p_0 - a + b);
4921
4922 This preserves correctness, since the pointer arithmetic does not
4923 overflow. More precisely:
4924
4925 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4926 overflow in computing it or the values of p.
4927 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4928 overflow. To prove this, we use the fact that p_0 = base + a. */
4929
4930 static bool
4931 iv_elimination_compare_lt (struct ivopts_data *data,
4932 struct iv_cand *cand, enum tree_code *comp_p,
4933 struct tree_niter_desc *niter)
4934 {
4935 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4936 struct aff_tree nit, tmpa, tmpb;
4937 enum tree_code comp;
4938 HOST_WIDE_INT step;
4939
4940 /* We need to know that the candidate induction variable does not overflow.
4941 While more complex analysis may be used to prove this, for now just
4942 check that the variable appears in the original program and that it
4943 is computed in a type that guarantees no overflows. */
4944 cand_type = TREE_TYPE (cand->iv->base);
4945 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4946 return false;
4947
4948 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4949 the calculation of the BOUND could overflow, making the comparison
4950 invalid. */
4951 if (!data->loop_single_exit_p)
4952 return false;
4953
4954 /* We need to be able to decide whether candidate is increasing or decreasing
4955 in order to choose the right comparison operator. */
4956 if (!cst_and_fits_in_hwi (cand->iv->step))
4957 return false;
4958 step = int_cst_value (cand->iv->step);
4959
4960 /* Check that the number of iterations matches the expected pattern:
4961 a + 1 > b ? 0 : b - a - 1. */
4962 mbz = niter->may_be_zero;
4963 if (TREE_CODE (mbz) == GT_EXPR)
4964 {
4965 /* Handle a + 1 > b. */
4966 tree op0 = TREE_OPERAND (mbz, 0);
4967 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4968 {
4969 a = TREE_OPERAND (op0, 0);
4970 b = TREE_OPERAND (mbz, 1);
4971 }
4972 else
4973 return false;
4974 }
4975 else if (TREE_CODE (mbz) == LT_EXPR)
4976 {
4977 tree op1 = TREE_OPERAND (mbz, 1);
4978
4979 /* Handle b < a + 1. */
4980 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4981 {
4982 a = TREE_OPERAND (op1, 0);
4983 b = TREE_OPERAND (mbz, 0);
4984 }
4985 else
4986 return false;
4987 }
4988 else
4989 return false;
4990
4991 /* Expected number of iterations is B - A - 1. Check that it matches
4992 the actual number, i.e., that B - A - NITER = 1. */
4993 tree_to_aff_combination (niter->niter, nit_type, &nit);
4994 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4995 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4996 aff_combination_scale (&nit, -1);
4997 aff_combination_scale (&tmpa, -1);
4998 aff_combination_add (&tmpb, &tmpa);
4999 aff_combination_add (&tmpb, &nit);
5000 if (tmpb.n != 0 || tmpb.offset != 1)
5001 return false;
5002
5003 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5004 overflow. */
5005 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5006 cand->iv->step,
5007 fold_convert (TREE_TYPE (cand->iv->step), a));
5008 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5009 return false;
5010
5011 /* Determine the new comparison operator. */
5012 comp = step < 0 ? GT_EXPR : LT_EXPR;
5013 if (*comp_p == NE_EXPR)
5014 *comp_p = comp;
5015 else if (*comp_p == EQ_EXPR)
5016 *comp_p = invert_tree_comparison (comp, false);
5017 else
5018 gcc_unreachable ();
5019
5020 return true;
5021 }
5022
5023 /* Check whether it is possible to express the condition in USE by comparison
5024 of candidate CAND. If so, store the value compared with to BOUND, and the
5025 comparison operator to COMP. */
5026
5027 static bool
5028 may_eliminate_iv (struct ivopts_data *data,
5029 struct iv_use *use, struct iv_cand *cand, tree *bound,
5030 enum tree_code *comp)
5031 {
5032 basic_block ex_bb;
5033 edge exit;
5034 tree period;
5035 struct loop *loop = data->current_loop;
5036 aff_tree bnd;
5037 struct tree_niter_desc *desc = NULL;
5038
5039 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5040 return false;
5041
5042 /* For now this works only for exits that dominate the loop latch.
5043 TODO: extend to other conditions inside loop body. */
5044 ex_bb = gimple_bb (use->stmt);
5045 if (use->stmt != last_stmt (ex_bb)
5046 || gimple_code (use->stmt) != GIMPLE_COND
5047 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5048 return false;
5049
5050 exit = EDGE_SUCC (ex_bb, 0);
5051 if (flow_bb_inside_loop_p (loop, exit->dest))
5052 exit = EDGE_SUCC (ex_bb, 1);
5053 if (flow_bb_inside_loop_p (loop, exit->dest))
5054 return false;
5055
5056 desc = niter_for_exit (data, exit);
5057 if (!desc)
5058 return false;
5059
5060 /* Determine whether we can use the variable to test the exit condition.
5061 This is the case iff the period of the induction variable is greater
5062 than the number of iterations for which the exit condition is true. */
5063 period = iv_period (cand->iv);
5064
5065 /* If the number of iterations is constant, compare against it directly. */
5066 if (TREE_CODE (desc->niter) == INTEGER_CST)
5067 {
5068 /* See cand_value_at. */
5069 if (stmt_after_increment (loop, cand, use->stmt))
5070 {
5071 if (!tree_int_cst_lt (desc->niter, period))
5072 return false;
5073 }
5074 else
5075 {
5076 if (tree_int_cst_lt (period, desc->niter))
5077 return false;
5078 }
5079 }
5080
5081 /* If not, and if this is the only possible exit of the loop, see whether
5082 we can get a conservative estimate on the number of iterations of the
5083 entire loop and compare against that instead. */
5084 else
5085 {
5086 widest_int period_value, max_niter;
5087
5088 max_niter = desc->max;
5089 if (stmt_after_increment (loop, cand, use->stmt))
5090 max_niter += 1;
5091 period_value = wi::to_widest (period);
5092 if (wi::gtu_p (max_niter, period_value))
5093 {
5094 /* See if we can take advantage of inferred loop bound information. */
5095 if (data->loop_single_exit_p)
5096 {
5097 if (!max_loop_iterations (loop, &max_niter))
5098 return false;
5099 /* The loop bound is already adjusted by adding 1. */
5100 if (wi::gtu_p (max_niter, period_value))
5101 return false;
5102 }
5103 else
5104 return false;
5105 }
5106 }
5107
5108 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5109
5110 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5111 aff_combination_to_tree (&bnd));
5112 *comp = iv_elimination_compare (data, use);
5113
5114 /* It is unlikely that computing the number of iterations using division
5115 would be more profitable than keeping the original induction variable. */
5116 if (expression_expensive_p (*bound))
5117 return false;
5118
5119 /* Sometimes, it is possible to handle the situation that the number of
5120 iterations may be zero unless additional assumptions hold, by using <
5121 instead of != in the exit condition.
5122
5123 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5124 base the exit condition on it. However, that is often too
5125 expensive. */
5126 if (!integer_zerop (desc->may_be_zero))
5127 return iv_elimination_compare_lt (data, cand, comp, desc);
5128
5129 return true;
5130 }
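
/* Illustrative example: in

     for (i = 0; i != n; i++)
       p[i] = 0;

   a candidate pointer iv q == p + i * sizeof (*p) may replace the exit
   test by "q != p + n", provided the period of q is not smaller than the
   number of iterations; BOUND is then "p + n" and COMP comes from
   iv_elimination_compare.  If the number of iterations may be zero, the
   < rewrite attempted by iv_elimination_compare_lt is tried instead.  */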
5131
5132 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5133 be copied, if it is used in the loop body and DATA->body_includes_call. */
5134
5135 static int
5136 parm_decl_cost (struct ivopts_data *data, tree bound)
5137 {
5138 tree sbound = bound;
5139 STRIP_NOPS (sbound);
5140
5141 if (TREE_CODE (sbound) == SSA_NAME
5142 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5143 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5144 && data->body_includes_call)
5145 return COSTS_N_INSNS (1);
5146
5147 return 0;
5148 }
5149
5150 /* Determines cost of basing replacement of USE on CAND in a condition. */
5151
5152 static bool
5153 determine_use_iv_cost_condition (struct ivopts_data *data,
5154 struct iv_use *use, struct iv_cand *cand)
5155 {
5156 tree bound = NULL_TREE;
5157 struct iv *cmp_iv;
5158 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5159 comp_cost elim_cost, express_cost, cost, bound_cost;
5160 bool ok;
5161 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5162 tree *control_var, *bound_cst;
5163 enum tree_code comp = ERROR_MARK;
5164
5165 /* Only consider real candidates. */
5166 if (!cand->iv)
5167 {
5168 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5169 ERROR_MARK, -1);
5170 return false;
5171 }
5172
5173 /* Try iv elimination. */
5174 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5175 {
5176 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5177 if (elim_cost.cost == 0)
5178 elim_cost.cost = parm_decl_cost (data, bound);
5179 else if (TREE_CODE (bound) == INTEGER_CST)
5180 elim_cost.cost = 0;
5181 /* If we replace a loop condition 'i < n' with 'p < base + n',
5182 depends_on_elim will have 'base' and 'n' set, which implies
5183 that both 'base' and 'n' will be live during the loop. More likely,
5184 'base + n' will be loop invariant, resulting in only one live value
5185 during the loop. So in that case we clear depends_on_elim and set
5186 elim_inv_expr_id instead. */
5187 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5188 {
5189 elim_inv_expr_id = get_expr_id (data, bound);
5190 bitmap_clear (depends_on_elim);
5191 }
5192 /* The bound is a loop invariant, so it will only be computed
5193 once. */
5194 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5195 }
5196 else
5197 elim_cost = infinite_cost;
5198
5199 /* Try expressing the original giv. If it is compared with an invariant,
5200 note that we cannot get rid of it. */
5201 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5202 NULL, &cmp_iv);
5203 gcc_assert (ok);
5204
5205 /* When the condition is a comparison of the candidate IV against
5206 zero, prefer this IV.
5207
5208 TODO: The constant that we're subtracting from the cost should
5209 be target-dependent. This information should be added to the
5210 target costs for each backend. */
5211 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5212 && integer_zerop (*bound_cst)
5213 && (operand_equal_p (*control_var, cand->var_after, 0)
5214 || operand_equal_p (*control_var, cand->var_before, 0)))
5215 elim_cost.cost -= 1;
5216
5217 express_cost = get_computation_cost (data, use, cand, false,
5218 &depends_on_express, NULL,
5219 &express_inv_expr_id);
5220 fd_ivopts_data = data;
5221 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5222
5223 /* Count the cost of the original bound as well. */
5224 bound_cost = force_var_cost (data, *bound_cst, NULL);
5225 if (bound_cost.cost == 0)
5226 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5227 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5228 bound_cost.cost = 0;
5229 express_cost.cost += bound_cost.cost;
5230
5231 /* Choose the better approach, preferring the eliminated IV. */
5232 if (compare_costs (elim_cost, express_cost) <= 0)
5233 {
5234 cost = elim_cost;
5235 depends_on = depends_on_elim;
5236 depends_on_elim = NULL;
5237 inv_expr_id = elim_inv_expr_id;
5238 }
5239 else
5240 {
5241 cost = express_cost;
5242 depends_on = depends_on_express;
5243 depends_on_express = NULL;
5244 bound = NULL_TREE;
5245 comp = ERROR_MARK;
5246 inv_expr_id = express_inv_expr_id;
5247 }
5248
5249 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5250
5251 if (depends_on_elim)
5252 BITMAP_FREE (depends_on_elim);
5253 if (depends_on_express)
5254 BITMAP_FREE (depends_on_express);
5255
5256 return !infinite_cost_p (cost);
5257 }
5258
5259 /* Determines cost of basing replacement of USE on CAND. Returns false
5260 if USE cannot be based on CAND. */
5261
5262 static bool
5263 determine_use_iv_cost (struct ivopts_data *data,
5264 struct iv_use *use, struct iv_cand *cand)
5265 {
5266 switch (use->type)
5267 {
5268 case USE_NONLINEAR_EXPR:
5269 return determine_use_iv_cost_generic (data, use, cand);
5270
5271 case USE_ADDRESS:
5272 return determine_use_iv_cost_address (data, use, cand);
5273
5274 case USE_COMPARE:
5275 return determine_use_iv_cost_condition (data, use, cand);
5276
5277 default:
5278 gcc_unreachable ();
5279 }
5280 }
5281
5282 /* Return true if get_computation_cost indicates that autoincrement is
5283 a possibility for the pair of USE and CAND, false otherwise. */
5284
5285 static bool
5286 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5287 struct iv_cand *cand)
5288 {
5289 bitmap depends_on;
5290 bool can_autoinc;
5291 comp_cost cost;
5292
5293 if (use->type != USE_ADDRESS)
5294 return false;
5295
5296 cost = get_computation_cost (data, use, cand, true, &depends_on,
5297 &can_autoinc, NULL);
5298
5299 BITMAP_FREE (depends_on);
5300
5301 return !infinite_cost_p (cost) && can_autoinc;
5302 }
5303
5304 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5305 use that allows autoincrement, and set their AINC_USE if possible. */
5306
5307 static void
5308 set_autoinc_for_original_candidates (struct ivopts_data *data)
5309 {
5310 unsigned i, j;
5311
5312 for (i = 0; i < n_iv_cands (data); i++)
5313 {
5314 struct iv_cand *cand = iv_cand (data, i);
5315 struct iv_use *closest_before = NULL;
5316 struct iv_use *closest_after = NULL;
5317 if (cand->pos != IP_ORIGINAL)
5318 continue;
5319
5320 for (j = 0; j < n_iv_uses (data); j++)
5321 {
5322 struct iv_use *use = iv_use (data, j);
5323 unsigned uid = gimple_uid (use->stmt);
5324
5325 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5326 continue;
5327
5328 if (uid < gimple_uid (cand->incremented_at)
5329 && (closest_before == NULL
5330 || uid > gimple_uid (closest_before->stmt)))
5331 closest_before = use;
5332
5333 if (uid > gimple_uid (cand->incremented_at)
5334 && (closest_after == NULL
5335 || uid < gimple_uid (closest_after->stmt)))
5336 closest_after = use;
5337 }
5338
5339 if (closest_before != NULL
5340 && autoinc_possible_for_pair (data, closest_before, cand))
5341 cand->ainc_use = closest_before;
5342 else if (closest_after != NULL
5343 && autoinc_possible_for_pair (data, closest_after, cand))
5344 cand->ainc_use = closest_after;
5345 }
5346 }
5347
5348 /* Finds the candidates for the induction variables. */
5349
5350 static void
5351 find_iv_candidates (struct ivopts_data *data)
5352 {
5353 /* Add commonly used ivs. */
5354 add_standard_iv_candidates (data);
5355
5356 /* Add old induction variables. */
5357 add_old_ivs_candidates (data);
5358
5359 /* Add induction variables derived from uses. */
5360 add_derived_ivs_candidates (data);
5361
5362 set_autoinc_for_original_candidates (data);
5363
5364 /* Record the important candidates. */
5365 record_important_candidates (data);
5366 }
5367
5368 /* Determines costs of basing the use of the iv on an iv candidate. */
5369
5370 static void
5371 determine_use_iv_costs (struct ivopts_data *data)
5372 {
5373 unsigned i, j;
5374 struct iv_use *use;
5375 struct iv_cand *cand;
5376 bitmap to_clear = BITMAP_ALLOC (NULL);
5377
5378 alloc_use_cost_map (data);
5379
5380 for (i = 0; i < n_iv_uses (data); i++)
5381 {
5382 use = iv_use (data, i);
5383
5384 if (data->consider_all_candidates)
5385 {
5386 for (j = 0; j < n_iv_cands (data); j++)
5387 {
5388 cand = iv_cand (data, j);
5389 determine_use_iv_cost (data, use, cand);
5390 }
5391 }
5392 else
5393 {
5394 bitmap_iterator bi;
5395
5396 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5397 {
5398 cand = iv_cand (data, j);
5399 if (!determine_use_iv_cost (data, use, cand))
5400 bitmap_set_bit (to_clear, j);
5401 }
5402
5403 /* Remove the candidates for which the cost is infinite from
5404 the list of related candidates. */
5405 bitmap_and_compl_into (use->related_cands, to_clear);
5406 bitmap_clear (to_clear);
5407 }
5408 }
5409
5410 BITMAP_FREE (to_clear);
5411
5412 if (dump_file && (dump_flags & TDF_DETAILS))
5413 {
5414 fprintf (dump_file, "Use-candidate costs:\n");
5415
5416 for (i = 0; i < n_iv_uses (data); i++)
5417 {
5418 use = iv_use (data, i);
5419
5420 fprintf (dump_file, "Use %d:\n", i);
5421 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5422 for (j = 0; j < use->n_map_members; j++)
5423 {
5424 if (!use->cost_map[j].cand
5425 || infinite_cost_p (use->cost_map[j].cost))
5426 continue;
5427
5428 fprintf (dump_file, " %d\t%d\t%d\t",
5429 use->cost_map[j].cand->id,
5430 use->cost_map[j].cost.cost,
5431 use->cost_map[j].cost.complexity);
5432 if (use->cost_map[j].depends_on)
5433 bitmap_print (dump_file,
5434 use->cost_map[j].depends_on, "","");
5435 if (use->cost_map[j].inv_expr_id != -1)
5436 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5437 fprintf (dump_file, "\n");
5438 }
5439
5440 fprintf (dump_file, "\n");
5441 }
5442 fprintf (dump_file, "\n");
5443 }
5444 }
5445
5446 /* Determines cost of the candidate CAND. */
5447
5448 static void
5449 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5450 {
5451 comp_cost cost_base;
5452 unsigned cost, cost_step;
5453 tree base;
5454
5455 if (!cand->iv)
5456 {
5457 cand->cost = 0;
5458 return;
5459 }
5460
5461 /* There are two costs associated with the candidate -- its increment
5462 and its initialization. The second is almost negligible for any loop
5463 that rolls enough, so we take it into account only a little. */
5464
5465 base = cand->iv->base;
5466 cost_base = force_var_cost (data, base, NULL);
5467 /* It will be exceptional that the iv register happens to be initialized with
5468 the proper value at no cost. In general, there will at least be a regcopy
5469 or a const set. */
5470 if (cost_base.cost == 0)
5471 cost_base.cost = COSTS_N_INSNS (1);
5472 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5473
5474 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5475
5476 /* Prefer the original ivs unless we may gain something by replacing it.
5477 The reason is to make debugging simpler; so this is not relevant for
5478 artificial ivs created by other optimization passes. */
5479 if (cand->pos != IP_ORIGINAL
5480 || !SSA_NAME_VAR (cand->var_before)
5481 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5482 cost++;
5483
5484 /* Prefer not to insert statements into latch unless there are some
5485 already (so that we do not create unnecessary jumps). */
5486 if (cand->pos == IP_END
5487 && empty_block_p (ip_end_pos (data->current_loop)))
5488 cost++;
5489
5490 cand->cost = cost;
5491 cand->cost_step = cost_step;
5492 }
5493
5494 /* Determines costs of computation of the candidates. */
5495
5496 static void
5497 determine_iv_costs (struct ivopts_data *data)
5498 {
5499 unsigned i;
5500
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5502 {
5503 fprintf (dump_file, "Candidate costs:\n");
5504 fprintf (dump_file, " cand\tcost\n");
5505 }
5506
5507 for (i = 0; i < n_iv_cands (data); i++)
5508 {
5509 struct iv_cand *cand = iv_cand (data, i);
5510
5511 determine_iv_cost (data, cand);
5512
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5514 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5515 }
5516
5517 if (dump_file && (dump_flags & TDF_DETAILS))
5518 fprintf (dump_file, "\n");
5519 }
5520
5521 /* Calculates cost for having SIZE induction variables. */
5522
5523 static unsigned
5524 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5525 {
5526 /* We add size to the cost, so that we prefer eliminating ivs
5527 if possible. */
5528 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5529 data->body_includes_call);
5530 }
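
/* For instance (illustrative, assuming the usual behavior of
   estimate_reg_pressure_cost): with data->regs_used invariants already
   live, a set of 3 ivs is charged 3 plus the pressure cost of 3 more
   registers; once the total exceeds the registers available on the
   target, spill costs dominate and larger iv sets quickly become
   unattractive.  */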
5531
5532 /* For each size of the induction variable set determine the penalty. */
5533
5534 static void
5535 determine_set_costs (struct ivopts_data *data)
5536 {
5537 unsigned j, n;
5538 gphi *phi;
5539 gphi_iterator psi;
5540 tree op;
5541 struct loop *loop = data->current_loop;
5542 bitmap_iterator bi;
5543
5544 if (dump_file && (dump_flags & TDF_DETAILS))
5545 {
5546 fprintf (dump_file, "Global costs:\n");
5547 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5548 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5549 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5550 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5551 }
5552
5553 n = 0;
5554 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5555 {
5556 phi = psi.phi ();
5557 op = PHI_RESULT (phi);
5558
5559 if (virtual_operand_p (op))
5560 continue;
5561
5562 if (get_iv (data, op))
5563 continue;
5564
5565 n++;
5566 }
5567
5568 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5569 {
5570 struct version_info *info = ver_info (data, j);
5571
5572 if (info->inv_id && info->has_nonlin_use)
5573 n++;
5574 }
5575
5576 data->regs_used = n;
5577 if (dump_file && (dump_flags & TDF_DETAILS))
5578 fprintf (dump_file, " regs_used %d\n", n);
5579
5580 if (dump_file && (dump_flags & TDF_DETAILS))
5581 {
5582 fprintf (dump_file, " cost for size:\n");
5583 fprintf (dump_file, " ivs\tcost\n");
5584 for (j = 0; j <= 2 * target_avail_regs; j++)
5585 fprintf (dump_file, " %d\t%d\n", j,
5586 ivopts_global_cost_for_size (data, j));
5587 fprintf (dump_file, "\n");
5588 }
5589 }
5590
5591 /* Returns true if A is a cheaper cost pair than B. */
5592
5593 static bool
5594 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5595 {
5596 int cmp;
5597
5598 if (!a)
5599 return false;
5600
5601 if (!b)
5602 return true;
5603
5604 cmp = compare_costs (a->cost, b->cost);
5605 if (cmp < 0)
5606 return true;
5607
5608 if (cmp > 0)
5609 return false;
5610
5611 /* In case the costs are the same, prefer the cheaper candidate. */
5612 if (a->cand->cost < b->cand->cost)
5613 return true;
5614
5615 return false;
5616 }
5617
5618
5619 /* Returns the candidate by which USE is expressed in IVS. */
5620
5621 static struct cost_pair *
5622 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5623 {
5624 return ivs->cand_for_use[use->id];
5625 }
5626
5627 /* Computes the cost field of IVS structure. */
5628
5629 static void
5630 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5631 {
5632 comp_cost cost = ivs->cand_use_cost;
5633
5634 cost.cost += ivs->cand_cost;
5635
5636 cost.cost += ivopts_global_cost_for_size (data,
5637 ivs->n_regs + ivs->num_used_inv_expr);
5638
5639 ivs->cost = cost;
5640 }
5641
5642 /* Remove invariants in set INVS from set IVS. */
5643
5644 static void
5645 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5646 {
5647 bitmap_iterator bi;
5648 unsigned iid;
5649
5650 if (!invs)
5651 return;
5652
5653 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5654 {
5655 ivs->n_invariant_uses[iid]--;
5656 if (ivs->n_invariant_uses[iid] == 0)
5657 ivs->n_regs--;
5658 }
5659 }
5660
5661 /* Set USE not to be expressed by any candidate in IVS. */
5662
5663 static void
5664 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5665 struct iv_use *use)
5666 {
5667 unsigned uid = use->id, cid;
5668 struct cost_pair *cp;
5669
5670 cp = ivs->cand_for_use[uid];
5671 if (!cp)
5672 return;
5673 cid = cp->cand->id;
5674
5675 ivs->bad_uses++;
5676 ivs->cand_for_use[uid] = NULL;
5677 ivs->n_cand_uses[cid]--;
5678
5679 if (ivs->n_cand_uses[cid] == 0)
5680 {
5681 bitmap_clear_bit (ivs->cands, cid);
5682 /* Do not count the pseudocandidates. */
5683 if (cp->cand->iv)
5684 ivs->n_regs--;
5685 ivs->n_cands--;
5686 ivs->cand_cost -= cp->cand->cost;
5687
5688 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5689 }
5690
5691 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5692
5693 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5694
5695 if (cp->inv_expr_id != -1)
5696 {
5697 ivs->used_inv_expr[cp->inv_expr_id]--;
5698 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5699 ivs->num_used_inv_expr--;
5700 }
5701 iv_ca_recount_cost (data, ivs);
5702 }
5703
5704 /* Add invariants in set INVS to set IVS. */
5705
5706 static void
5707 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5708 {
5709 bitmap_iterator bi;
5710 unsigned iid;
5711
5712 if (!invs)
5713 return;
5714
5715 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5716 {
5717 ivs->n_invariant_uses[iid]++;
5718 if (ivs->n_invariant_uses[iid] == 1)
5719 ivs->n_regs++;
5720 }
5721 }
5722
5723 /* Set cost pair for USE in set IVS to CP. */
5724
5725 static void
5726 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5727 struct iv_use *use, struct cost_pair *cp)
5728 {
5729 unsigned uid = use->id, cid;
5730
5731 if (ivs->cand_for_use[uid] == cp)
5732 return;
5733
5734 if (ivs->cand_for_use[uid])
5735 iv_ca_set_no_cp (data, ivs, use);
5736
5737 if (cp)
5738 {
5739 cid = cp->cand->id;
5740
5741 ivs->bad_uses--;
5742 ivs->cand_for_use[uid] = cp;
5743 ivs->n_cand_uses[cid]++;
5744 if (ivs->n_cand_uses[cid] == 1)
5745 {
5746 bitmap_set_bit (ivs->cands, cid);
5747 /* Do not count the pseudocandidates. */
5748 if (cp->cand->iv)
5749 ivs->n_regs++;
5750 ivs->n_cands++;
5751 ivs->cand_cost += cp->cand->cost;
5752
5753 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5754 }
5755
5756 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5757 iv_ca_set_add_invariants (ivs, cp->depends_on);
5758
5759 if (cp->inv_expr_id != -1)
5760 {
5761 ivs->used_inv_expr[cp->inv_expr_id]++;
5762 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5763 ivs->num_used_inv_expr++;
5764 }
5765 iv_ca_recount_cost (data, ivs);
5766 }
5767 }
5768
5769 /* Extend set IVS by expressing USE by some of the candidates in it
5770 if possible. Consider all important candidates if candidates in
5771 set IVS don't give any result. */
5772
5773 static void
5774 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5775 struct iv_use *use)
5776 {
5777 struct cost_pair *best_cp = NULL, *cp;
5778 bitmap_iterator bi;
5779 unsigned i;
5780 struct iv_cand *cand;
5781
5782 gcc_assert (ivs->upto >= use->id);
5783 ivs->upto++;
5784 ivs->bad_uses++;
5785
5786 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5787 {
5788 cand = iv_cand (data, i);
5789 cp = get_use_iv_cost (data, use, cand);
5790 if (cheaper_cost_pair (cp, best_cp))
5791 best_cp = cp;
5792 }
5793
5794 if (best_cp == NULL)
5795 {
5796 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5797 {
5798 cand = iv_cand (data, i);
5799 cp = get_use_iv_cost (data, use, cand);
5800 if (cheaper_cost_pair (cp, best_cp))
5801 best_cp = cp;
5802 }
5803 }
5804
5805 iv_ca_set_cp (data, ivs, use, best_cp);
5806 }
5807
5808 /* Get cost for assignment IVS. */
5809
5810 static comp_cost
5811 iv_ca_cost (struct iv_ca *ivs)
5812 {
5813 /* This was a conditional expression but it triggered a bug in
5814 Sun C 5.5. */
5815 if (ivs->bad_uses)
5816 return infinite_cost;
5817 else
5818 return ivs->cost;
5819 }
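
/* For illustration: returning infinite_cost while BAD_USES is nonzero
   guarantees that a partially built assignment can never win a cost
   comparison against a complete one during the search below. */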
5820
5821 /* Returns true if all dependences of CP are among invariants in IVS. */
5822
5823 static bool
5824 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5825 {
5826 unsigned i;
5827 bitmap_iterator bi;
5828
5829 if (!cp->depends_on)
5830 return true;
5831
5832 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5833 {
5834 if (ivs->n_invariant_uses[i] == 0)
5835 return false;
5836 }
5837
5838 return true;
5839 }
5840
5841 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5842 chains it before NEXT_CHANGE. */
5843
5844 static struct iv_ca_delta *
5845 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5846 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5847 {
5848 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5849
5850 change->use = use;
5851 change->old_cp = old_cp;
5852 change->new_cp = new_cp;
5853 change->next_change = next_change;
5854
5855 return change;
5856 }
5857
5858 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5859 are rewritten. */
5860
5861 static struct iv_ca_delta *
5862 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5863 {
5864 struct iv_ca_delta *last;
5865
5866 if (!l2)
5867 return l1;
5868
5869 if (!l1)
5870 return l2;
5871
5872 for (last = l1; last->next_change; last = last->next_change)
5873 continue;
5874 last->next_change = l2;
5875
5876 return l1;
5877 }
5878
5879 /* Reverse the list of changes DELTA, forming its inverse. */
5880
5881 static struct iv_ca_delta *
5882 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5883 {
5884 struct iv_ca_delta *act, *next, *prev = NULL;
5885
5886 for (act = delta; act; act = next)
5887 {
5888 next = act->next_change;
5889 act->next_change = prev;
5890 prev = act;
5891
5892 std::swap (act->old_cp, act->new_cp);
5893 }
5894
5895 return prev;
5896 }
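
/* A minimal standalone sketch (illustration only, not part of the pass) of
   why reversing the list while swapping the OLD_CP/NEW_CP endpoints of each
   node yields the inverse change set: committing the reversed list undoes
   the original delta node by node, in the opposite order. The toy types
   below are hypothetical simplifications of struct iv_ca_delta. */
#if 0
struct toy_delta { int old_cp, new_cp; struct toy_delta *next; };

static struct toy_delta *
toy_delta_reverse (struct toy_delta *delta)
{
  struct toy_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      /* Swapping the endpoints turns each "old -> new" step into the
	 inverse "new -> old" step. */
      int tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
#endif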
5897
5898 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5899 reverted instead. */
5900
5901 static void
5902 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5903 struct iv_ca_delta *delta, bool forward)
5904 {
5905 struct cost_pair *from, *to;
5906 struct iv_ca_delta *act;
5907
5908 if (!forward)
5909 delta = iv_ca_delta_reverse (delta);
5910
5911 for (act = delta; act; act = act->next_change)
5912 {
5913 from = act->old_cp;
5914 to = act->new_cp;
5915 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5916 iv_ca_set_cp (data, ivs, act->use, to);
5917 }
5918
5919 if (!forward)
5920 iv_ca_delta_reverse (delta);
5921 }
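
/* For illustration, the trial-and-rollback pattern in which the delta
   machinery is used below (see iv_ca_extend and iv_ca_narrow): tentatively
   commit a delta, measure the resulting cost, then revert, deciding only
   afterwards whether to keep the change. */
#if 0
  iv_ca_delta_commit (data, ivs, delta, true);	/* Apply the changes. */
  cost = iv_ca_cost (ivs);			/* Measure the new cost. */
  iv_ca_delta_commit (data, ivs, delta, false);	/* Revert them again. */
#endif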
5922
5923 /* Returns true if CAND is used in IVS. */
5924
5925 static bool
5926 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5927 {
5928 return ivs->n_cand_uses[cand->id] > 0;
5929 }
5930
5931 /* Returns the number of induction variable candidates in the set IVS. */
5932
5933 static unsigned
5934 iv_ca_n_cands (struct iv_ca *ivs)
5935 {
5936 return ivs->n_cands;
5937 }
5938
5939 /* Free the list of changes DELTA. */
5940
5941 static void
5942 iv_ca_delta_free (struct iv_ca_delta **delta)
5943 {
5944 struct iv_ca_delta *act, *next;
5945
5946 for (act = *delta; act; act = next)
5947 {
5948 next = act->next_change;
5949 free (act);
5950 }
5951
5952 *delta = NULL;
5953 }
5954
5955 /* Allocates a new iv candidate assignment. */
5956
5957 static struct iv_ca *
5958 iv_ca_new (struct ivopts_data *data)
5959 {
5960 struct iv_ca *nw = XNEW (struct iv_ca);
5961
5962 nw->upto = 0;
5963 nw->bad_uses = 0;
5964 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5965 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5966 nw->cands = BITMAP_ALLOC (NULL);
5967 nw->n_cands = 0;
5968 nw->n_regs = 0;
5969 nw->cand_use_cost = no_cost;
5970 nw->cand_cost = 0;
5971 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5972 nw->cost = no_cost;
5973 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5974 nw->num_used_inv_expr = 0;
5975
5976 return nw;
5977 }
5978
5979 /* Free memory occupied by the set IVS. */
5980
5981 static void
5982 iv_ca_free (struct iv_ca **ivs)
5983 {
5984 free ((*ivs)->cand_for_use);
5985 free ((*ivs)->n_cand_uses);
5986 BITMAP_FREE ((*ivs)->cands);
5987 free ((*ivs)->n_invariant_uses);
5988 free ((*ivs)->used_inv_expr);
5989 free (*ivs);
5990 *ivs = NULL;
5991 }
5992
5993 /* Dumps IVS to FILE. */
5994
5995 static void
5996 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5997 {
5998 const char *pref = " invariants ";
5999 unsigned i;
6000 comp_cost cost = iv_ca_cost (ivs);
6001
6002 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6003 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6004 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6005 bitmap_print (file, ivs->cands, " candidates: ","\n");
6006
6007 for (i = 0; i < ivs->upto; i++)
6008 {
6009 struct iv_use *use = iv_use (data, i);
6010 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6011 if (cp)
6012 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6013 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6014 else
6015 fprintf (file, " use:%d --> ??\n", use->id);
6016 }
6017
6018 for (i = 1; i <= data->max_inv_id; i++)
6019 if (ivs->n_invariant_uses[i])
6020 {
6021 fprintf (file, "%s%d", pref, i);
6022 pref = ", ";
6023 }
6024 fprintf (file, "\n\n");
6025 }
6026
6027 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6028 of the new set, and store the differences in DELTA. The number of
6029 induction variables in the new set is stored to N_IVS. When the flag
6030 MIN_NCAND is true, try to find a solution with a minimal number of ivs. */
6031
6032 static comp_cost
6033 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_cand *cand, struct iv_ca_delta **delta,
6035 unsigned *n_ivs, bool min_ncand)
6036 {
6037 unsigned i;
6038 comp_cost cost;
6039 struct iv_use *use;
6040 struct cost_pair *old_cp, *new_cp;
6041
6042 *delta = NULL;
6043 for (i = 0; i < ivs->upto; i++)
6044 {
6045 use = iv_use (data, i);
6046 old_cp = iv_ca_cand_for_use (ivs, use);
6047
6048 if (old_cp
6049 && old_cp->cand == cand)
6050 continue;
6051
6052 new_cp = get_use_iv_cost (data, use, cand);
6053 if (!new_cp)
6054 continue;
6055
6056 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6057 continue;
6058
6059 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6060 continue;
6061
6062 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6063 }
6064
6065 iv_ca_delta_commit (data, ivs, *delta, true);
6066 cost = iv_ca_cost (ivs);
6067 if (n_ivs)
6068 *n_ivs = iv_ca_n_cands (ivs);
6069 iv_ca_delta_commit (data, ivs, *delta, false);
6070
6071 return cost;
6072 }
6073
6074 /* Try narrowing set IVS by removing CAND. Return the cost of
6075 the new set and store the differences in DELTA. START is
6076 the candidate with which we start narrowing. */
6077
6078 static comp_cost
6079 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_cand *cand, struct iv_cand *start,
6081 struct iv_ca_delta **delta)
6082 {
6083 unsigned i, ci;
6084 struct iv_use *use;
6085 struct cost_pair *old_cp, *new_cp, *cp;
6086 bitmap_iterator bi;
6087 struct iv_cand *cnd;
6088 comp_cost cost, best_cost, acost;
6089
6090 *delta = NULL;
6091 for (i = 0; i < n_iv_uses (data); i++)
6092 {
6093 use = iv_use (data, i);
6094
6095 old_cp = iv_ca_cand_for_use (ivs, use);
6096 if (old_cp->cand != cand)
6097 continue;
6098
6099 best_cost = iv_ca_cost (ivs);
6100 /* Start narrowing with START. */
6101 new_cp = get_use_iv_cost (data, use, start);
6102
6103 if (data->consider_all_candidates)
6104 {
6105 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6106 {
6107 if (ci == cand->id || (start && ci == start->id))
6108 continue;
6109
6110 cnd = iv_cand (data, ci);
6111
6112 cp = get_use_iv_cost (data, use, cnd);
6113 if (!cp)
6114 continue;
6115
6116 iv_ca_set_cp (data, ivs, use, cp);
6117 acost = iv_ca_cost (ivs);
6118
6119 if (compare_costs (acost, best_cost) < 0)
6120 {
6121 best_cost = acost;
6122 new_cp = cp;
6123 }
6124 }
6125 }
6126 else
6127 {
6128 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6129 {
6130 if (ci == cand->id || (start && ci == start->id))
6131 continue;
6132
6133 cnd = iv_cand (data, ci);
6134
6135 cp = get_use_iv_cost (data, use, cnd);
6136 if (!cp)
6137 continue;
6138
6139 iv_ca_set_cp (data, ivs, use, cp);
6140 acost = iv_ca_cost (ivs);
6141
6142 if (compare_costs (acost, best_cost) < 0)
6143 {
6144 best_cost = acost;
6145 new_cp = cp;
6146 }
6147 }
6148 }
6149 /* Restore the old cp for this use. */
6150 iv_ca_set_cp (data, ivs, use, old_cp);
6151
6152 if (!new_cp)
6153 {
6154 iv_ca_delta_free (delta);
6155 return infinite_cost;
6156 }
6157
6158 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6159 }
6160
6161 iv_ca_delta_commit (data, ivs, *delta, true);
6162 cost = iv_ca_cost (ivs);
6163 iv_ca_delta_commit (data, ivs, *delta, false);
6164
6165 return cost;
6166 }
6167
6168 /* Try optimizing the set of candidates IVS by removing candidates other
6169 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6170 differences in DELTA. */
6171
6172 static comp_cost
6173 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6174 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6175 {
6176 bitmap_iterator bi;
6177 struct iv_ca_delta *act_delta, *best_delta;
6178 unsigned i;
6179 comp_cost best_cost, acost;
6180 struct iv_cand *cand;
6181
6182 best_delta = NULL;
6183 best_cost = iv_ca_cost (ivs);
6184
6185 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6186 {
6187 cand = iv_cand (data, i);
6188
6189 if (cand == except_cand)
6190 continue;
6191
6192 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6193
6194 if (compare_costs (acost, best_cost) < 0)
6195 {
6196 best_cost = acost;
6197 iv_ca_delta_free (&best_delta);
6198 best_delta = act_delta;
6199 }
6200 else
6201 iv_ca_delta_free (&act_delta);
6202 }
6203
6204 if (!best_delta)
6205 {
6206 *delta = NULL;
6207 return best_cost;
6208 }
6209
6210 /* Recurse to possibly remove other unnecessary ivs. */
6211 iv_ca_delta_commit (data, ivs, best_delta, true);
6212 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6213 iv_ca_delta_commit (data, ivs, best_delta, false);
6214 *delta = iv_ca_delta_join (best_delta, *delta);
6215 return best_cost;
6216 }
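
/* For illustration: the recursion above commits the best single removal,
   prunes further on top of it, and reverts; joining BEST_DELTA with the
   recursive *DELTA hands the caller a single list describing the whole
   multi-step narrowing, which iv_ca_delta_commit can apply in one go. */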
6217
6218 /* Check whether CAND_IDX is a candidate other than OLD_CAND with a
6219 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6220 its cost_pair; otherwise just return BEST_CP. */
6221
6222 static struct cost_pair*
6223 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6224 unsigned int cand_idx, struct iv_cand *old_cand,
6225 struct cost_pair *best_cp)
6226 {
6227 struct iv_cand *cand;
6228 struct cost_pair *cp;
6229
6230 gcc_assert (old_cand != NULL && best_cp != NULL);
6231 if (cand_idx == old_cand->id)
6232 return best_cp;
6233
6234 cand = iv_cand (data, cand_idx);
6235 cp = get_use_iv_cost (data, use, cand);
6236 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6237 return cp;
6238
6239 return best_cp;
6240 }
6241
6242 /* Try breaking the locally optimal fixed point for IVS by replacing
6243 candidates that are used by more than one iv use. For each such
6244 candidate, this function tries to represent the iv uses under it using
6245 other candidates with lower local cost, then tries to prune the new set.
6246 If the new set has lower cost, it returns the new cost after recording
6247 the candidate replacement in the list DELTA. */
6248
6249 static comp_cost
6250 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6251 struct iv_ca_delta **delta)
6252 {
6253 bitmap_iterator bi, bj;
6254 unsigned int i, j, k;
6255 struct iv_use *use;
6256 struct iv_cand *cand;
6257 comp_cost orig_cost, acost;
6258 struct iv_ca_delta *act_delta, *tmp_delta;
6259 struct cost_pair *old_cp, *best_cp = NULL;
6260
6261 *delta = NULL;
6262 orig_cost = iv_ca_cost (ivs);
6263
6264 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6265 {
6266 if (ivs->n_cand_uses[i] == 1
6267 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6268 continue;
6269
6270 cand = iv_cand (data, i);
6271
6272 act_delta = NULL;
6273 /* Represent uses under current candidate using other ones with
6274 lower local cost. */
6275 for (j = 0; j < ivs->upto; j++)
6276 {
6277 use = iv_use (data, j);
6278 old_cp = iv_ca_cand_for_use (ivs, use);
6279
6280 if (old_cp->cand != cand)
6281 continue;
6282
6283 best_cp = old_cp;
6284 if (data->consider_all_candidates)
6285 for (k = 0; k < n_iv_cands (data); k++)
6286 best_cp = cheaper_cost_with_cand (data, use, k,
6287 old_cp->cand, best_cp);
6288 else
6289 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6290 best_cp = cheaper_cost_with_cand (data, use, k,
6291 old_cp->cand, best_cp);
6292
6293 if (best_cp == old_cp)
6294 continue;
6295
6296 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6297 }
6298 /* No need for further pruning. */
6299 if (!act_delta)
6300 continue;
6301
6302 /* Prune the new candidate set. */
6303 iv_ca_delta_commit (data, ivs, act_delta, true);
6304 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6305 iv_ca_delta_commit (data, ivs, act_delta, false);
6306 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6307
6308 if (compare_costs (acost, orig_cost) < 0)
6309 {
6310 *delta = act_delta;
6311 return acost;
6312 }
6313 else
6314 iv_ca_delta_free (&act_delta);
6315 }
6316
6317 return orig_cost;
6318 }
6319
6320 /* Tries to extend the set IVS in the best possible way in order
6321 to express USE. If ORIGINALP is true, prefer candidates from
6322 the original set of IVs, otherwise favor important candidates not
6323 based on any memory object. */
6324
6325 static bool
6326 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6327 struct iv_use *use, bool originalp)
6328 {
6329 comp_cost best_cost, act_cost;
6330 unsigned i;
6331 bitmap_iterator bi;
6332 struct iv_cand *cand;
6333 struct iv_ca_delta *best_delta = NULL, *act_delta;
6334 struct cost_pair *cp;
6335
6336 iv_ca_add_use (data, ivs, use);
6337 best_cost = iv_ca_cost (ivs);
6338 cp = iv_ca_cand_for_use (ivs, use);
6339 if (cp)
6340 {
6341 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6342 iv_ca_set_no_cp (data, ivs, use);
6343 }
6344
6345 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6346 first try important candidates not based on any memory object. Only if
6347 this fails, try the specific ones. Rationale -- in loops with many
6348 variables the best choice often is to use just one generic biv. If we
6349 added here many ivs specific to the uses, the optimization algorithm later
6350 would be likely to get stuck in a local minimum, thus causing us to create
6351 too many ivs. The approach from few ivs to more seems more likely to be
6352 successful -- starting from few ivs, replacing an expensive use by a
6353 specific iv should always be a win. */
6354 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6355 {
6356 cand = iv_cand (data, i);
6357
6358 if (originalp && cand->pos != IP_ORIGINAL)
6359 continue;
6360
6361 if (!originalp && cand->iv->base_object != NULL_TREE)
6362 continue;
6363
6364 if (iv_ca_cand_used_p (ivs, cand))
6365 continue;
6366
6367 cp = get_use_iv_cost (data, use, cand);
6368 if (!cp)
6369 continue;
6370
6371 iv_ca_set_cp (data, ivs, use, cp);
6372 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6373 true);
6374 iv_ca_set_no_cp (data, ivs, use);
6375 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6376
6377 if (compare_costs (act_cost, best_cost) < 0)
6378 {
6379 best_cost = act_cost;
6380
6381 iv_ca_delta_free (&best_delta);
6382 best_delta = act_delta;
6383 }
6384 else
6385 iv_ca_delta_free (&act_delta);
6386 }
6387
6388 if (infinite_cost_p (best_cost))
6389 {
6390 for (i = 0; i < use->n_map_members; i++)
6391 {
6392 cp = use->cost_map + i;
6393 cand = cp->cand;
6394 if (!cand)
6395 continue;
6396
6397 /* Already tried this. */
6398 if (cand->important)
6399 {
6400 if (originalp && cand->pos == IP_ORIGINAL)
6401 continue;
6402 if (!originalp && cand->iv->base_object == NULL_TREE)
6403 continue;
6404 }
6405
6406 if (iv_ca_cand_used_p (ivs, cand))
6407 continue;
6408
6409 act_delta = NULL;
6410 iv_ca_set_cp (data, ivs, use, cp);
6411 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6412 iv_ca_set_no_cp (data, ivs, use);
6413 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6414 cp, act_delta);
6415
6416 if (compare_costs (act_cost, best_cost) < 0)
6417 {
6418 best_cost = act_cost;
6419
6420 if (best_delta)
6421 iv_ca_delta_free (&best_delta);
6422 best_delta = act_delta;
6423 }
6424 else
6425 iv_ca_delta_free (&act_delta);
6426 }
6427 }
6428
6429 iv_ca_delta_commit (data, ivs, best_delta, true);
6430 iv_ca_delta_free (&best_delta);
6431
6432 return !infinite_cost_p (best_cost);
6433 }
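
/* For illustration: the function above searches in two phases -- the
   preferred important candidates first, and the use's full cost map only
   when the best cost is still infinite -- which implements the
   few-ivs-to-more rationale spelled out in the comment inside it. */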
6434
6435 /* Finds an initial assignment of candidates to uses. */
6436
6437 static struct iv_ca *
6438 get_initial_solution (struct ivopts_data *data, bool originalp)
6439 {
6440 struct iv_ca *ivs = iv_ca_new (data);
6441 unsigned i;
6442
6443 for (i = 0; i < n_iv_uses (data); i++)
6444 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6445 {
6446 iv_ca_free (&ivs);
6447 return NULL;
6448 }
6449
6450 return ivs;
6451 }
6452
6453 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6454 points to a bool variable; if it is true, this function tries to break
6455 the locally optimal fixed point by replacing candidates in IVS. */
6456
6457 static bool
6458 try_improve_iv_set (struct ivopts_data *data,
6459 struct iv_ca *ivs, bool *try_replace_p)
6460 {
6461 unsigned i, n_ivs;
6462 comp_cost acost, best_cost = iv_ca_cost (ivs);
6463 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6464 struct iv_cand *cand;
6465
6466 /* Try extending the set of induction variables by one. */
6467 for (i = 0; i < n_iv_cands (data); i++)
6468 {
6469 cand = iv_cand (data, i);
6470
6471 if (iv_ca_cand_used_p (ivs, cand))
6472 continue;
6473
6474 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6475 if (!act_delta)
6476 continue;
6477
6478 /* If we successfully added the candidate and the set is small enough,
6479 try optimizing it by removing other candidates. */
6480 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6481 {
6482 iv_ca_delta_commit (data, ivs, act_delta, true);
6483 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6484 iv_ca_delta_commit (data, ivs, act_delta, false);
6485 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6486 }
6487
6488 if (compare_costs (acost, best_cost) < 0)
6489 {
6490 best_cost = acost;
6491 iv_ca_delta_free (&best_delta);
6492 best_delta = act_delta;
6493 }
6494 else
6495 iv_ca_delta_free (&act_delta);
6496 }
6497
6498 if (!best_delta)
6499 {
6500 /* Try removing the candidates from the set instead. */
6501 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6502
6503 if (!best_delta && *try_replace_p)
6504 {
6505 *try_replace_p = false;
6506 /* So far the candidate selection algorithm tends to choose fewer
6507 IVs, so that it can handle loops with many variables for which
6508 the best choice is often to use only one general biv. One
6509 weakness is that it cannot handle the opposite case, in which
6510 different candidates should be chosen for different uses. To
6511 solve this problem, we replace candidates in the manner described
6512 in the comments of iv_ca_replace, thus giving the general
6513 algorithm a chance to break the locally optimal fixed point. */
6514 best_cost = iv_ca_replace (data, ivs, &best_delta);
6515 }
6516
6517 if (!best_delta)
6518 return false;
6519 }
6520
6521 iv_ca_delta_commit (data, ivs, best_delta, true);
6522 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6523 iv_ca_delta_free (&best_delta);
6524 return true;
6525 }
6526
6527 /* Attempts to find the optimal set of induction variables. We use a simple
6528 greedy heuristic: we try to replace at most one candidate in the selected
6529 solution and remove the unused ivs while this improves the cost. */
6530
6531 static struct iv_ca *
6532 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6533 {
6534 struct iv_ca *set;
6535 bool try_replace_p = true;
6536
6537 /* Get the initial solution. */
6538 set = get_initial_solution (data, originalp);
6539 if (!set)
6540 {
6541 if (dump_file && (dump_flags & TDF_DETAILS))
6542 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6543 return NULL;
6544 }
6545
6546 if (dump_file && (dump_flags & TDF_DETAILS))
6547 {
6548 fprintf (dump_file, "Initial set of candidates:\n");
6549 iv_ca_dump (data, dump_file, set);
6550 }
6551
6552 while (try_improve_iv_set (data, set, &try_replace_p))
6553 {
6554 if (dump_file && (dump_flags & TDF_DETAILS))
6555 {
6556 fprintf (dump_file, "Improved to:\n");
6557 iv_ca_dump (data, dump_file, set);
6558 }
6559 }
6560
6561 return set;
6562 }
6563
6564 static struct iv_ca *
6565 find_optimal_iv_set (struct ivopts_data *data)
6566 {
6567 unsigned i;
6568 struct iv_ca *set, *origset;
6569 struct iv_use *use;
6570 comp_cost cost, origcost;
6571
6572 /* Determine the cost based on a strategy that starts with the original
6573 IVs, then try again using a strategy that prefers candidates not based
6574 on any memory object. */
6575 origset = find_optimal_iv_set_1 (data, true);
6576 set = find_optimal_iv_set_1 (data, false);
6577
6578 if (!origset && !set)
6579 return NULL;
6580
6581 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6582 cost = set ? iv_ca_cost (set) : infinite_cost;
6583
6584 if (dump_file && (dump_flags & TDF_DETAILS))
6585 {
6586 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6587 origcost.cost, origcost.complexity);
6588 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6589 cost.cost, cost.complexity);
6590 }
6591
6592 /* Choose the one with the best cost. */
6593 if (compare_costs (origcost, cost) <= 0)
6594 {
6595 if (set)
6596 iv_ca_free (&set);
6597 set = origset;
6598 }
6599 else if (origset)
6600 iv_ca_free (&origset);
6601
6602 for (i = 0; i < n_iv_uses (data); i++)
6603 {
6604 use = iv_use (data, i);
6605 use->selected = iv_ca_cand_for_use (set, use)->cand;
6606 }
6607
6608 return set;
6609 }
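
/* A worked illustration of the strategy comparison above, with made-up
   numbers: if the original-IV solution costs (12, complexity 2) and the
   generic one costs (10, complexity 3), compare_costs prefers the latter,
   since costs are compared first and complexity only breaks ties, so
   ORIGSET is freed and SET is kept. */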
6610
6611 /* Creates a new induction variable corresponding to CAND. */
6612
6613 static void
6614 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6615 {
6616 gimple_stmt_iterator incr_pos;
6617 tree base;
6618 bool after = false;
6619
6620 if (!cand->iv)
6621 return;
6622
6623 switch (cand->pos)
6624 {
6625 case IP_NORMAL:
6626 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6627 break;
6628
6629 case IP_END:
6630 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6631 after = true;
6632 break;
6633
6634 case IP_AFTER_USE:
6635 after = true;
6636 /* fall through */
6637 case IP_BEFORE_USE:
6638 incr_pos = gsi_for_stmt (cand->incremented_at);
6639 break;
6640
6641 case IP_ORIGINAL:
6642 /* Mark that the iv is preserved. */
6643 name_info (data, cand->var_before)->preserve_biv = true;
6644 name_info (data, cand->var_after)->preserve_biv = true;
6645
6646 /* Rewrite the increment so that it uses var_before directly. */
6647 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6648 return;
6649 }
6650
6651 gimple_add_tmp_var (cand->var_before);
6652
6653 base = unshare_expr (cand->iv->base);
6654
6655 create_iv (base, unshare_expr (cand->iv->step),
6656 cand->var_before, data->current_loop,
6657 &incr_pos, after, &cand->var_before, &cand->var_after);
6658 }
6659
6660 /* Creates new induction variables described in SET. */
6661
6662 static void
6663 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6664 {
6665 unsigned i;
6666 struct iv_cand *cand;
6667 bitmap_iterator bi;
6668
6669 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6670 {
6671 cand = iv_cand (data, i);
6672 create_new_iv (data, cand);
6673 }
6674
6675 if (dump_file && (dump_flags & TDF_DETAILS))
6676 {
6677 fprintf (dump_file, "Selected IV set for loop %d",
6678 data->current_loop->num);
6679 if (data->loop_loc != UNKNOWN_LOCATION)
6680 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6681 LOCATION_LINE (data->loop_loc));
6682 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6683 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6684 {
6685 cand = iv_cand (data, i);
6686 dump_cand (dump_file, cand);
6687 }
6688 fprintf (dump_file, "\n");
6689 }
6690 }
6691
6692 /* Rewrites USE (definition of iv used in a nonlinear expression)
6693 using candidate CAND. */
6694
6695 static void
6696 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6697 struct iv_use *use, struct iv_cand *cand)
6698 {
6699 tree comp;
6700 tree op, tgt;
6701 gassign *ass;
6702 gimple_stmt_iterator bsi;
6703
6704 /* An important special case -- if we are asked to express the value of
6705 the original iv by itself, just exit; there is no need to
6706 introduce a new computation (that might also need casting the
6707 variable to unsigned and back). */
6708 if (cand->pos == IP_ORIGINAL
6709 && cand->incremented_at == use->stmt)
6710 {
6711 enum tree_code stmt_code;
6712
6713 gcc_assert (is_gimple_assign (use->stmt));
6714 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6715
6716 /* Check whether we may leave the computation unchanged.
6717 This is the case only if it does not rely on other
6718 computations in the loop -- otherwise, the computation
6719 we rely upon may be removed in remove_unused_ivs,
6720 thus leading to ICE. */
6721 stmt_code = gimple_assign_rhs_code (use->stmt);
6722 if (stmt_code == PLUS_EXPR
6723 || stmt_code == MINUS_EXPR
6724 || stmt_code == POINTER_PLUS_EXPR)
6725 {
6726 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6727 op = gimple_assign_rhs2 (use->stmt);
6728 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6729 op = gimple_assign_rhs1 (use->stmt);
6730 else
6731 op = NULL_TREE;
6732 }
6733 else
6734 op = NULL_TREE;
6735
6736 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6737 return;
6738 }
6739
6740 comp = get_computation (data->current_loop, use, cand);
6741 gcc_assert (comp != NULL_TREE);
6742
6743 switch (gimple_code (use->stmt))
6744 {
6745 case GIMPLE_PHI:
6746 tgt = PHI_RESULT (use->stmt);
6747
6748 /* If we should keep the biv, do not replace it. */
6749 if (name_info (data, tgt)->preserve_biv)
6750 return;
6751
6752 bsi = gsi_after_labels (gimple_bb (use->stmt));
6753 break;
6754
6755 case GIMPLE_ASSIGN:
6756 tgt = gimple_assign_lhs (use->stmt);
6757 bsi = gsi_for_stmt (use->stmt);
6758 break;
6759
6760 default:
6761 gcc_unreachable ();
6762 }
6763
6764 if (!valid_gimple_rhs_p (comp)
6765 || (gimple_code (use->stmt) != GIMPLE_PHI
6766 /* We can't allow re-allocating the stmt as it might be pointed
6767 to still. */
6768 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6769 >= gimple_num_ops (gsi_stmt (bsi)))))
6770 {
6771 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6772 true, GSI_SAME_STMT);
6773 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6774 {
6775 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6776 /* As this isn't a plain copy we have to reset alignment
6777 information. */
6778 if (SSA_NAME_PTR_INFO (comp))
6779 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6780 }
6781 }
6782
6783 if (gimple_code (use->stmt) == GIMPLE_PHI)
6784 {
6785 ass = gimple_build_assign (tgt, comp);
6786 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6787
6788 bsi = gsi_for_stmt (use->stmt);
6789 remove_phi_node (&bsi, false);
6790 }
6791 else
6792 {
6793 gimple_assign_set_rhs_from_tree (&bsi, comp);
6794 use->stmt = gsi_stmt (bsi);
6795 }
6796 }
6797
6798 /* Performs a peephole optimization to reorder the iv update statement with
6799 a mem ref to enable instruction combining in later phases. The mem ref uses
6800 the iv value before the update, so the reordering transformation requires
6801 adjustment of the offset. CAND is the selected IV_CAND.
6802
6803 Example:
6804
6805 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6806 iv2 = iv1 + 1;
6807
6808 if (t < val) (1)
6809 goto L;
6810 goto Head;
6811
6812
6813 Directly propagating t over to (1) will introduce an overlapping live
6814 range, thus increasing register pressure. This peephole transforms it into:
6815
6816
6817 iv2 = iv1 + 1;
6818 t = MEM_REF (base, iv2, 8, 8);
6819 if (t < val)
6820 goto L;
6821 goto Head;
6822 */
6823
6824 static void
6825 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6826 {
6827 tree var_after;
6828 gimple iv_update, stmt;
6829 basic_block bb;
6830 gimple_stmt_iterator gsi, gsi_iv;
6831
6832 if (cand->pos != IP_NORMAL)
6833 return;
6834
6835 var_after = cand->var_after;
6836 iv_update = SSA_NAME_DEF_STMT (var_after);
6837
6838 bb = gimple_bb (iv_update);
6839 gsi = gsi_last_nondebug_bb (bb);
6840 stmt = gsi_stmt (gsi);
6841
6842 /* Only handle a conditional statement for now. */
6843 if (gimple_code (stmt) != GIMPLE_COND)
6844 return;
6845
6846 gsi_prev_nondebug (&gsi);
6847 stmt = gsi_stmt (gsi);
6848 if (stmt != iv_update)
6849 return;
6850
6851 gsi_prev_nondebug (&gsi);
6852 if (gsi_end_p (gsi))
6853 return;
6854
6855 stmt = gsi_stmt (gsi);
6856 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6857 return;
6858
6859 if (stmt != use->stmt)
6860 return;
6861
6862 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6863 return;
6864
6865 if (dump_file && (dump_flags & TDF_DETAILS))
6866 {
6867 fprintf (dump_file, "Reordering \n");
6868 print_gimple_stmt (dump_file, iv_update, 0, 0);
6869 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6870 fprintf (dump_file, "\n");
6871 }
6872
6873 gsi = gsi_for_stmt (use->stmt);
6874 gsi_iv = gsi_for_stmt (iv_update);
6875 gsi_move_before (&gsi_iv, &gsi);
6876
6877 cand->pos = IP_BEFORE_USE;
6878 cand->incremented_at = use->stmt;
6879 }
6880
6881 /* Rewrites USE (address that is an iv) using candidate CAND. */
6882
6883 static void
6884 rewrite_use_address_1 (struct ivopts_data *data,
6885 struct iv_use *use, struct iv_cand *cand)
6886 {
6887 aff_tree aff;
6888 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6889 tree base_hint = NULL_TREE;
6890 tree ref, iv;
6891 bool ok;
6892
6893 adjust_iv_update_pos (cand, use);
6894 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6895 gcc_assert (ok);
6896 unshare_aff_combination (&aff);
6897
6898 /* To avoid undefined overflow problems, all IV candidates use unsigned
6899 integer types. The drawback is that this makes it impossible for
6900 create_mem_ref to distinguish an IV that is based on a memory object
6901 from one that represents simply an offset.
6902
6903 To work around this problem, we pass a hint to create_mem_ref that
6904 indicates which variable (if any) in aff is an IV based on a memory
6905 object. Note that we only consider the candidate. If this is not
6906 based on an object, the base of the reference is in some subexpression
6907 of the use -- but these will use pointer types, so they are recognized
6908 by the create_mem_ref heuristics anyway. */
6909 if (cand->iv->base_object)
6910 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6911
6912 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6913 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6914 reference_alias_ptr_type (*use->op_p),
6915 iv, base_hint, data->speed);
6916 copy_ref_info (ref, *use->op_p);
6917 *use->op_p = ref;
6918 }
6919
6920 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6921 first use of a group, rewrites sub uses in the group too. */
6922
6923 static void
6924 rewrite_use_address (struct ivopts_data *data,
6925 struct iv_use *use, struct iv_cand *cand)
6926 {
6927 struct iv_use *next;
6928
6929 gcc_assert (use->sub_id == 0);
6930 rewrite_use_address_1 (data, use, cand);
6931 update_stmt (use->stmt);
6932
6933 for (next = use->next; next != NULL; next = next->next)
6934 {
6935 rewrite_use_address_1 (data, next, cand);
6936 update_stmt (next->stmt);
6937 }
6940 }
6941
6942 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6943 candidate CAND. */
6944
6945 static void
6946 rewrite_use_compare (struct ivopts_data *data,
6947 struct iv_use *use, struct iv_cand *cand)
6948 {
6949 tree comp, *var_p, op, bound;
6950 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6951 enum tree_code compare;
6952 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6953 bool ok;
6954
6955 bound = cp->value;
6956 if (bound)
6957 {
6958 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6959 tree var_type = TREE_TYPE (var);
6960 gimple_seq stmts;
6961
6962 if (dump_file && (dump_flags & TDF_DETAILS))
6963 {
6964 fprintf (dump_file, "Replacing exit test: ");
6965 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6966 }
6967 compare = cp->comp;
6968 bound = unshare_expr (fold_convert (var_type, bound));
6969 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6970 if (stmts)
6971 gsi_insert_seq_on_edge_immediate (
6972 loop_preheader_edge (data->current_loop),
6973 stmts);
6974
6975 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6976 gimple_cond_set_lhs (cond_stmt, var);
6977 gimple_cond_set_code (cond_stmt, compare);
6978 gimple_cond_set_rhs (cond_stmt, op);
6979 return;
6980 }
6981
6982 /* The induction variable elimination failed; just express the original
6983 giv. */
6984 comp = get_computation (data->current_loop, use, cand);
6985 gcc_assert (comp != NULL_TREE);
6986
6987 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6988 gcc_assert (ok);
6989
6990 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6991 true, GSI_SAME_STMT);
6992 }
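
/* For illustration, a typical exit-test replacement performed above, with
   hypothetical SSA names: when elimination succeeded and CP->VALUE holds a
   precomputed bound, a test such as

     if (i_1 < n_2)

   may become

     if (ivtmp_3 != bound_4)

   where bound_4 is computed once on the preheader edge and CP->COMP
   supplies the new comparison code. */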
6993
6994 /* Rewrites USE using candidate CAND. */
6995
6996 static void
6997 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6998 {
6999 switch (use->type)
7000 {
7001 case USE_NONLINEAR_EXPR:
7002 rewrite_use_nonlinear_expr (data, use, cand);
7003 break;
7004
7005 case USE_ADDRESS:
7006 rewrite_use_address (data, use, cand);
7007 break;
7008
7009 case USE_COMPARE:
7010 rewrite_use_compare (data, use, cand);
7011 break;
7012
7013 default:
7014 gcc_unreachable ();
7015 }
7016
7017 update_stmt (use->stmt);
7018 }
7019
7020 /* Rewrite the uses using the selected induction variables. */
7021
7022 static void
7023 rewrite_uses (struct ivopts_data *data)
7024 {
7025 unsigned i;
7026 struct iv_cand *cand;
7027 struct iv_use *use;
7028
7029 for (i = 0; i < n_iv_uses (data); i++)
7030 {
7031 use = iv_use (data, i);
7032 cand = use->selected;
7033 gcc_assert (cand);
7034
7035 rewrite_use (data, use, cand);
7036 }
7037 }
7038
7039 /* Removes the ivs that are not used after rewriting. */
7040
7041 static void
7042 remove_unused_ivs (struct ivopts_data *data)
7043 {
7044 unsigned j;
7045 bitmap_iterator bi;
7046 bitmap toremove = BITMAP_ALLOC (NULL);
7047
7048 /* Figure out an order in which to release SSA DEFs so that we don't
7049 release something that we'd have to propagate into a debug stmt
7050 afterwards. */
7051 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7052 {
7053 struct version_info *info;
7054
7055 info = ver_info (data, j);
7056 if (info->iv
7057 && !integer_zerop (info->iv->step)
7058 && !info->inv_id
7059 && !info->iv->have_use_for
7060 && !info->preserve_biv)
7061 {
7062 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7063
7064 tree def = info->iv->ssa_name;
7065
7066 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7067 {
7068 imm_use_iterator imm_iter;
7069 use_operand_p use_p;
7070 gimple stmt;
7071 int count = 0;
7072
7073 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7074 {
7075 if (!gimple_debug_bind_p (stmt))
7076 continue;
7077
7078 /* We just want to determine whether to do nothing
7079 (count == 0), to substitute the computed
7080 expression into a single use of the SSA DEF by
7081 itself (count == 1), or to use a debug temp
7082 because the SSA DEF is used multiple times or as
7083 part of a larger expression (count > 1). */
7084 count++;
7085 if (gimple_debug_bind_get_value (stmt) != def)
7086 count++;
7087
7088 if (count > 1)
7089 BREAK_FROM_IMM_USE_STMT (imm_iter);
7090 }
7091
7092 if (!count)
7093 continue;
7094
7095 struct iv_use dummy_use;
7096 struct iv_cand *best_cand = NULL, *cand;
7097 unsigned i, best_pref = 0, cand_pref;
7098
7099 memset (&dummy_use, 0, sizeof (dummy_use));
7100 dummy_use.iv = info->iv;
7101 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7102 {
7103 cand = iv_use (data, i)->selected;
7104 if (cand == best_cand)
7105 continue;
7106 cand_pref = operand_equal_p (cand->iv->step,
7107 info->iv->step, 0)
7108 ? 4 : 0;
7109 cand_pref
7110 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7111 == TYPE_MODE (TREE_TYPE (info->iv->base))
7112 ? 2 : 0;
7113 cand_pref
7114 += TREE_CODE (cand->iv->base) == INTEGER_CST
7115 ? 1 : 0;
7116 if (best_cand == NULL || best_pref < cand_pref)
7117 {
7118 best_cand = cand;
7119 best_pref = cand_pref;
7120 }
7121 }
7122
7123 if (!best_cand)
7124 continue;
7125
7126 tree comp = get_computation_at (data->current_loop,
7127 &dummy_use, best_cand,
7128 SSA_NAME_DEF_STMT (def));
7129 if (!comp)
7130 continue;
7131
7132 if (count > 1)
7133 {
7134 tree vexpr = make_node (DEBUG_EXPR_DECL);
7135 DECL_ARTIFICIAL (vexpr) = 1;
7136 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7137 if (SSA_NAME_VAR (def))
7138 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7139 else
7140 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7141 gdebug *def_temp
7142 = gimple_build_debug_bind (vexpr, comp, NULL);
7143 gimple_stmt_iterator gsi;
7144
7145 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7146 gsi = gsi_after_labels (gimple_bb
7147 (SSA_NAME_DEF_STMT (def)));
7148 else
7149 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7150
7151 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7152 comp = vexpr;
7153 }
7154
7155 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7156 {
7157 if (!gimple_debug_bind_p (stmt))
7158 continue;
7159
7160 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7161 SET_USE (use_p, comp);
7162
7163 update_stmt (stmt);
7164 }
7165 }
7166 }
7167 }
7168
7169 release_defs_bitset (toremove);
7170
7171 BITMAP_FREE (toremove);
7172 }
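
/* For illustration: when a removed IV has multiple debug uses (COUNT > 1
   above), a debug temporary keeps the replacement expression in one place,
   e.g. (hypothetical dump excerpt)

     # DEBUG D#1 => i_5 * 4 + base_6
     # DEBUG i => D#1

   instead of duplicating the computation into every debug bind. */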
7173
7174 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7175 for hash_map::traverse. */
7176
7177 bool
7178 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7179 {
7180 free (value);
7181 return true;
7182 }
7183
7184 /* Frees data allocated by the optimization of a single loop. */
7185
7186 static void
7187 free_loop_data (struct ivopts_data *data)
7188 {
7189 unsigned i, j;
7190 bitmap_iterator bi;
7191 tree obj;
7192
7193 if (data->niters)
7194 {
7195 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7196 delete data->niters;
7197 data->niters = NULL;
7198 }
7199
7200 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7201 {
7202 struct version_info *info;
7203
7204 info = ver_info (data, i);
7205 free (info->iv);
7206 info->iv = NULL;
7207 info->has_nonlin_use = false;
7208 info->preserve_biv = false;
7209 info->inv_id = 0;
7210 }
7211 bitmap_clear (data->relevant);
7212 bitmap_clear (data->important_candidates);
7213
7214 for (i = 0; i < n_iv_uses (data); i++)
7215 {
7216 struct iv_use *use = iv_use (data, i);
7217 struct iv_use *pre = use, *sub = use->next;
7218
7219 while (sub)
7220 {
7221 gcc_assert (sub->related_cands == NULL);
7222 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7223
7224 free (sub->iv);
7225 pre = sub;
7226 sub = sub->next;
7227 free (pre);
7228 }
7229
7230 free (use->iv);
7231 BITMAP_FREE (use->related_cands);
7232 for (j = 0; j < use->n_map_members; j++)
7233 if (use->cost_map[j].depends_on)
7234 BITMAP_FREE (use->cost_map[j].depends_on);
7235 free (use->cost_map);
7236 free (use);
7237 }
7238 data->iv_uses.truncate (0);
7239
7240 for (i = 0; i < n_iv_cands (data); i++)
7241 {
7242 struct iv_cand *cand = iv_cand (data, i);
7243
7244 free (cand->iv);
7245 if (cand->depends_on)
7246 BITMAP_FREE (cand->depends_on);
7247 free (cand);
7248 }
7249 data->iv_candidates.truncate (0);
7250
7251 if (data->version_info_size < num_ssa_names)
7252 {
7253 data->version_info_size = 2 * num_ssa_names;
7254 free (data->version_info);
7255 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7256 }
7257
7258 data->max_inv_id = 0;
7259
7260 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7261 SET_DECL_RTL (obj, NULL_RTX);
7262
7263 decl_rtl_to_reset.truncate (0);
7264
7265 data->inv_expr_tab->empty ();
7266 data->inv_expr_id = 0;
7267 }
7268
7269 /* Finalizes the data structures used by the iv optimization pass. */
7271
7272 static void
7273 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7274 {
7275 free_loop_data (data);
7276 free (data->version_info);
7277 BITMAP_FREE (data->relevant);
7278 BITMAP_FREE (data->important_candidates);
7279
7280 decl_rtl_to_reset.release ();
7281 data->iv_uses.release ();
7282 data->iv_candidates.release ();
7283 delete data->inv_expr_tab;
7284 data->inv_expr_tab = NULL;
7285 free_affine_expand_cache (&data->name_expansion_cache);
7286 }
7287
7288 /* Returns true if the loop body BODY includes any function calls. */
7289
7290 static bool
7291 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7292 {
7293 gimple_stmt_iterator gsi;
7294 unsigned i;
7295
7296 for (i = 0; i < num_nodes; i++)
7297 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7298 {
7299 gimple stmt = gsi_stmt (gsi);
7300 if (is_gimple_call (stmt)
7301 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7302 return true;
7303 }
7304 return false;
7305 }
7306
7307 /* Optimizes the LOOP. Returns true if anything changed. */
7308
7309 static bool
7310 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7311 {
7312 bool changed = false;
7313 struct iv_ca *iv_ca;
7314 edge exit = single_dom_exit (loop);
7315 basic_block *body;
7316
7317 gcc_assert (!data->niters);
7318 data->current_loop = loop;
7319 data->loop_loc = find_loop_location (loop);
7320 data->speed = optimize_loop_for_speed_p (loop);
7321
7322 if (dump_file && (dump_flags & TDF_DETAILS))
7323 {
7324 fprintf (dump_file, "Processing loop %d", loop->num);
7325 if (data->loop_loc != UNKNOWN_LOCATION)
7326 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7327 LOCATION_LINE (data->loop_loc));
7328 fprintf (dump_file, "\n");
7329
7330 if (exit)
7331 {
7332 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7333 exit->src->index, exit->dest->index);
7334 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7335 fprintf (dump_file, "\n");
7336 }
7337
7338 fprintf (dump_file, "\n");
7339 }
7340
7341 body = get_loop_body (loop);
7342 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7343 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7344 free (body);
7345
7346 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7347
7348 /* For each ssa name determines whether it behaves as an induction variable
7349 in some loop. */
7350 if (!find_induction_variables (data))
7351 goto finish;
7352
7353 /* Finds interesting uses (item 1). */
7354 find_interesting_uses (data);
7355 group_address_uses (data);
7356 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7357 goto finish;
7358
7359 /* Finds candidates for the induction variables (item 2). */
7360 find_iv_candidates (data);
7361
7362 /* Calculates the costs (item 3, part 1). */
7363 determine_iv_costs (data);
7364 determine_use_iv_costs (data);
7365 determine_set_costs (data);
7366
7367 /* Find the optimal set of induction variables (item 3, part 2). */
7368 iv_ca = find_optimal_iv_set (data);
7369 if (!iv_ca)
7370 goto finish;
7371 changed = true;
7372
7373 /* Create the new induction variables (item 4, part 1). */
7374 create_new_ivs (data, iv_ca);
7375 iv_ca_free (&iv_ca);
7376
7377 /* Rewrite the uses (item 4, part 2). */
7378 rewrite_uses (data);
7379
7380 /* Remove the ivs that are unused after rewriting. */
7381 remove_unused_ivs (data);
7382
7383 /* We have changed the structure of induction variables; it might happen
7384 that definitions in the scev database refer to some of them that were
7385 eliminated. */
7386 scev_reset ();
7387
7388 finish:
7389 free_loop_data (data);
7390
7391 return changed;
7392 }
7393
7394 /* Main entry point. Optimizes induction variables in loops. */
7395
7396 void
7397 tree_ssa_iv_optimize (void)
7398 {
7399 struct loop *loop;
7400 struct ivopts_data data;
7401
7402 tree_ssa_iv_optimize_init (&data);
7403
7404 /* Optimize the loops starting with the innermost ones. */
7405 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7406 {
7407 if (dump_file && (dump_flags & TDF_DETAILS))
7408 flow_loop_dump (loop, dump_file, NULL, 1);
7409
7410 tree_ssa_iv_optimize_loop (&data, loop);
7411 }
7412
7413 tree_ssa_iv_optimize_finalize (&data);
7414 }