/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
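
/* For a rough illustration (not taken from any particular testcase): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] is recomputed as a + i * sizeof (*a) in each
   iteration.  After this pass the loop may instead maintain a pointer that
   is simply incremented,

     for (p = a; p != a + n; p++)
       sum += *p;

   which strength-reduces the multiplication, and the counter i may be
   eliminated entirely if the exit test is rewritten to compare the pointer
   against its final value.  */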

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "vec.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  unsigned use_id;      /* Id of the use recorded for this iv, valid if
                           have_use_for is set.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
};
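
/* For illustration (not from any particular testcase): for the counter of

     for (i = 10; i < n; i += 2)

   the iv has base 10, step 2 and no base_object, while for an "int *p"
   advanced by p++ inside the loop the iv has the initial pointer value as
   base, step 4 (assuming 4-byte int) and a base_object identifying the
   memory object the pointer points into.  */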

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned sub_id;      /* The id of the sub use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */

  struct iv_use *next;  /* The next sub use.  */
  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;            /* The invariant expression.  */
  int id;               /* The id assigned to it.  */
  hashval_t hash;       /* Its hash value.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in the loop.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether all candidates are considered for each use, rather than just
     the related and important ones.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
        {
          dump_use (file, use);
          use = use->next;
        }
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must not work with
         ssa names that appear in phi nodes on abnormal edges, so as not to
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
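
/* For illustration: for &a[i] the base object is &a; for p + 4 with a plain
   pointer ssa name p the recursion ends in the default case and yields p
   itself (converted to ptr_type_node); for an integer constant casted to a
   pointer there is no base object and NULL_TREE is returned.  */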

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
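
/* For instance, &a is not complex (its operand is the DECL a), while &a[0]
   and &b.field are, since their operands are ARRAY_REF and COMPONENT_REF
   trees rather than declarations.  */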

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except for those with a DECL_P
     operand.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
        bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
        continue;

      if (integer_zerop (iv.step))
        continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
         iv step.  Ideally we should stop at any ssa var, but doing so is
         expensive and such cases rarely happen, so we only check the first
         one.  See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
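
/* For illustration, a biv found here typically comes from GIMPLE of the form

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   which yields an iv with base 0 and step 1 for i_1.  */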

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but doing so is
     expensive and such cases rarely happen, so we only check the first
     one.  See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv, true);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type uses, ADDR_BASE is the stripped IV base and ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type, tree addr_base = NULL,
            unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
                struct iv *iv, gimple stmt, enum use_type use_type,
                tree addr_base, unsigned HOST_WIDE_INT addr_offset,
                unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* The sub use list is maintained in ascending order of offsets.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
        {
          pre = group;
          group = group->next;
        }
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Clear the iv's ssa name so that it is not shown in the dumps, in case
     the caller did not reset it.  */
  iv->ssa_name = NULL_TREE;

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
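
/* For instance, for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant, this sets
   *CONTROL_VAR to the location of i_1 and *BOUND to that of n_2, fills in
   the iv descriptions and returns true.  */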

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If BASE is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If BASE is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
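
/* For illustration: for a reference a[i_1] where a is an array of 4-byte
   ints and i_1 is an iv with step 1, the element size 4 is multiplied by
   the converted iv step, so 4 is added to DTA->step and the index is
   replaced by the initial value of i_1.  */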

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
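
/* For illustration: with TOP = i_1 * 6 and BOT = i_1, the MULT_EXPR case
   recurses on i_1 (a multiple of BOT by 1) and stores 6 to *MUL; with
   TOP = 12 and BOT = 4, the INTEGER_CST case performs the division and
   stores 3; with TOP = 13 and BOT = 4 it fails because the remainder is
   nonzero.  */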

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use of that one; otherwise it
   records a normal use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
                  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Sub uses are only supported for address type uses, that is, those
     with a base object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
        continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
          && operand_equal_p (iv->step, use->iv->step, 0)
          && operand_equal_p (addr_base, use->addr_base, 0))
        break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
                       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
                           use_type, addr_base, addr_offset, i);
}
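
/* For illustration, address uses of a[i_1] and a[i_1 + 4] (with 4-byte
   elements) have the same stripped base and step and differ only in the
   constant offset (0 vs. 16 bytes), so the second is recorded as a sub use
   of the first and the pair can later be addressed from one common
   induction variable.  */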

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
2155 if (!iv)
2156 continue;
2157
2158 find_interesting_uses_op (data, op);
2159 }
2160 }
2161
2162 /* Finds interesting uses of induction variables outside of loops
2163 on loop exit edge EXIT. */
2164
2165 static void
2166 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2167 {
2168 gphi *phi;
2169 gphi_iterator psi;
2170 tree def;
2171
2172 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2173 {
2174 phi = psi.phi ();
2175 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2176 if (!virtual_operand_p (def))
2177 find_interesting_uses_op (data, def);
2178 }
2179 }
2180
2181 /* Finds uses of the induction variables that are interesting. */
2182
2183 static void
2184 find_interesting_uses (struct ivopts_data *data)
2185 {
2186 basic_block bb;
2187 gimple_stmt_iterator bsi;
2188 basic_block *body = get_loop_body (data->current_loop);
2189 unsigned i;
2190 struct version_info *info;
2191 edge e;
2192
2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 fprintf (dump_file, "Uses:\n\n");
2195
2196 for (i = 0; i < data->current_loop->num_nodes; i++)
2197 {
2198 edge_iterator ei;
2199 bb = body[i];
2200
2201 FOR_EACH_EDGE (e, ei, bb->succs)
2202 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2203 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2204 find_interesting_uses_outside (data, e);
2205
2206 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2207 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2208 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2209 if (!is_gimple_debug (gsi_stmt (bsi)))
2210 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2211 }
2212
2213 if (dump_file && (dump_flags & TDF_DETAILS))
2214 {
2215 bitmap_iterator bi;
2216
2217 fprintf (dump_file, "\n");
2218
2219 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2220 {
2221 info = ver_info (data, i);
2222 if (info->inv_id)
2223 {
2224 fprintf (dump_file, " ");
2225 print_generic_expr (dump_file, info->name, TDF_SLIM);
2226 fprintf (dump_file, " is invariant (%d)%s\n",
2227 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2228 }
2229 }
2230
2231 fprintf (dump_file, "\n");
2232 }
2233
2234 free (body);
2235 }
2236
2237 /* Compute the maximum offset of the [base + offset] addressing mode
2238 for the memory reference represented by USE. */
2239
2240 static HOST_WIDE_INT
2241 compute_max_addr_offset (struct iv_use *use)
2242 {
2243 int width;
2244 rtx reg, addr;
2245 HOST_WIDE_INT i, off;
2246 unsigned list_index, num;
2247 addr_space_t as;
2248 machine_mode mem_mode, addr_mode;
2249 static vec<HOST_WIDE_INT> max_offset_list;
2250
2251 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2252 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2253
2254 num = max_offset_list.length ();
2255 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2256 if (list_index >= num)
2257 {
2258 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2259 for (; num < max_offset_list.length (); num++)
2260 max_offset_list[num] = -1;
2261 }
2262
2263 off = max_offset_list[list_index];
2264 if (off != -1)
2265 return off;
2266
2267 addr_mode = targetm.addr_space.address_mode (as);
2268 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2269 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2270
2271 width = GET_MODE_BITSIZE (addr_mode) - 1;
2272 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2273 width = HOST_BITS_PER_WIDE_INT - 1;
2274
2275 for (i = width; i > 0; i--)
2276 {
2277 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2278 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2279 if (memory_address_addr_space_p (mem_mode, addr, as))
2280 break;
2281
2282 /* For some strict-alignment targets, the offset must be naturally
2283 aligned. Try an aligned offset if mem_mode is not QImode. */
2284 off = ((unsigned HOST_WIDE_INT) 1 << i);
2285 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2286 {
2287 off -= GET_MODE_SIZE (mem_mode);
2288 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2289 if (memory_address_addr_space_p (mem_mode, addr, as))
2290 break;
2291 }
2292 }
2293 if (i == 0)
2294 off = 0;
2295
2296 max_offset_list[list_index] = off;
2297 return off;
2298 }
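
/* As a sketch of the result (target-dependent, so this is only an
   assumption): on a target whose [base + offset] addressing accepts
   12-bit signed offsets, the probing loop above would settle on
   off = (1 << 11) - 1 = 2047, and that value would be cached in
   max_offset_list for later queries with the same address space and
   memory mode.  */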
2299
2300 /* Check if all small groups should be split. Return true if and
2301 only if:
2302
2303 1) At least one group contains two uses with different offsets.
2304 2) No group contains more than two uses with different offsets.
2305
2306 Return false otherwise. We want to split such groups because:
2307
2308 1) Small groups don't have much benefit and may interfere with
2309 general candidate selection.
2310 2) The size of a problem with only small groups is usually small,
2311 and the general algorithm can handle it well.
2312
2313 TODO -- The above claim may not hold when auto increment is supported. */
2314
2315 static bool
2316 split_all_small_groups (struct ivopts_data *data)
2317 {
2318 bool split_p = false;
2319 unsigned int i, n, distinct;
2320 struct iv_use *pre, *use;
2321
2322 n = n_iv_uses (data);
2323 for (i = 0; i < n; i++)
2324 {
2325 use = iv_use (data, i);
2326 if (!use->next)
2327 continue;
2328
2329 distinct = 1;
2330 gcc_assert (use->type == USE_ADDRESS);
2331 for (pre = use, use = use->next; use; pre = use, use = use->next)
2332 {
2333 if (pre->addr_offset != use->addr_offset)
2334 distinct++;
2335
2336 if (distinct > 2)
2337 return false;
2338 }
2339 if (distinct == 2)
2340 split_p = true;
2341 }
2342
2343 return split_p;
2344 }
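
/* A hypothetical example: if every group has at most two distinct
   offsets, say {0} or {0, 4}, and at least one group has exactly two,
   this returns true and the small groups are split; a single group
   with offsets {0, 4, 8} makes it return false.  */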
2345
2346 /* For each group of address type uses, this function further groups
2347 these uses according to the maximum offset supported by the target's
2348 [base + offset] addressing mode. */
2349
2350 static void
2351 group_address_uses (struct ivopts_data *data)
2352 {
2353 HOST_WIDE_INT max_offset = -1;
2354 unsigned int i, n, sub_id;
2355 struct iv_use *pre, *use;
2356 unsigned HOST_WIDE_INT addr_offset_first;
2357
2358 /* Reset max offset to split all small groups. */
2359 if (split_all_small_groups (data))
2360 max_offset = 0;
2361
2362 n = n_iv_uses (data);
2363 for (i = 0; i < n; i++)
2364 {
2365 use = iv_use (data, i);
2366 if (!use->next)
2367 continue;
2368
2369 gcc_assert (use->type == USE_ADDRESS);
2370 if (max_offset != 0)
2371 max_offset = compute_max_addr_offset (use);
2372
2373 while (use)
2374 {
2375 sub_id = 0;
2376 addr_offset_first = use->addr_offset;
2377 /* Only uses whose offset relative to the first use fits in the
2378 offset part can be grouped together. */
2379 for (pre = use, use = use->next;
2380 use && (use->addr_offset - addr_offset_first
2381 <= (unsigned HOST_WIDE_INT) max_offset);
2382 pre = use, use = use->next)
2383 {
2384 use->id = pre->id;
2385 use->sub_id = ++sub_id;
2386 }
2387
2388 /* Break the list and create new group. */
2389 if (use)
2390 {
2391 pre->next = NULL;
2392 use->id = n_iv_uses (data);
2393 use->related_cands = BITMAP_ALLOC (NULL);
2394 data->iv_uses.safe_push (use);
2395 }
2396 }
2397 }
2398
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 dump_uses (dump_file, data);
2401 }
2402
2403 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
2404 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2405 we are at the top-level of the processed address. */
2406
2407 static tree
2408 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2409 HOST_WIDE_INT *offset)
2410 {
2411 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2412 enum tree_code code;
2413 tree type, orig_type = TREE_TYPE (expr);
2414 HOST_WIDE_INT off0, off1, st;
2415 tree orig_expr = expr;
2416
2417 STRIP_NOPS (expr);
2418
2419 type = TREE_TYPE (expr);
2420 code = TREE_CODE (expr);
2421 *offset = 0;
2422
2423 switch (code)
2424 {
2425 case INTEGER_CST:
2426 if (!cst_and_fits_in_hwi (expr)
2427 || integer_zerop (expr))
2428 return orig_expr;
2429
2430 *offset = int_cst_value (expr);
2431 return build_int_cst (orig_type, 0);
2432
2433 case POINTER_PLUS_EXPR:
2434 case PLUS_EXPR:
2435 case MINUS_EXPR:
2436 op0 = TREE_OPERAND (expr, 0);
2437 op1 = TREE_OPERAND (expr, 1);
2438
2439 op0 = strip_offset_1 (op0, false, false, &off0);
2440 op1 = strip_offset_1 (op1, false, false, &off1);
2441
2442 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2443 if (op0 == TREE_OPERAND (expr, 0)
2444 && op1 == TREE_OPERAND (expr, 1))
2445 return orig_expr;
2446
2447 if (integer_zerop (op1))
2448 expr = op0;
2449 else if (integer_zerop (op0))
2450 {
2451 if (code == MINUS_EXPR)
2452 expr = fold_build1 (NEGATE_EXPR, type, op1);
2453 else
2454 expr = op1;
2455 }
2456 else
2457 expr = fold_build2 (code, type, op0, op1);
2458
2459 return fold_convert (orig_type, expr);
2460
2461 case MULT_EXPR:
2462 op1 = TREE_OPERAND (expr, 1);
2463 if (!cst_and_fits_in_hwi (op1))
2464 return orig_expr;
2465
2466 op0 = TREE_OPERAND (expr, 0);
2467 op0 = strip_offset_1 (op0, false, false, &off0);
2468 if (op0 == TREE_OPERAND (expr, 0))
2469 return orig_expr;
2470
2471 *offset = off0 * int_cst_value (op1);
2472 if (integer_zerop (op0))
2473 expr = op0;
2474 else
2475 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2476
2477 return fold_convert (orig_type, expr);
2478
2479 case ARRAY_REF:
2480 case ARRAY_RANGE_REF:
2481 if (!inside_addr)
2482 return orig_expr;
2483
2484 step = array_ref_element_size (expr);
2485 if (!cst_and_fits_in_hwi (step))
2486 break;
2487
2488 st = int_cst_value (step);
2489 op1 = TREE_OPERAND (expr, 1);
2490 op1 = strip_offset_1 (op1, false, false, &off1);
2491 *offset = off1 * st;
2492
2493 if (top_compref
2494 && integer_zerop (op1))
2495 {
2496 /* Strip the component reference completely. */
2497 op0 = TREE_OPERAND (expr, 0);
2498 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2499 *offset += off0;
2500 return op0;
2501 }
2502 break;
2503
2504 case COMPONENT_REF:
2505 {
2506 tree field;
2507
2508 if (!inside_addr)
2509 return orig_expr;
2510
2511 tmp = component_ref_field_offset (expr);
2512 field = TREE_OPERAND (expr, 1);
2513 if (top_compref
2514 && cst_and_fits_in_hwi (tmp)
2515 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2516 {
2517 HOST_WIDE_INT boffset, abs_off;
2518
2519 /* Strip the component reference completely. */
2520 op0 = TREE_OPERAND (expr, 0);
2521 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2522 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2523 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2524 if (boffset < 0)
2525 abs_off = -abs_off;
2526
2527 *offset = off0 + int_cst_value (tmp) + abs_off;
2528 return op0;
2529 }
2530 }
2531 break;
2532
2533 case ADDR_EXPR:
2534 op0 = TREE_OPERAND (expr, 0);
2535 op0 = strip_offset_1 (op0, true, true, &off0);
2536 *offset += off0;
2537
2538 if (op0 == TREE_OPERAND (expr, 0))
2539 return orig_expr;
2540
2541 expr = build_fold_addr_expr (op0);
2542 return fold_convert (orig_type, expr);
2543
2544 case MEM_REF:
2545 /* ??? Offset operand? */
2546 inside_addr = false;
2547 break;
2548
2549 default:
2550 return orig_expr;
2551 }
2552
2553 /* Default handling of expressions for which we want to recurse into
2554 the first operand. */
2555 op0 = TREE_OPERAND (expr, 0);
2556 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2557 *offset += off0;
2558
2559 if (op0 == TREE_OPERAND (expr, 0)
2560 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2561 return orig_expr;
2562
2563 expr = copy_node (expr);
2564 TREE_OPERAND (expr, 0) = op0;
2565 if (op1)
2566 TREE_OPERAND (expr, 1) = op1;
2567
2568 /* Inside an address, we might strip the top level component references,
2569 thus changing the type of the expression. The handling of ADDR_EXPR
2570 will fix that. */
2571 expr = fold_convert (orig_type, expr);
2572
2573 return expr;
2574 }
2575
2576 /* Strips constant offsets from EXPR and stores them in OFFSET. */
2577
2578 static tree
2579 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2580 {
2581 HOST_WIDE_INT off;
2582 tree core = strip_offset_1 (expr, false, false, &off);
2583 *offset = off;
2584 return core;
2585 }
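
/* A hypothetical example of what strip_offset computes: for the
   address &a[i + 3] with 4-byte array elements, the ARRAY_REF case
   above accumulates off1 * st = 3 * 4, so

     core = strip_offset (&a[i + 3], &off);

   returns the core expression &a[i] and sets off to 12.  */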
2586
2587 /* Returns a variant of TYPE that can be used as a base for different uses.
2588 We return an unsigned type with the same precision, which avoids problems
2589 with overflows. */
2590
2591 static tree
2592 generic_type_for (tree type)
2593 {
2594 if (POINTER_TYPE_P (type))
2595 return unsigned_type_for (type);
2596
2597 if (TYPE_UNSIGNED (type))
2598 return type;
2599
2600 return unsigned_type_for (type);
2601 }
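
/* For instance, generic_type_for (int) is unsigned int, and for a
   pointer type it is an unsigned integer type of the same precision;
   unsigned types are returned unchanged.  */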
2602
2603 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2604 the bitmap in which we should store them. */
2605
2606 static struct ivopts_data *fd_ivopts_data;
2607 static tree
2608 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2609 {
2610 bitmap *depends_on = (bitmap *) data;
2611 struct version_info *info;
2612
2613 if (TREE_CODE (*expr_p) != SSA_NAME)
2614 return NULL_TREE;
2615 info = name_info (fd_ivopts_data, *expr_p);
2616
2617 if (!info->inv_id || info->has_nonlin_use)
2618 return NULL_TREE;
2619
2620 if (!*depends_on)
2621 *depends_on = BITMAP_ALLOC (NULL);
2622 bitmap_set_bit (*depends_on, info->inv_id);
2623
2624 return NULL_TREE;
2625 }
2626
2627 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2628 position to POS. If USE is not NULL, the candidate is set as related to
2629 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2630 replacement of the final value of the iv by a direct computation. */
2631
2632 static struct iv_cand *
2633 add_candidate_1 (struct ivopts_data *data,
2634 tree base, tree step, bool important, enum iv_position pos,
2635 struct iv_use *use, gimple incremented_at)
2636 {
2637 unsigned i;
2638 struct iv_cand *cand = NULL;
2639 tree type, orig_type;
2640
2641 /* For non-original variables, make sure their values are computed in a type
2642 that does not invoke undefined behavior on overflows (since in general,
2643 we cannot prove that these induction variables are non-wrapping). */
2644 if (pos != IP_ORIGINAL)
2645 {
2646 orig_type = TREE_TYPE (base);
2647 type = generic_type_for (orig_type);
2648 if (type != orig_type)
2649 {
2650 base = fold_convert (type, base);
2651 step = fold_convert (type, step);
2652 }
2653 }
2654
2655 for (i = 0; i < n_iv_cands (data); i++)
2656 {
2657 cand = iv_cand (data, i);
2658
2659 if (cand->pos != pos)
2660 continue;
2661
2662 if (cand->incremented_at != incremented_at
2663 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2664 && cand->ainc_use != use))
2665 continue;
2666
2667 if (!cand->iv)
2668 {
2669 if (!base && !step)
2670 break;
2671
2672 continue;
2673 }
2674
2675 if (!base && !step)
2676 continue;
2677
2678 if (operand_equal_p (base, cand->iv->base, 0)
2679 && operand_equal_p (step, cand->iv->step, 0)
2680 && (TYPE_PRECISION (TREE_TYPE (base))
2681 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2682 break;
2683 }
2684
2685 if (i == n_iv_cands (data))
2686 {
2687 cand = XCNEW (struct iv_cand);
2688 cand->id = i;
2689
2690 if (!base && !step)
2691 cand->iv = NULL;
2692 else
2693 cand->iv = alloc_iv (base, step);
2694
2695 cand->pos = pos;
2696 if (pos != IP_ORIGINAL && cand->iv)
2697 {
2698 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2699 cand->var_after = cand->var_before;
2700 }
2701 cand->important = important;
2702 cand->incremented_at = incremented_at;
2703 data->iv_candidates.safe_push (cand);
2704
2705 if (step
2706 && TREE_CODE (step) != INTEGER_CST)
2707 {
2708 fd_ivopts_data = data;
2709 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2710 }
2711
2712 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2713 cand->ainc_use = use;
2714 else
2715 cand->ainc_use = NULL;
2716
2717 if (dump_file && (dump_flags & TDF_DETAILS))
2718 dump_cand (dump_file, cand);
2719 }
2720
2721 if (important && !cand->important)
2722 {
2723 cand->important = true;
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2726 }
2727
2728 if (use)
2729 {
2730 bitmap_set_bit (use->related_cands, i);
2731 if (dump_file && (dump_flags & TDF_DETAILS))
2732 fprintf (dump_file, "Candidate %d is related to use %d\n",
2733 cand->id, use->id);
2734 }
2735
2736 return cand;
2737 }
2738
2739 /* Returns true if incrementing the induction variable at the end of the LOOP
2740 is allowed.
2741
2742 The purpose is to avoid splitting the latch edge with a biv increment,
2743 thus creating a jump, possibly confusing other optimization passes and
2744 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2745 IP_NORMAL_POS is not available (so we do not have a better alternative),
2746 or if the latch edge is already nonempty. */
2747
2748 static bool
2749 allow_ip_end_pos_p (struct loop *loop)
2750 {
2751 if (!ip_normal_pos (loop))
2752 return true;
2753
2754 if (!empty_block_p (ip_end_pos (loop)))
2755 return true;
2756
2757 return false;
2758 }
2759
2760 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2761 Important field is set to IMPORTANT. */
2762
2763 static void
2764 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2765 bool important, struct iv_use *use)
2766 {
2767 basic_block use_bb = gimple_bb (use->stmt);
2768 machine_mode mem_mode;
2769 unsigned HOST_WIDE_INT cstepi;
2770
2771 /* If we insert the increment in any position other than the standard
2772 ones, we must ensure that it is incremented once per iteration.
2773 It must not be in an inner nested loop, or one side of an if
2774 statement. */
2775 if (use_bb->loop_father != data->current_loop
2776 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2777 || stmt_could_throw_p (use->stmt)
2778 || !cst_and_fits_in_hwi (step))
2779 return;
2780
2781 cstepi = int_cst_value (step);
2782
2783 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2784 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2785 || USE_STORE_PRE_INCREMENT (mem_mode))
2786 && GET_MODE_SIZE (mem_mode) == cstepi)
2787 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2788 || USE_STORE_PRE_DECREMENT (mem_mode))
2789 && GET_MODE_SIZE (mem_mode) == -cstepi))
2790 {
2791 enum tree_code code = MINUS_EXPR;
2792 tree new_base;
2793 tree new_step = step;
2794
2795 if (POINTER_TYPE_P (TREE_TYPE (base)))
2796 {
2797 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2798 code = POINTER_PLUS_EXPR;
2799 }
2800 else
2801 new_step = fold_convert (TREE_TYPE (base), new_step);
2802 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2803 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2804 use->stmt);
2805 }
2806 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2807 || USE_STORE_POST_INCREMENT (mem_mode))
2808 && GET_MODE_SIZE (mem_mode) == cstepi)
2809 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2810 || USE_STORE_POST_DECREMENT (mem_mode))
2811 && GET_MODE_SIZE (mem_mode) == -cstepi))
2812 {
2813 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2814 use->stmt);
2815 }
2816 }
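
/* A hypothetical illustration: on a target with post-increment
   addressing, an SImode use *p with step 4 (== GET_MODE_SIZE) gets an
   IP_AFTER_USE candidate with base P and step 4, which can later be
   matched to a "*p++" style access.  */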
2817
2818 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2819 position to POS. If USE is not NULL, the candidate is set as related to
2820 it. The candidate computation is scheduled on all available positions. */
2821
2822 static void
2823 add_candidate (struct ivopts_data *data,
2824 tree base, tree step, bool important, struct iv_use *use)
2825 {
2826 gcc_assert (use == NULL || use->sub_id == 0);
2827
2828 if (ip_normal_pos (data->current_loop))
2829 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2830 if (ip_end_pos (data->current_loop)
2831 && allow_ip_end_pos_p (data->current_loop))
2832 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2833
2834 if (use != NULL && use->type == USE_ADDRESS)
2835 add_autoinc_candidates (data, base, step, important, use);
2836 }
2837
2838 /* Adds standard iv candidates. */
2839
2840 static void
2841 add_standard_iv_candidates (struct ivopts_data *data)
2842 {
2843 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2844
2845 /* The same for the long type if it is still fast enough. */
2846 if (TYPE_PRECISION
2847 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2848 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2849 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2850 build_int_cst (long_integer_type_node, 1), true, NULL);
2851
2852 /* The same for the long long type if it is still fast enough. */
2853 if (TYPE_PRECISION
2854 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2855 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2856 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2857 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2858 }
2859
2860
2861 /* Adds candidates based on the old induction variable IV. */
2862
2863 static void
2864 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2865 {
2866 gimple phi;
2867 tree def;
2868 struct iv_cand *cand;
2869
2870 add_candidate (data, iv->base, iv->step, true, NULL);
2871
2872 /* The same, but with initial value zero. */
2873 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2874 add_candidate (data, size_int (0), iv->step, true, NULL);
2875 else
2876 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2877 iv->step, true, NULL);
2878
2879 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2880 if (gimple_code (phi) == GIMPLE_PHI)
2881 {
2882 /* Additionally record the possibility of leaving the original iv
2883 untouched. */
2884 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2885 /* Don't add candidate if it's from another PHI node because
2886 it's an affine iv appearing in the form of PEELED_CHREC. */
2887 phi = SSA_NAME_DEF_STMT (def);
2888 if (gimple_code (phi) != GIMPLE_PHI)
2889 {
2890 cand = add_candidate_1 (data,
2891 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2892 SSA_NAME_DEF_STMT (def));
2893 cand->var_before = iv->ssa_name;
2894 cand->var_after = def;
2895 }
2896 else
2897 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2898 }
2899 }
2900
2901 /* Adds candidates based on the old induction variables. */
2902
2903 static void
2904 add_old_ivs_candidates (struct ivopts_data *data)
2905 {
2906 unsigned i;
2907 struct iv *iv;
2908 bitmap_iterator bi;
2909
2910 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2911 {
2912 iv = ver_info (data, i)->iv;
2913 if (iv && iv->biv_p && !integer_zerop (iv->step))
2914 add_old_iv_candidates (data, iv);
2915 }
2916 }
2917
2918 /* Adds candidates based on the value of the induction variable IV and USE. */
2919
2920 static void
2921 add_iv_value_candidates (struct ivopts_data *data,
2922 struct iv *iv, struct iv_use *use)
2923 {
2924 unsigned HOST_WIDE_INT offset;
2925 tree base;
2926 tree basetype;
2927
2928 add_candidate (data, iv->base, iv->step, false, use);
2929
2930 /* The same, but with initial value zero. Make such a variable important,
2931 since it is generic enough that possibly many uses may be based
2932 on it. */
2933 basetype = TREE_TYPE (iv->base);
2934 if (POINTER_TYPE_P (basetype))
2935 basetype = sizetype;
2936 add_candidate (data, build_int_cst (basetype, 0),
2937 iv->step, true, use);
2938
2939 /* Third, try removing the constant offset. Make sure to even
2940 add a candidate for &a[0] vs. (T *)&a. */
2941 base = strip_offset (iv->base, &offset);
2942 if (offset
2943 || base != iv->base)
2944 add_candidate (data, base, iv->step, false, use);
2945 }
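
/* As an example (hypothetical, assuming 4-byte array elements): for a
   use with iv {base = &a[3], step = 4} this adds the candidate
   {&a[3], 4}, the important candidate {0, 4}, and -- once strip_offset
   has removed the constant offset 12 -- the candidate {&a[0], 4}.  */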
2946
2947 /* Adds candidates based on the uses. */
2948
2949 static void
2950 add_derived_ivs_candidates (struct ivopts_data *data)
2951 {
2952 unsigned i;
2953
2954 for (i = 0; i < n_iv_uses (data); i++)
2955 {
2956 struct iv_use *use = iv_use (data, i);
2957
2958 if (!use)
2959 continue;
2960
2961 switch (use->type)
2962 {
2963 case USE_NONLINEAR_EXPR:
2964 case USE_COMPARE:
2965 case USE_ADDRESS:
2966 /* Just add the ivs based on the value of the iv used here. */
2967 add_iv_value_candidates (data, use->iv, use);
2968 break;
2969
2970 default:
2971 gcc_unreachable ();
2972 }
2973 }
2974 }
2975
2976 /* Record important candidates and add them to related_cands bitmaps
2977 if needed. */
2978
2979 static void
2980 record_important_candidates (struct ivopts_data *data)
2981 {
2982 unsigned i;
2983 struct iv_use *use;
2984
2985 for (i = 0; i < n_iv_cands (data); i++)
2986 {
2987 struct iv_cand *cand = iv_cand (data, i);
2988
2989 if (cand->important)
2990 bitmap_set_bit (data->important_candidates, i);
2991 }
2992
2993 data->consider_all_candidates = (n_iv_cands (data)
2994 <= CONSIDER_ALL_CANDIDATES_BOUND);
2995
2996 if (data->consider_all_candidates)
2997 {
2998 /* We will not need "related_cands" bitmaps in this case,
2999 so release them to decrease peak memory consumption. */
3000 for (i = 0; i < n_iv_uses (data); i++)
3001 {
3002 use = iv_use (data, i);
3003 BITMAP_FREE (use->related_cands);
3004 }
3005 }
3006 else
3007 {
3008 /* Add important candidates to the related_cands bitmaps. */
3009 for (i = 0; i < n_iv_uses (data); i++)
3010 bitmap_ior_into (iv_use (data, i)->related_cands,
3011 data->important_candidates);
3012 }
3013 }
3014
3015 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3016 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3017 we allocate a simple list for every use. */
3018
3019 static void
3020 alloc_use_cost_map (struct ivopts_data *data)
3021 {
3022 unsigned i, size, s;
3023
3024 for (i = 0; i < n_iv_uses (data); i++)
3025 {
3026 struct iv_use *use = iv_use (data, i);
3027
3028 if (data->consider_all_candidates)
3029 size = n_iv_cands (data);
3030 else
3031 {
3032 s = bitmap_count_bits (use->related_cands);
3033
3034 /* Round up to the nearest power of two, so that reducing modulo it is fast. */
3035 size = s ? (1 << ceil_log2 (s)) : 1;
3036 }
3037
3038 use->n_map_members = size;
3039 use->cost_map = XCNEWVEC (struct cost_pair, size);
3040 }
3041 }
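
/* E.g. a use related to five candidates gets size = 1 << ceil_log2 (5)
   = 8 map members, so set_use_iv_cost can reduce candidate ids with a
   simple mask instead of a division.  */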
3042
3043 /* Returns a description of the computation cost of an expression whose
3044 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3045
3046 static comp_cost
3047 new_cost (unsigned runtime, unsigned complexity)
3048 {
3049 comp_cost cost;
3050
3051 cost.cost = runtime;
3052 cost.complexity = complexity;
3053
3054 return cost;
3055 }
3056
3057 /* Returns true if COST is infinite. */
3058
3059 static bool
3060 infinite_cost_p (comp_cost cost)
3061 {
3062 return cost.cost == INFTY;
3063 }
3064
3065 /* Adds costs COST1 and COST2. */
3066
3067 static comp_cost
3068 add_costs (comp_cost cost1, comp_cost cost2)
3069 {
3070 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3071 return infinite_cost;
3072
3073 cost1.cost += cost2.cost;
3074 cost1.complexity += cost2.complexity;
3075
3076 return cost1;
3077 }

3078 /* Subtracts cost COST2 from COST1. */
3079
3080 static comp_cost
3081 sub_costs (comp_cost cost1, comp_cost cost2)
3082 {
3083 cost1.cost -= cost2.cost;
3084 cost1.complexity -= cost2.complexity;
3085
3086 return cost1;
3087 }
3088
3089 /* Returns a negative number if COST1 < COST2, a positive number if
3090 COST1 > COST2, and 0 if COST1 = COST2. */
3091
3092 static int
3093 compare_costs (comp_cost cost1, comp_cost cost2)
3094 {
3095 if (cost1.cost == cost2.cost)
3096 return cost1.complexity - cost2.complexity;
3097
3098 return cost1.cost - cost2.cost;
3099 }
3100
3101 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3102 depends on invariants DEPENDS_ON, that the value used in expressing it
3103 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3104
3105 static void
3106 set_use_iv_cost (struct ivopts_data *data,
3107 struct iv_use *use, struct iv_cand *cand,
3108 comp_cost cost, bitmap depends_on, tree value,
3109 enum tree_code comp, int inv_expr_id)
3110 {
3111 unsigned i, s;
3112
3113 if (infinite_cost_p (cost))
3114 {
3115 BITMAP_FREE (depends_on);
3116 return;
3117 }
3118
3119 if (data->consider_all_candidates)
3120 {
3121 use->cost_map[cand->id].cand = cand;
3122 use->cost_map[cand->id].cost = cost;
3123 use->cost_map[cand->id].depends_on = depends_on;
3124 use->cost_map[cand->id].value = value;
3125 use->cost_map[cand->id].comp = comp;
3126 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3127 return;
3128 }
3129
3130 /* n_map_members is a power of two, so this computes modulo. */
3131 s = cand->id & (use->n_map_members - 1);
3132 for (i = s; i < use->n_map_members; i++)
3133 if (!use->cost_map[i].cand)
3134 goto found;
3135 for (i = 0; i < s; i++)
3136 if (!use->cost_map[i].cand)
3137 goto found;
3138
3139 gcc_unreachable ();
3140
3141 found:
3142 use->cost_map[i].cand = cand;
3143 use->cost_map[i].cost = cost;
3144 use->cost_map[i].depends_on = depends_on;
3145 use->cost_map[i].value = value;
3146 use->cost_map[i].comp = comp;
3147 use->cost_map[i].inv_expr_id = inv_expr_id;
3148 }
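
/* The cost map is a small open-addressing hash table: with
   n_map_members == 8, for example, candidate id 11 hashes to slot
   11 & 7 == 3 and, on a collision, the scans above probe slots 4..7
   and then 0..2 until a free slot is found.  */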
3149
3150 /* Gets cost of (USE, CANDIDATE) pair. */
3151
3152 static struct cost_pair *
3153 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3154 struct iv_cand *cand)
3155 {
3156 unsigned i, s;
3157 struct cost_pair *ret;
3158
3159 if (!cand)
3160 return NULL;
3161
3162 if (data->consider_all_candidates)
3163 {
3164 ret = use->cost_map + cand->id;
3165 if (!ret->cand)
3166 return NULL;
3167
3168 return ret;
3169 }
3170
3171 /* n_map_members is a power of two, so this computes modulo. */
3172 s = cand->id & (use->n_map_members - 1);
3173 for (i = s; i < use->n_map_members; i++)
3174 if (use->cost_map[i].cand == cand)
3175 return use->cost_map + i;
3176 else if (use->cost_map[i].cand == NULL)
3177 return NULL;
3178 for (i = 0; i < s; i++)
3179 if (use->cost_map[i].cand == cand)
3180 return use->cost_map + i;
3181 else if (use->cost_map[i].cand == NULL)
3182 return NULL;
3183
3184 return NULL;
3185 }
3186
3187 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3188 static rtx
3189 produce_memory_decl_rtl (tree obj, int *regno)
3190 {
3191 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3192 machine_mode address_mode = targetm.addr_space.address_mode (as);
3193 rtx x;
3194
3195 gcc_assert (obj);
3196 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3197 {
3198 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3199 x = gen_rtx_SYMBOL_REF (address_mode, name);
3200 SET_SYMBOL_REF_DECL (x, obj);
3201 x = gen_rtx_MEM (DECL_MODE (obj), x);
3202 set_mem_addr_space (x, as);
3203 targetm.encode_section_info (obj, x, true);
3204 }
3205 else
3206 {
3207 x = gen_raw_REG (address_mode, (*regno)++);
3208 x = gen_rtx_MEM (DECL_MODE (obj), x);
3209 set_mem_addr_space (x, as);
3210 }
3211
3212 return x;
3213 }
3214
3215 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3216 walk_tree. DATA contains the actual fake register number. */
3217
3218 static tree
3219 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3220 {
3221 tree obj = NULL_TREE;
3222 rtx x = NULL_RTX;
3223 int *regno = (int *) data;
3224
3225 switch (TREE_CODE (*expr_p))
3226 {
3227 case ADDR_EXPR:
3228 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3229 handled_component_p (*expr_p);
3230 expr_p = &TREE_OPERAND (*expr_p, 0))
3231 continue;
3232 obj = *expr_p;
3233 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3234 x = produce_memory_decl_rtl (obj, regno);
3235 break;
3236
3237 case SSA_NAME:
3238 *ws = 0;
3239 obj = SSA_NAME_VAR (*expr_p);
3240 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3241 if (!obj)
3242 return NULL_TREE;
3243 if (!DECL_RTL_SET_P (obj))
3244 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3245 break;
3246
3247 case VAR_DECL:
3248 case PARM_DECL:
3249 case RESULT_DECL:
3250 *ws = 0;
3251 obj = *expr_p;
3252
3253 if (DECL_RTL_SET_P (obj))
3254 break;
3255
3256 if (DECL_MODE (obj) == BLKmode)
3257 x = produce_memory_decl_rtl (obj, regno);
3258 else
3259 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3260
3261 break;
3262
3263 default:
3264 break;
3265 }
3266
3267 if (x)
3268 {
3269 decl_rtl_to_reset.safe_push (obj);
3270 SET_DECL_RTL (obj, x);
3271 }
3272
3273 return NULL_TREE;
3274 }
3275
3276 /* Determines cost of the computation of EXPR. */
3277
3278 static unsigned
3279 computation_cost (tree expr, bool speed)
3280 {
3281 rtx_insn *seq;
3282 rtx rslt;
3283 tree type = TREE_TYPE (expr);
3284 unsigned cost;
3285 /* Avoid using hard regs in ways which may be unsupported. */
3286 int regno = LAST_VIRTUAL_REGISTER + 1;
3287 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3288 enum node_frequency real_frequency = node->frequency;
3289
3290 node->frequency = NODE_FREQUENCY_NORMAL;
3291 crtl->maybe_hot_insn_p = speed;
3292 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3293 start_sequence ();
3294 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3295 seq = get_insns ();
3296 end_sequence ();
3297 default_rtl_profile ();
3298 node->frequency = real_frequency;
3299
3300 cost = seq_cost (seq, speed);
3301 if (MEM_P (rslt))
3302 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3303 TYPE_ADDR_SPACE (type), speed);
3304 else if (!REG_P (rslt))
3305 cost += set_src_cost (rslt, speed);
3306
3307 return cost;
3308 }
3309
3310 /* Returns variable containing the value of candidate CAND at statement AT. */
3311
3312 static tree
3313 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3314 {
3315 if (stmt_after_increment (loop, cand, stmt))
3316 return cand->var_after;
3317 else
3318 return cand->var_before;
3319 }
3320
3321 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3322 same precision that is at least as wide as the precision of TYPE, stores
3323 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3324 type of A and B. */
3325
3326 static tree
3327 determine_common_wider_type (tree *a, tree *b)
3328 {
3329 tree wider_type = NULL;
3330 tree suba, subb;
3331 tree atype = TREE_TYPE (*a);
3332
3333 if (CONVERT_EXPR_P (*a))
3334 {
3335 suba = TREE_OPERAND (*a, 0);
3336 wider_type = TREE_TYPE (suba);
3337 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3338 return atype;
3339 }
3340 else
3341 return atype;
3342
3343 if (CONVERT_EXPR_P (*b))
3344 {
3345 subb = TREE_OPERAND (*b, 0);
3346 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3347 return atype;
3348 }
3349 else
3350 return atype;
3351
3352 *a = suba;
3353 *b = subb;
3354 return wider_type;
3355 }
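
/* For instance (a hypothetical case): if *A is (unsigned int) la and
   *B is (unsigned int) lb, where la and lb have type long, both
   operands are replaced by their long subexpressions and the function
   returns long, so the difference can be folded in the wider type.  */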
3356
3357 /* Determines the expression by which USE is expressed from induction variable
3358 CAND at statement AT in LOOP. The expression is stored in a decomposed
3359 form into AFF. Returns false if USE cannot be expressed using CAND. */
3360
3361 static bool
3362 get_computation_aff (struct loop *loop,
3363 struct iv_use *use, struct iv_cand *cand, gimple at,
3364 struct aff_tree *aff)
3365 {
3366 tree ubase = use->iv->base;
3367 tree ustep = use->iv->step;
3368 tree cbase = cand->iv->base;
3369 tree cstep = cand->iv->step, cstep_common;
3370 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3371 tree common_type, var;
3372 tree uutype;
3373 aff_tree cbase_aff, var_aff;
3374 widest_int rat;
3375
3376 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3377 {
3378 /* We do not have enough precision to express the values of the use. */
3379 return false;
3380 }
3381
3382 var = var_at_stmt (loop, cand, at);
3383 uutype = unsigned_type_for (utype);
3384
3385 /* If the conversion is not noop, perform it. */
3386 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3387 {
3388 cstep = fold_convert (uutype, cstep);
3389 cbase = fold_convert (uutype, cbase);
3390 var = fold_convert (uutype, var);
3391 }
3392
3393 if (!constant_multiple_of (ustep, cstep, &rat))
3394 return false;
3395
3396 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3397 type, we achieve better folding by computing their difference in this
3398 wider type, and casting the result to UUTYPE. We do not need to worry about
3399 overflows, as all the arithmetic will in the end be performed in UUTYPE
3400 anyway. */
3401 common_type = determine_common_wider_type (&ubase, &cbase);
3402
3403 /* use = ubase - ratio * cbase + ratio * var. */
3404 tree_to_aff_combination (ubase, common_type, aff);
3405 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3406 tree_to_aff_combination (var, uutype, &var_aff);
3407
3408 /* We need to shift the value if we are after the increment. */
3409 if (stmt_after_increment (loop, cand, at))
3410 {
3411 aff_tree cstep_aff;
3412
3413 if (common_type != uutype)
3414 cstep_common = fold_convert (common_type, cstep);
3415 else
3416 cstep_common = cstep;
3417
3418 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3419 aff_combination_add (&cbase_aff, &cstep_aff);
3420 }
3421
3422 aff_combination_scale (&cbase_aff, -rat);
3423 aff_combination_add (aff, &cbase_aff);
3424 if (common_type != uutype)
3425 aff_combination_convert (aff, uutype);
3426
3427 aff_combination_scale (&var_aff, rat);
3428 aff_combination_add (aff, &var_aff);
3429
3430 return true;
3431 }
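
/* A small worked instance of "use = ubase - ratio * cbase + ratio * var":
   expressing a use with iv {base = 4, step = 4} from a candidate with
   iv {base = 0, step = 1} gives ratio = 4, so the affine result is
   4 - 4 * 0 + 4 * var, i.e. use = 4 * var + 4.  */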
3432
3433 /* Return the type of USE. */
3434
3435 static tree
3436 get_use_type (struct iv_use *use)
3437 {
3438 tree base_type = TREE_TYPE (use->iv->base);
3439 tree type;
3440
3441 if (use->type == USE_ADDRESS)
3442 {
3443 /* The base_type may be a void pointer. Create a pointer type based on
3444 the mem_ref instead. */
3445 type = build_pointer_type (TREE_TYPE (*use->op_p));
3446 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3447 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3448 }
3449 else
3450 type = base_type;
3451
3452 return type;
3453 }
3454
3455 /* Determines the expression by which USE is expressed from induction variable
3456 CAND at statement AT in LOOP. The computation is unshared. */
3457
3458 static tree
3459 get_computation_at (struct loop *loop,
3460 struct iv_use *use, struct iv_cand *cand, gimple at)
3461 {
3462 aff_tree aff;
3463 tree type = get_use_type (use);
3464
3465 if (!get_computation_aff (loop, use, cand, at, &aff))
3466 return NULL_TREE;
3467 unshare_aff_combination (&aff);
3468 return fold_convert (type, aff_combination_to_tree (&aff));
3469 }
3470
3471 /* Determines the expression by which USE is expressed from induction variable
3472 CAND in LOOP. The computation is unshared. */
3473
3474 static tree
3475 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3476 {
3477 return get_computation_at (loop, use, cand, use->stmt);
3478 }
3479
3480 /* Adjust the cost COST for being in loop setup rather than loop body.
3481 If we're optimizing for space, the loop setup overhead is constant;
3482 if we're optimizing for speed, amortize it over the per-iteration cost. */
3483 static unsigned
3484 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3485 {
3486 if (cost == INFTY)
3487 return cost;
3488 else if (optimize_loop_for_speed_p (data->current_loop))
3489 return cost / avg_loop_niter (data->current_loop);
3490 else
3491 return cost;
3492 }
3493
3494 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3495 validity for a memory reference accessing memory of mode MODE in
3496 address space AS. */
3497
3499 bool
3500 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3501 addr_space_t as)
3502 {
3503 #define MAX_RATIO 128
3504 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3505 static vec<sbitmap> valid_mult_list;
3506 sbitmap valid_mult;
3507
3508 if (data_index >= valid_mult_list.length ())
3509 valid_mult_list.safe_grow_cleared (data_index + 1);
3510
3511 valid_mult = valid_mult_list[data_index];
3512 if (!valid_mult)
3513 {
3514 machine_mode address_mode = targetm.addr_space.address_mode (as);
3515 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3516 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3517 rtx addr, scaled;
3518 HOST_WIDE_INT i;
3519
3520 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3521 bitmap_clear (valid_mult);
3522 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3523 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3524 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3525 {
3526 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3527 if (memory_address_addr_space_p (mode, addr, as)
3528 || memory_address_addr_space_p (mode, scaled, as))
3529 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3530 }
3531
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3533 {
3534 fprintf (dump_file, " allowed multipliers:");
3535 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3536 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3537 fprintf (dump_file, " %d", (int) i);
3538 fprintf (dump_file, "\n");
3539 fprintf (dump_file, "\n");
3540 }
3541
3542 valid_mult_list[data_index] = valid_mult;
3543 }
3544
3545 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3546 return false;
3547
3548 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3549 }
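
/* The answer is target-specific; as a plausible example (an assumption,
   not a statement about any particular port), a target with scaled
   index addressing in the x86 style would typically accept ratios 1,
   2, 4 and 8 and reject everything else.  */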
3550
3551 /* Returns the cost of an address of shape symbol + var + OFFSET + RATIO * index.
3552 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3553 the variable is omitted. Compute the cost for a memory reference that
3554 accesses a memory location of mode MEM_MODE in address space AS.
3555
3556 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3557 size of MEM_MODE / RATIO) is available. To make this determination, we
3558 look at the size of the increment to be made, which is given in CSTEP.
3559 CSTEP may be zero if the step is unknown.
3560 STMT_AFTER_INC is true iff the statement we're looking at is after the
3561 increment of the original biv.
3562
3563 TODO -- there must be some better way. This all is quite crude. */
3564
3565 enum ainc_type
3566 {
3567 AINC_PRE_INC, /* Pre increment. */
3568 AINC_PRE_DEC, /* Pre decrement. */
3569 AINC_POST_INC, /* Post increment. */
3570 AINC_POST_DEC, /* Post decrement. */
3571 AINC_NONE /* Also the number of auto increment types. */
3572 };
3573
3574 typedef struct address_cost_data_s
3575 {
3576 HOST_WIDE_INT min_offset, max_offset;
3577 unsigned costs[2][2][2][2];
3578 unsigned ainc_costs[AINC_NONE];
3579 } *address_cost_data;
3580
3581
3582 static comp_cost
3583 get_address_cost (bool symbol_present, bool var_present,
3584 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3585 HOST_WIDE_INT cstep, machine_mode mem_mode,
3586 addr_space_t as, bool speed,
3587 bool stmt_after_inc, bool *may_autoinc)
3588 {
3589 machine_mode address_mode = targetm.addr_space.address_mode (as);
3590 static vec<address_cost_data> address_cost_data_list;
3591 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3592 address_cost_data data;
3593 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3594 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3595 unsigned cost, acost, complexity;
3596 enum ainc_type autoinc_type;
3597 bool offset_p, ratio_p, autoinc;
3598 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3599 unsigned HOST_WIDE_INT mask;
3600 unsigned bits;
3601
3602 if (data_index >= address_cost_data_list.length ())
3603 address_cost_data_list.safe_grow_cleared (data_index + 1);
3604
3605 data = address_cost_data_list[data_index];
3606 if (!data)
3607 {
3608 HOST_WIDE_INT i;
3609 HOST_WIDE_INT rat, off = 0;
3610 int old_cse_not_expected, width;
3611 unsigned sym_p, var_p, off_p, rat_p, add_c;
3612 rtx_insn *seq;
3613 rtx addr, base;
3614 rtx reg0, reg1;
3615
3616 data = (address_cost_data) xcalloc (1, sizeof (*data));
3617
3618 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3619
3620 width = GET_MODE_BITSIZE (address_mode) - 1;
3621 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3622 width = HOST_BITS_PER_WIDE_INT - 1;
3623 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3624
3625 for (i = width; i >= 0; i--)
3626 {
3627 off = -((unsigned HOST_WIDE_INT) 1 << i);
3628 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3629 if (memory_address_addr_space_p (mem_mode, addr, as))
3630 break;
3631 }
3632 data->min_offset = (i == -1? 0 : off);
3633
3634 for (i = width; i >= 0; i--)
3635 {
3636 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3637 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3638 if (memory_address_addr_space_p (mem_mode, addr, as))
3639 break;
3640 /* For some strict-alignment targets, the offset must be naturally
3641 aligned. Try an aligned offset if mem_mode is not QImode. */
3642 off = mem_mode != QImode
3643 ? ((unsigned HOST_WIDE_INT) 1 << i)
3644 - GET_MODE_SIZE (mem_mode)
3645 : 0;
3646 if (off > 0)
3647 {
3648 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3649 if (memory_address_addr_space_p (mem_mode, addr, as))
3650 break;
3651 }
3652 }
3653 if (i == -1)
3654 off = 0;
3655 data->max_offset = off;
3656
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3658 {
3659 fprintf (dump_file, "get_address_cost:\n");
3660 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3661 GET_MODE_NAME (mem_mode),
3662 data->min_offset);
3663 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3664 GET_MODE_NAME (mem_mode),
3665 data->max_offset);
3666 }
3667
3668 rat = 1;
3669 for (i = 2; i <= MAX_RATIO; i++)
3670 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3671 {
3672 rat = i;
3673 break;
3674 }
3675
3676 /* Compute the cost of various addressing modes. */
3677 acost = 0;
3678 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3679 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3680
3681 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3682 || USE_STORE_PRE_DECREMENT (mem_mode))
3683 {
3684 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3685 has_predec[mem_mode]
3686 = memory_address_addr_space_p (mem_mode, addr, as);
3687
3688 if (has_predec[mem_mode])
3689 data->ainc_costs[AINC_PRE_DEC]
3690 = address_cost (addr, mem_mode, as, speed);
3691 }
3692 if (USE_LOAD_POST_DECREMENT (mem_mode)
3693 || USE_STORE_POST_DECREMENT (mem_mode))
3694 {
3695 addr = gen_rtx_POST_DEC (address_mode, reg0);
3696 has_postdec[mem_mode]
3697 = memory_address_addr_space_p (mem_mode, addr, as);
3698
3699 if (has_postdec[mem_mode])
3700 data->ainc_costs[AINC_POST_DEC]
3701 = address_cost (addr, mem_mode, as, speed);
3702 }
3703 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3704 || USE_STORE_PRE_INCREMENT (mem_mode))
3705 {
3706 addr = gen_rtx_PRE_INC (address_mode, reg0);
3707 has_preinc[mem_mode]
3708 = memory_address_addr_space_p (mem_mode, addr, as);
3709
3710 if (has_preinc[mem_mode])
3711 data->ainc_costs[AINC_PRE_INC]
3712 = address_cost (addr, mem_mode, as, speed);
3713 }
3714 if (USE_LOAD_POST_INCREMENT (mem_mode)
3715 || USE_STORE_POST_INCREMENT (mem_mode))
3716 {
3717 addr = gen_rtx_POST_INC (address_mode, reg0);
3718 has_postinc[mem_mode]
3719 = memory_address_addr_space_p (mem_mode, addr, as);
3720
3721 if (has_postinc[mem_mode])
3722 data->ainc_costs[AINC_POST_INC]
3723 = address_cost (addr, mem_mode, as, speed);
3724 }
3725 for (i = 0; i < 16; i++)
3726 {
3727 sym_p = i & 1;
3728 var_p = (i >> 1) & 1;
3729 off_p = (i >> 2) & 1;
3730 rat_p = (i >> 3) & 1;
3731
3732 addr = reg0;
3733 if (rat_p)
3734 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3735 gen_int_mode (rat, address_mode));
3736
3737 if (var_p)
3738 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3739
3740 if (sym_p)
3741 {
3742 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3743 /* ??? We can run into trouble with some backends by presenting
3744 it with symbols which haven't been properly passed through
3745 targetm.encode_section_info. By setting the local bit, we
3746 enhance the probability of things working. */
3747 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3748
3749 if (off_p)
3750 base = gen_rtx_fmt_e (CONST, address_mode,
3751 gen_rtx_fmt_ee
3752 (PLUS, address_mode, base,
3753 gen_int_mode (off, address_mode)));
3754 }
3755 else if (off_p)
3756 base = gen_int_mode (off, address_mode);
3757 else
3758 base = NULL_RTX;
3759
3760 if (base)
3761 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3762
3763 start_sequence ();
3764 /* To avoid splitting addressing modes, pretend that no cse will
3765 follow. */
3766 old_cse_not_expected = cse_not_expected;
3767 cse_not_expected = true;
3768 addr = memory_address_addr_space (mem_mode, addr, as);
3769 cse_not_expected = old_cse_not_expected;
3770 seq = get_insns ();
3771 end_sequence ();
3772
3773 acost = seq_cost (seq, speed);
3774 acost += address_cost (addr, mem_mode, as, speed);
3775
3776 if (!acost)
3777 acost = 1;
3778 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3779 }
3780
3781 /* On some targets, it is quite expensive to load a symbol into a register,
3782 which makes addresses that contain symbols look much more expensive.
3783 However, the symbol will have to be loaded in any case before the
3784 loop (and quite likely we have it in a register already), so it does not
3785 make much sense to penalize them too heavily. So make some final
3786 tweaks for the SYMBOL_PRESENT modes:
3787
3788 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3789 var is cheaper, use this mode with small penalty.
3790 If VAR_PRESENT is true, try whether the mode with
3791 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3792 if this is the case, use it. */
3793 add_c = add_cost (speed, address_mode);
3794 for (i = 0; i < 8; i++)
3795 {
3796 var_p = i & 1;
3797 off_p = (i >> 1) & 1;
3798 rat_p = (i >> 2) & 1;
3799
3800 acost = data->costs[0][1][off_p][rat_p] + 1;
3801 if (var_p)
3802 acost += add_c;
3803
3804 if (acost < data->costs[1][var_p][off_p][rat_p])
3805 data->costs[1][var_p][off_p][rat_p] = acost;
3806 }
3807
3808 if (dump_file && (dump_flags & TDF_DETAILS))
3809 {
3810 fprintf (dump_file, "Address costs:\n");
3811
3812 for (i = 0; i < 16; i++)
3813 {
3814 sym_p = i & 1;
3815 var_p = (i >> 1) & 1;
3816 off_p = (i >> 2) & 1;
3817 rat_p = (i >> 3) & 1;
3818
3819 fprintf (dump_file, " ");
3820 if (sym_p)
3821 fprintf (dump_file, "sym + ");
3822 if (var_p)
3823 fprintf (dump_file, "var + ");
3824 if (off_p)
3825 fprintf (dump_file, "cst + ");
3826 if (rat_p)
3827 fprintf (dump_file, "rat * ");
3828
3829 acost = data->costs[sym_p][var_p][off_p][rat_p];
3830 fprintf (dump_file, "index costs %d\n", acost);
3831 }
3832 if (has_predec[mem_mode] || has_postdec[mem_mode]
3833 || has_preinc[mem_mode] || has_postinc[mem_mode])
3834 fprintf (dump_file, " May include autoinc/dec\n");
3835 fprintf (dump_file, "\n");
3836 }
3837
3838 address_cost_data_list[data_index] = data;
3839 }
3840
3841 bits = GET_MODE_BITSIZE (address_mode);
3842 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3843 offset &= mask;
3844 if ((offset >> (bits - 1) & 1))
3845 offset |= ~mask;
3846 s_offset = offset;
3847
3848 autoinc = false;
3849 autoinc_type = AINC_NONE;
3850 msize = GET_MODE_SIZE (mem_mode);
3851 autoinc_offset = offset;
3852 if (stmt_after_inc)
3853 autoinc_offset += ratio * cstep;
3854 if (symbol_present || var_present || ratio != 1)
3855 autoinc = false;
3856 else
3857 {
3858 if (has_postinc[mem_mode] && autoinc_offset == 0
3859 && msize == cstep)
3860 autoinc_type = AINC_POST_INC;
3861 else if (has_postdec[mem_mode] && autoinc_offset == 0
3862 && msize == -cstep)
3863 autoinc_type = AINC_POST_DEC;
3864 else if (has_preinc[mem_mode] && autoinc_offset == msize
3865 && msize == cstep)
3866 autoinc_type = AINC_PRE_INC;
3867 else if (has_predec[mem_mode] && autoinc_offset == -msize
3868 && msize == -cstep)
3869 autoinc_type = AINC_PRE_DEC;
3870
3871 if (autoinc_type != AINC_NONE)
3872 autoinc = true;
3873 }
3874
3875 cost = 0;
3876 offset_p = (s_offset != 0
3877 && data->min_offset <= s_offset
3878 && s_offset <= data->max_offset);
3879 ratio_p = (ratio != 1
3880 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3881
3882 if (ratio != 1 && !ratio_p)
3883 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3884
3885 if (s_offset && !offset_p && !symbol_present)
3886 cost += add_cost (speed, address_mode);
3887
3888 if (may_autoinc)
3889 *may_autoinc = autoinc;
3890 if (autoinc)
3891 acost = data->ainc_costs[autoinc_type];
3892 else
3893 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3894 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3895 return new_cost (cost + acost, complexity);
3896 }
3897
3898 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3899 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3900 calculating the operands of EXPR. Returns true if successful, and returns
3901 the cost in COST. */
3902
3903 static bool
3904 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3905 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3906 {
3907 comp_cost res;
3908 tree op1 = TREE_OPERAND (expr, 1);
3909 tree cst = TREE_OPERAND (mult, 1);
3910 tree multop = TREE_OPERAND (mult, 0);
3911 int m = exact_log2 (int_cst_value (cst));
3912 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3913 int as_cost, sa_cost;
3914 bool mult_in_op1;
3915
3916 if (!(m >= 0 && m < maxm))
3917 return false;
3918
3919 mult_in_op1 = operand_equal_p (op1, mult, 0);
3920
3921 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3922
3923 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3924 use that in preference to a shift insn followed by an add insn. */
3925 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3926 ? shiftadd_cost (speed, mode, m)
3927 : (mult_in_op1
3928 ? shiftsub1_cost (speed, mode, m)
3929 : shiftsub0_cost (speed, mode, m)));
3930
3931 res = new_cost (MIN (as_cost, sa_cost), 0);
3932 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3933
3934 STRIP_NOPS (multop);
3935 if (!is_gimple_val (multop))
3936 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3937
3938 *cost = res;
3939 return true;
3940 }
3941
3942 /* Estimates cost of forcing expression EXPR into a variable. */
3943
3944 static comp_cost
3945 force_expr_to_var_cost (tree expr, bool speed)
3946 {
3947 static bool costs_initialized = false;
3948 static unsigned integer_cost [2];
3949 static unsigned symbol_cost [2];
3950 static unsigned address_cost [2];
3951 tree op0, op1;
3952 comp_cost cost0, cost1, cost;
3953 machine_mode mode;
3954
3955 if (!costs_initialized)
3956 {
3957 tree type = build_pointer_type (integer_type_node);
3958 tree var, addr;
3959 rtx x;
3960 int i;
3961
3962 var = create_tmp_var_raw (integer_type_node, "test_var");
3963 TREE_STATIC (var) = 1;
3964 x = produce_memory_decl_rtl (var, NULL);
3965 SET_DECL_RTL (var, x);
3966
3967 addr = build1 (ADDR_EXPR, type, var);
3968
3970 for (i = 0; i < 2; i++)
3971 {
3972 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3973 2000), i);
3974
3975 symbol_cost[i] = computation_cost (addr, i) + 1;
3976
3977 address_cost[i]
3978 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3980 {
3981 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3982 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3983 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3984 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3985 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3986 fprintf (dump_file, "\n");
3987 }
3988 }
3989
3990 costs_initialized = true;
3991 }
3992
3993 STRIP_NOPS (expr);
3994
3995 if (SSA_VAR_P (expr))
3996 return no_cost;
3997
3998 if (is_gimple_min_invariant (expr))
3999 {
4000 if (TREE_CODE (expr) == INTEGER_CST)
4001 return new_cost (integer_cost [speed], 0);
4002
4003 if (TREE_CODE (expr) == ADDR_EXPR)
4004 {
4005 tree obj = TREE_OPERAND (expr, 0);
4006
4007 if (TREE_CODE (obj) == VAR_DECL
4008 || TREE_CODE (obj) == PARM_DECL
4009 || TREE_CODE (obj) == RESULT_DECL)
4010 return new_cost (symbol_cost [speed], 0);
4011 }
4012
4013 return new_cost (address_cost [speed], 0);
4014 }
4015
4016 switch (TREE_CODE (expr))
4017 {
4018 case POINTER_PLUS_EXPR:
4019 case PLUS_EXPR:
4020 case MINUS_EXPR:
4021 case MULT_EXPR:
4022 op0 = TREE_OPERAND (expr, 0);
4023 op1 = TREE_OPERAND (expr, 1);
4024 STRIP_NOPS (op0);
4025 STRIP_NOPS (op1);
4026 break;
4027
4028 CASE_CONVERT:
4029 case NEGATE_EXPR:
4030 op0 = TREE_OPERAND (expr, 0);
4031 STRIP_NOPS (op0);
4032 op1 = NULL_TREE;
4033 break;
4034
4035 default:
4036 /* Just an arbitrary value, FIXME. */
4037 return new_cost (target_spill_cost[speed], 0);
4038 }
4039
4040 if (op0 == NULL_TREE
4041 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4042 cost0 = no_cost;
4043 else
4044 cost0 = force_expr_to_var_cost (op0, speed);
4045
4046 if (op1 == NULL_TREE
4047 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4048 cost1 = no_cost;
4049 else
4050 cost1 = force_expr_to_var_cost (op1, speed);
4051
4052 mode = TYPE_MODE (TREE_TYPE (expr));
4053 switch (TREE_CODE (expr))
4054 {
4055 case POINTER_PLUS_EXPR:
4056 case PLUS_EXPR:
4057 case MINUS_EXPR:
4058 case NEGATE_EXPR:
4059 cost = new_cost (add_cost (speed, mode), 0);
4060 if (TREE_CODE (expr) != NEGATE_EXPR)
4061 {
4062 tree mult = NULL_TREE;
4063 comp_cost sa_cost;
4064 if (TREE_CODE (op1) == MULT_EXPR)
4065 mult = op1;
4066 else if (TREE_CODE (op0) == MULT_EXPR)
4067 mult = op0;
4068
4069 if (mult != NULL_TREE
4070 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4071 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4072 speed, &sa_cost))
4073 return sa_cost;
4074 }
4075 break;
4076
4077 CASE_CONVERT:
4078 {
4079 tree inner_type, outer_type;
4080 outer_type = TREE_TYPE (expr);
4081 inner_type = TREE_TYPE (op0);
4082 cost = new_cost (convert_cost (TYPE_MODE (outer_type),
4083 TYPE_MODE (inner_type), speed), 0);
4084 }
4085 break;
4086
4087 case MULT_EXPR:
4088 if (cst_and_fits_in_hwi (op0))
4089 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4090 mode, speed), 0);
4091 else if (cst_and_fits_in_hwi (op1))
4092 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4093 mode, speed), 0);
4094 else
4095 return new_cost (target_spill_cost [speed], 0);
4096 break;
4097
4098 default:
4099 gcc_unreachable ();
4100 }
4101
4102 cost = add_costs (cost, cost0);
4103 cost = add_costs (cost, cost1);
4104
4105 /* Bound the cost by target_spill_cost. The parts of complicated
4106 computations often are either loop invariant or at least can
4107 be shared between several iv uses, so letting this grow without
4108 limits would not give reasonable results. */
4109 if (cost.cost > (int) target_spill_cost [speed])
4110 cost.cost = target_spill_cost [speed];
4111
4112 return cost;
4113 }
4114
4115 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4116 invariants the computation depends on. */
4117
4118 static comp_cost
4119 force_var_cost (struct ivopts_data *data,
4120 tree expr, bitmap *depends_on)
4121 {
4122 if (depends_on)
4123 {
4124 fd_ivopts_data = data;
4125 walk_tree (&expr, find_depends, depends_on, NULL);
4126 }
4127
4128 return force_expr_to_var_cost (expr, data->speed);
4129 }
4130
4131 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4132 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4133 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4134 invariants the computation depends on. */
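/* As a hypothetical example: for ADDR == arr[3] where ARR is a static
   array of 4-byte integers, *SYMBOL_PRESENT would be set, *VAR_PRESENT
   cleared, and 12 added to *OFFSET, at no cost.  */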
4135
4136 static comp_cost
4137 split_address_cost (struct ivopts_data *data,
4138 tree addr, bool *symbol_present, bool *var_present,
4139 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4140 {
4141 tree core;
4142 HOST_WIDE_INT bitsize;
4143 HOST_WIDE_INT bitpos;
4144 tree toffset;
4145 machine_mode mode;
4146 int unsignedp, volatilep;
4147
4148 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4149 &unsignedp, &volatilep, false);
4150
4151 if (toffset != 0
4152 || bitpos % BITS_PER_UNIT != 0
4153 || TREE_CODE (core) != VAR_DECL)
4154 {
4155 *symbol_present = false;
4156 *var_present = true;
4157 fd_ivopts_data = data;
4158 walk_tree (&addr, find_depends, depends_on, NULL);
4159 return new_cost (target_spill_cost[data->speed], 0);
4160 }
4161
4162 *offset += bitpos / BITS_PER_UNIT;
4163 if (TREE_STATIC (core)
4164 || DECL_EXTERNAL (core))
4165 {
4166 *symbol_present = true;
4167 *var_present = false;
4168 return no_cost;
4169 }
4170
4171 *symbol_present = false;
4172 *var_present = true;
4173 return no_cost;
4174 }
4175
4176 /* Estimates cost of expressing difference of addresses E1 - E2 as
4177 var + symbol + offset. The value of offset is added to OFFSET,
4178 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4179 part is missing. DEPENDS_ON is a set of the invariants the computation
4180 depends on. */
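/* For instance (illustrative values): for E1 == &arr[4] and E2 == &arr[1]
   with 4-byte elements, ptr_difference_const succeeds and 12 is simply
   added to *OFFSET at no cost; only a non-constant difference needs to
   be forced into a variable.  */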
4181
4182 static comp_cost
4183 ptr_difference_cost (struct ivopts_data *data,
4184 tree e1, tree e2, bool *symbol_present, bool *var_present,
4185 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4186 {
4187 HOST_WIDE_INT diff = 0;
4188 aff_tree aff_e1, aff_e2;
4189 tree type;
4190
4191 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4192
4193 if (ptr_difference_const (e1, e2, &diff))
4194 {
4195 *offset += diff;
4196 *symbol_present = false;
4197 *var_present = false;
4198 return no_cost;
4199 }
4200
4201 if (integer_zerop (e2))
4202 return split_address_cost (data, TREE_OPERAND (e1, 0),
4203 symbol_present, var_present, offset, depends_on);
4204
4205 *symbol_present = false;
4206 *var_present = true;
4207
4208 type = signed_type_for (TREE_TYPE (e1));
4209 tree_to_aff_combination (e1, type, &aff_e1);
4210 tree_to_aff_combination (e2, type, &aff_e2);
4211 aff_combination_scale (&aff_e2, -1);
4212 aff_combination_add (&aff_e1, &aff_e2);
4213
4214 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4215 }
4216
4217 /* Estimates cost of expressing difference E1 - E2 as
4218 var + symbol + offset. The value of offset is added to OFFSET,
4219 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4220 part is missing. DEPENDS_ON is a set of the invariants the computation
4221 depends on. */
4222
4223 static comp_cost
4224 difference_cost (struct ivopts_data *data,
4225 tree e1, tree e2, bool *symbol_present, bool *var_present,
4226 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4227 {
4228 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4229 unsigned HOST_WIDE_INT off1, off2;
4230 aff_tree aff_e1, aff_e2;
4231 tree type;
4232
4233 e1 = strip_offset (e1, &off1);
4234 e2 = strip_offset (e2, &off2);
4235 *offset += off1 - off2;
4236
4237 STRIP_NOPS (e1);
4238 STRIP_NOPS (e2);
4239
4240 if (TREE_CODE (e1) == ADDR_EXPR)
4241 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4242 offset, depends_on);
4243 *symbol_present = false;
4244
4245 if (operand_equal_p (e1, e2, 0))
4246 {
4247 *var_present = false;
4248 return no_cost;
4249 }
4250
4251 *var_present = true;
4252
4253 if (integer_zerop (e2))
4254 return force_var_cost (data, e1, depends_on);
4255
4256 if (integer_zerop (e1))
4257 {
4258 comp_cost cost = force_var_cost (data, e2, depends_on);
4259 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4260 return cost;
4261 }
4262
4263 type = signed_type_for (TREE_TYPE (e1));
4264 tree_to_aff_combination (e1, type, &aff_e1);
4265 tree_to_aff_combination (e2, type, &aff_e2);
4266 aff_combination_scale (&aff_e2, -1);
4267 aff_combination_add (&aff_e1, &aff_e2);
4268
4269 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4270 }
4271
4272 /* Returns true if AFF1 and AFF2 are identical. */
4273
4274 static bool
4275 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4276 {
4277 unsigned i;
4278
4279 if (aff1->n != aff2->n)
4280 return false;
4281
4282 for (i = 0; i < aff1->n; i++)
4283 {
4284 if (aff1->elts[i].coef != aff2->elts[i].coef)
4285 return false;
4286
4287 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4288 return false;
4289 }
4290 return true;
4291 }
4292
4293 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4294
4295 static int
4296 get_expr_id (struct ivopts_data *data, tree expr)
4297 {
4298 struct iv_inv_expr_ent ent;
4299 struct iv_inv_expr_ent **slot;
4300
4301 ent.expr = expr;
4302 ent.hash = iterative_hash_expr (expr, 0);
4303 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4304 if (*slot)
4305 return (*slot)->id;
4306
4307 *slot = XNEW (struct iv_inv_expr_ent);
4308 (*slot)->expr = expr;
4309 (*slot)->hash = ent.hash;
4310 (*slot)->id = data->inv_expr_id++;
4311 return (*slot)->id;
4312 }
4313
4314 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4315 requires a new compiler generated temporary. Returns -1 otherwise.
4316 ADDRESS_P is a flag indicating if the expression is for address
4317 computation. */
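/* For example, for UBASE == x + 8, CBASE == x and RATIO == 1 (where X is
   some hypothetical SSA name), stripping the constant parts leaves two
   equal bases, so no new temporary is needed and -1 is returned.  */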
4318
4319 static int
4320 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4321 tree cbase, HOST_WIDE_INT ratio,
4322 bool address_p)
4323 {
4324 aff_tree ubase_aff, cbase_aff;
4325 tree expr, ub, cb;
4326
4327 STRIP_NOPS (ubase);
4328 STRIP_NOPS (cbase);
4329 ub = ubase;
4330 cb = cbase;
4331
4332 if ((TREE_CODE (ubase) == INTEGER_CST)
4333 && (TREE_CODE (cbase) == INTEGER_CST))
4334 return -1;
4335
4336 /* Strips the constant part. */
4337 if (TREE_CODE (ubase) == PLUS_EXPR
4338 || TREE_CODE (ubase) == MINUS_EXPR
4339 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4340 {
4341 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4342 ubase = TREE_OPERAND (ubase, 0);
4343 }
4344
4345 /* Strips the constant part. */
4346 if (TREE_CODE (cbase) == PLUS_EXPR
4347 || TREE_CODE (cbase) == MINUS_EXPR
4348 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4349 {
4350 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4351 cbase = TREE_OPERAND (cbase, 0);
4352 }
4353
4354 if (address_p)
4355 {
4356 if (((TREE_CODE (ubase) == SSA_NAME)
4357 || (TREE_CODE (ubase) == ADDR_EXPR
4358 && is_gimple_min_invariant (ubase)))
4359 && (TREE_CODE (cbase) == INTEGER_CST))
4360 return -1;
4361
4362 if (((TREE_CODE (cbase) == SSA_NAME)
4363 || (TREE_CODE (cbase) == ADDR_EXPR
4364 && is_gimple_min_invariant (cbase)))
4365 && (TREE_CODE (ubase) == INTEGER_CST))
4366 return -1;
4367 }
4368
4369 if (ratio == 1)
4370 {
4371 if (operand_equal_p (ubase, cbase, 0))
4372 return -1;
4373
4374 if (TREE_CODE (ubase) == ADDR_EXPR
4375 && TREE_CODE (cbase) == ADDR_EXPR)
4376 {
4377 tree usym, csym;
4378
4379 usym = TREE_OPERAND (ubase, 0);
4380 csym = TREE_OPERAND (cbase, 0);
4381 if (TREE_CODE (usym) == ARRAY_REF)
4382 {
4383 tree ind = TREE_OPERAND (usym, 1);
4384 if (TREE_CODE (ind) == INTEGER_CST
4385 && tree_fits_shwi_p (ind)
4386 && tree_to_shwi (ind) == 0)
4387 usym = TREE_OPERAND (usym, 0);
4388 }
4389 if (TREE_CODE (csym) == ARRAY_REF)
4390 {
4391 tree ind = TREE_OPERAND (csym, 1);
4392 if (TREE_CODE (ind) == INTEGER_CST
4393 && tree_fits_shwi_p (ind)
4394 && tree_to_shwi (ind) == 0)
4395 csym = TREE_OPERAND (csym, 0);
4396 }
4397 if (operand_equal_p (usym, csym, 0))
4398 return -1;
4399 }
4400 /* Now do a more complex comparison.  */
4401 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4402 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4403 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4404 return -1;
4405 }
4406
4407 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4408 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4409
4410 aff_combination_scale (&cbase_aff, -1 * ratio);
4411 aff_combination_add (&ubase_aff, &cbase_aff);
4412 expr = aff_combination_to_tree (&ubase_aff);
4413 return get_expr_id (data, expr);
4414 }
4415
4416
4417
4418 /* Determines the cost of the computation by which USE is expressed
4419 from induction variable CAND. If ADDRESS_P is true, we just need
4420 to create an address from it, otherwise we want to get it into a
4421 register. A set of invariants we depend on is stored in
4422 DEPENDS_ON. AT is the statement at which the value is computed.
4423 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4424 addressing is likely. */
4425
4426 static comp_cost
4427 get_computation_cost_at (struct ivopts_data *data,
4428 struct iv_use *use, struct iv_cand *cand,
4429 bool address_p, bitmap *depends_on, gimple at,
4430 bool *can_autoinc,
4431 int *inv_expr_id)
4432 {
4433 tree ubase = use->iv->base, ustep = use->iv->step;
4434 tree cbase, cstep;
4435 tree utype = TREE_TYPE (ubase), ctype;
4436 unsigned HOST_WIDE_INT cstepi, offset = 0;
4437 HOST_WIDE_INT ratio, aratio;
4438 bool var_present, symbol_present, stmt_is_after_inc;
4439 comp_cost cost;
4440 widest_int rat;
4441 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4442 machine_mode mem_mode = (address_p
4443 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4444 : VOIDmode);
4445
4446 *depends_on = NULL;
4447
4448 /* Only consider real candidates. */
4449 if (!cand->iv)
4450 return infinite_cost;
4451
4452 cbase = cand->iv->base;
4453 cstep = cand->iv->step;
4454 ctype = TREE_TYPE (cbase);
4455
4456 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4457 {
4458 /* We do not have enough precision to express the values of the use. */
4459 return infinite_cost;
4460 }
4461
4462 if (address_p
4463 || (use->iv->base_object
4464 && cand->iv->base_object
4465 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4466 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4467 {
4468 /* Do not try to express address of an object with computation based
4469 on address of a different object. This may cause problems in rtl
4470 level alias analysis (that does not expect this to be happening,
4471 as this is illegal in C), and would be unlikely to be useful
4472 anyway. */
4473 if (use->iv->base_object
4474 && cand->iv->base_object
4475 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4476 return infinite_cost;
4477 }
4478
4479 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4480 {
4481 /* TODO -- add direct handling of this case. */
4482 goto fallback;
4483 }
4484
4485 /* CSTEPI is removed from the offset in case the statement is after the
4486 increment. If the step is not constant, we use zero instead.
4487 This is a bit imprecise (there is an extra addition), but
4488 redundancy elimination is likely to transform the code so that
4489 it uses the value of the variable before the increment anyway,
4490 so this is not that unrealistic. */
4491 if (cst_and_fits_in_hwi (cstep))
4492 cstepi = int_cst_value (cstep);
4493 else
4494 cstepi = 0;
4495
4496 if (!constant_multiple_of (ustep, cstep, &rat))
4497 return infinite_cost;
4498
4499 if (wi::fits_shwi_p (rat))
4500 ratio = rat.to_shwi ();
4501 else
4502 return infinite_cost;
4503
4504 STRIP_NOPS (cbase);
4505 ctype = TREE_TYPE (cbase);
4506
4507 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4508
4509 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4510 or ratio == 1, it is better to handle this like
4511
4512 ubase - ratio * cbase + ratio * var
4513
4514 (this also holds in the case ratio == -1, TODO). */
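/* As a worked instance (illustrative values): for UBASE == 40, CBASE == 4
   and RATIO == 2, the use is 40 + 2 * (var - 4) == 32 + 2 * var, so
   -RATIO * CBASE == -8 is folded into the offset and only the 2 * var
   part remains to be costed.  */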
4515
4516 if (cst_and_fits_in_hwi (cbase))
4517 {
4518 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4519 cost = difference_cost (data,
4520 ubase, build_int_cst (utype, 0),
4521 &symbol_present, &var_present, &offset,
4522 depends_on);
4523 cost.cost /= avg_loop_niter (data->current_loop);
4524 }
4525 else if (ratio == 1)
4526 {
4527 tree real_cbase = cbase;
4528
4529 /* Check to see if any adjustment is needed. */
4530 if (cstepi == 0 && stmt_is_after_inc)
4531 {
4532 aff_tree real_cbase_aff;
4533 aff_tree cstep_aff;
4534
4535 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4536 &real_cbase_aff);
4537 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4538
4539 aff_combination_add (&real_cbase_aff, &cstep_aff);
4540 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4541 }
4542
4543 cost = difference_cost (data,
4544 ubase, real_cbase,
4545 &symbol_present, &var_present, &offset,
4546 depends_on);
4547 cost.cost /= avg_loop_niter (data->current_loop);
4548 }
4549 else if (address_p
4550 && !POINTER_TYPE_P (ctype)
4551 && multiplier_allowed_in_address_p
4552 (ratio, mem_mode,
4553 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4554 {
4555 cbase
4556 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4557 cost = difference_cost (data,
4558 ubase, cbase,
4559 &symbol_present, &var_present, &offset,
4560 depends_on);
4561 cost.cost /= avg_loop_niter (data->current_loop);
4562 }
4563 else
4564 {
4565 cost = force_var_cost (data, cbase, depends_on);
4566 cost = add_costs (cost,
4567 difference_cost (data,
4568 ubase, build_int_cst (utype, 0),
4569 &symbol_present, &var_present,
4570 &offset, depends_on));
4571 cost.cost /= avg_loop_niter (data->current_loop);
4572 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4573 }
4574
4575 /* The set of invariants depended on by the sub-use has already been
4576 computed for the first use in the group. */
4577 if (use->sub_id)
4578 {
4579 cost.cost = 0;
4580 if (depends_on && *depends_on)
4581 bitmap_clear (*depends_on);
4582 }
4583 else if (inv_expr_id)
4584 {
4585 *inv_expr_id =
4586 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4587 /* Clear depends on. */
4588 if (*inv_expr_id != -1 && depends_on && *depends_on)
4589 bitmap_clear (*depends_on);
4590 }
4591
4592 /* If we are after the increment, the value of the candidate is higher by
4593 one iteration. */
4594 if (stmt_is_after_inc)
4595 offset -= ratio * cstepi;
4596
4597 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4598 (symbol/var1/const parts may be omitted). If we are looking for an
4599 address, find the cost of addressing this. */
4600 if (address_p)
4601 return add_costs (cost,
4602 get_address_cost (symbol_present, var_present,
4603 offset, ratio, cstepi,
4604 mem_mode,
4605 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4606 speed, stmt_is_after_inc,
4607 can_autoinc));
4608
4609 /* Otherwise estimate the costs for computing the expression. */
4610 if (!symbol_present && !var_present && !offset)
4611 {
4612 if (ratio != 1)
4613 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4614 return cost;
4615 }
4616
4617 /* Symbol + offset should be compile-time computable, so assume they
4618 are added once to the variable, if present. */
4619 if (var_present && (symbol_present || offset))
4620 cost.cost += adjust_setup_cost (data,
4621 add_cost (speed, TYPE_MODE (ctype)));
4622
4623 /* Having offset does not affect runtime cost in case it is added to
4624 symbol, but it increases complexity. */
4625 if (offset)
4626 cost.complexity++;
4627
4628 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4629
4630 aratio = ratio > 0 ? ratio : -ratio;
4631 if (aratio != 1)
4632 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4633 return cost;
4634
4635 fallback:
4636 if (can_autoinc)
4637 *can_autoinc = false;
4638
4639 {
4640 /* Just get the expression, expand it and measure the cost. */
4641 tree comp = get_computation_at (data->current_loop, use, cand, at);
4642
4643 if (!comp)
4644 return infinite_cost;
4645
4646 if (address_p)
4647 comp = build_simple_mem_ref (comp);
4648
4649 return new_cost (computation_cost (comp, speed), 0);
4650 }
4651 }
4652
4653 /* Determines the cost of the computation by which USE is expressed
4654 from induction variable CAND. If ADDRESS_P is true, we just need
4655 to create an address from it, otherwise we want to get it into a
4656 register. A set of invariants we depend on is stored in
4657 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4658 autoinc addressing is likely. */
4659
4660 static comp_cost
4661 get_computation_cost (struct ivopts_data *data,
4662 struct iv_use *use, struct iv_cand *cand,
4663 bool address_p, bitmap *depends_on,
4664 bool *can_autoinc, int *inv_expr_id)
4665 {
4666 return get_computation_cost_at (data,
4667 use, cand, address_p, depends_on, use->stmt,
4668 can_autoinc, inv_expr_id);
4669 }
4670
4671 /* Determines cost of basing replacement of USE on CAND in a generic
4672 expression. */
4673
4674 static bool
4675 determine_use_iv_cost_generic (struct ivopts_data *data,
4676 struct iv_use *use, struct iv_cand *cand)
4677 {
4678 bitmap depends_on;
4679 comp_cost cost;
4680 int inv_expr_id = -1;
4681
4682 /* The simple case first -- if we need to express value of the preserved
4683 original biv, the cost is 0. This also prevents us from counting the
4684 cost of increment twice -- once at this use and once in the cost of
4685 the candidate. */
4686 if (cand->pos == IP_ORIGINAL
4687 && cand->incremented_at == use->stmt)
4688 {
4689 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4690 ERROR_MARK, -1);
4691 return true;
4692 }
4693
4694 cost = get_computation_cost (data, use, cand, false, &depends_on,
4695 NULL, &inv_expr_id);
4696
4697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4698 inv_expr_id);
4699
4700 return !infinite_cost_p (cost);
4701 }
4702
4703 /* Determines cost of basing replacement of USE on CAND in an address. */
4704
4705 static bool
4706 determine_use_iv_cost_address (struct ivopts_data *data,
4707 struct iv_use *use, struct iv_cand *cand)
4708 {
4709 bitmap depends_on;
4710 bool can_autoinc;
4711 int inv_expr_id = -1;
4712 struct iv_use *sub_use;
4713 comp_cost sub_cost;
4714 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4715 &can_autoinc, &inv_expr_id);
4716
4717 if (cand->ainc_use == use)
4718 {
4719 if (can_autoinc)
4720 cost.cost -= cand->cost_step;
4721 /* If we generated the candidate solely for exploiting autoincrement
4722 opportunities, and it turns out it can't be used, set the cost to
4723 infinity to make sure we ignore it. */
4724 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4725 cost = infinite_cost;
4726 }
4727 for (sub_use = use->next;
4728 sub_use && !infinite_cost_p (cost);
4729 sub_use = sub_use->next)
4730 {
4731 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4732 &can_autoinc, &inv_expr_id);
4733 cost = add_costs (cost, sub_cost);
4734 }
4735
4736 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4737 inv_expr_id);
4738
4739 return !infinite_cost_p (cost);
4740 }
4741
4742 /* Computes value of candidate CAND at position AT in iteration NITER, and
4743 stores it to VAL. */
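/* In effect, VAL = BASE + (NITER + after) * STEP, where "after" is 1 if
   AT is after the increment of CAND and 0 otherwise; the arithmetic is
   carried out in an unsigned type so that wrap-around is well defined.  */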
4744
4745 static void
4746 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4747 aff_tree *val)
4748 {
4749 aff_tree step, delta, nit;
4750 struct iv *iv = cand->iv;
4751 tree type = TREE_TYPE (iv->base);
4752 /* Carry out the computation in an unsigned type (this also handles
4753 pointer bases) so that wrap-around is well defined. */
4754 tree steptype = unsigned_type_for (type);
4756
4757 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4758 aff_combination_convert (&step, steptype);
4759 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4760 aff_combination_convert (&nit, steptype);
4761 aff_combination_mult (&nit, &step, &delta);
4762 if (stmt_after_increment (loop, cand, at))
4763 aff_combination_add (&delta, &step);
4764
4765 tree_to_aff_combination (iv->base, type, val);
4766 if (!POINTER_TYPE_P (type))
4767 aff_combination_convert (val, steptype);
4768 aff_combination_add (val, &delta);
4769 }
4770
4771 /* Returns period of induction variable iv. */
4772
4773 static tree
4774 iv_period (struct iv *iv)
4775 {
4776 tree step = iv->step, period, type;
4777 tree pow2div;
4778
4779 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4780
4781 type = unsigned_type_for (TREE_TYPE (step));
4782 /* Period of the iv is lcm (step, type_range)/step - 1,
4783 i.e., N*type_range/step - 1. Since type_range is a power
4784 of two and N == step >> num_of_ending_zeros_binary (step),
4785 the final result is
4786
4787 (type_range >> num_of_ending_zeros_binary (step)) - 1
4788
4789 */
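/* For example, for a 32-bit unsigned type and STEP == 12 (two trailing
   zero bits), the period is (1 << 30) - 1 == 0x3fffffff: the iv takes
   2^30 distinct values before it starts repeating.  */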
4790 pow2div = num_ending_zeros (step);
4791
4792 period = build_low_bits_mask (type,
4793 (TYPE_PRECISION (type)
4794 - tree_to_uhwi (pow2div)));
4795
4796 return period;
4797 }
4798
4799 /* Returns the comparison operator used when eliminating the iv USE. */
4800
4801 static enum tree_code
4802 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4803 {
4804 struct loop *loop = data->current_loop;
4805 basic_block ex_bb;
4806 edge exit;
4807
4808 ex_bb = gimple_bb (use->stmt);
4809 exit = EDGE_SUCC (ex_bb, 0);
4810 if (flow_bb_inside_loop_p (loop, exit->dest))
4811 exit = EDGE_SUCC (ex_bb, 1);
4812
4813 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4814 }
4815
4816 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4817 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4818 calculation is performed in non-wrapping type.
4819
4820 TODO: More generally, we could test for the situation that
4821 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4822 This would require knowing the sign of OFFSET. */
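/* For example, if BASE is an SSA name defined by base = q + n and OFFSET
   is n (Q and N being hypothetical names), the affine expansion shows
   that BASE - OFFSET == q, so the subtraction cannot wrap.  */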
4823
4824 static bool
4825 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4826 {
4827 enum tree_code code;
4828 tree e1, e2;
4829 aff_tree aff_e1, aff_e2, aff_offset;
4830
4831 if (!nowrap_type_p (TREE_TYPE (base)))
4832 return false;
4833
4834 base = expand_simple_operations (base);
4835
4836 if (TREE_CODE (base) == SSA_NAME)
4837 {
4838 gimple stmt = SSA_NAME_DEF_STMT (base);
4839
4840 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4841 return false;
4842
4843 code = gimple_assign_rhs_code (stmt);
4844 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4845 return false;
4846
4847 e1 = gimple_assign_rhs1 (stmt);
4848 e2 = gimple_assign_rhs2 (stmt);
4849 }
4850 else
4851 {
4852 code = TREE_CODE (base);
4853 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4854 return false;
4855 e1 = TREE_OPERAND (base, 0);
4856 e2 = TREE_OPERAND (base, 1);
4857 }
4858
4859 /* Use affine expansion as deeper inspection to prove the equality. */
4860 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4861 &aff_e2, &data->name_expansion_cache);
4862 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4863 &aff_offset, &data->name_expansion_cache);
4864 aff_combination_scale (&aff_offset, -1);
4865 switch (code)
4866 {
4867 case PLUS_EXPR:
4868 aff_combination_add (&aff_e2, &aff_offset);
4869 if (aff_combination_zero_p (&aff_e2))
4870 return true;
4871
4872 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4873 &aff_e1, &data->name_expansion_cache);
4874 aff_combination_add (&aff_e1, &aff_offset);
4875 return aff_combination_zero_p (&aff_e1);
4876
4877 case POINTER_PLUS_EXPR:
4878 aff_combination_add (&aff_e2, &aff_offset);
4879 return aff_combination_zero_p (&aff_e2);
4880
4881 default:
4882 return false;
4883 }
4884 }
4885
4886 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4887 comparison with CAND. NITER describes the number of iterations of
4888 the loops. If successful, the comparison in COMP_P is altered accordingly.
4889
4890 We aim to handle the following situation:
4891
4892 sometype *base, *p;
4893 int a, b, i;
4894
4895 i = a;
4896 p = p_0 = base + a;
4897
4898 do
4899 {
4900 bla (*p);
4901 p++;
4902 i++;
4903 }
4904 while (i < b);
4905
4906 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4907 We aim to optimize this to
4908
4909 p = p_0 = base + a;
4910 do
4911 {
4912 bla (*p);
4913 p++;
4914 }
4915 while (p < p_0 - a + b);
4916
4917 This preserves correctness, since the pointer arithmetic does not
4918 overflow. More precisely:
4919
4920 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4921 overflow in computing it or the values of p.
4922 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4923 overflow. To prove this, we use the fact that p_0 = base + a. */
4924
4925 static bool
4926 iv_elimination_compare_lt (struct ivopts_data *data,
4927 struct iv_cand *cand, enum tree_code *comp_p,
4928 struct tree_niter_desc *niter)
4929 {
4930 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4931 struct aff_tree nit, tmpa, tmpb;
4932 enum tree_code comp;
4933 HOST_WIDE_INT step;
4934
4935 /* We need to know that the candidate induction variable does not overflow.
4936 While more complex analysis may be used to prove this, for now just
4937 check that the variable appears in the original program and that it
4938 is computed in a type that guarantees no overflows. */
4939 cand_type = TREE_TYPE (cand->iv->base);
4940 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4941 return false;
4942
4943 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4944 the calculation of the BOUND could overflow, making the comparison
4945 invalid. */
4946 if (!data->loop_single_exit_p)
4947 return false;
4948
4949 /* We need to be able to decide whether candidate is increasing or decreasing
4950 in order to choose the right comparison operator. */
4951 if (!cst_and_fits_in_hwi (cand->iv->step))
4952 return false;
4953 step = int_cst_value (cand->iv->step);
4954
4955 /* Check that the number of iterations matches the expected pattern:
4956 a + 1 > b ? 0 : b - a - 1. */
4957 mbz = niter->may_be_zero;
4958 if (TREE_CODE (mbz) == GT_EXPR)
4959 {
4960 /* Handle a + 1 > b. */
4961 tree op0 = TREE_OPERAND (mbz, 0);
4962 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4963 {
4964 a = TREE_OPERAND (op0, 0);
4965 b = TREE_OPERAND (mbz, 1);
4966 }
4967 else
4968 return false;
4969 }
4970 else if (TREE_CODE (mbz) == LT_EXPR)
4971 {
4972 tree op1 = TREE_OPERAND (mbz, 1);
4973
4974 /* Handle b < a + 1. */
4975 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4976 {
4977 a = TREE_OPERAND (op1, 0);
4978 b = TREE_OPERAND (mbz, 0);
4979 }
4980 else
4981 return false;
4982 }
4983 else
4984 return false;
4985
4986 /* Expected number of iterations is B - A - 1. Check that it matches
4987 the actual number, i.e., that B - A - NITER = 1. */
4988 tree_to_aff_combination (niter->niter, nit_type, &nit);
4989 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4990 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4991 aff_combination_scale (&nit, -1);
4992 aff_combination_scale (&tmpa, -1);
4993 aff_combination_add (&tmpb, &tmpa);
4994 aff_combination_add (&tmpb, &nit);
4995 if (tmpb.n != 0 || tmpb.offset != 1)
4996 return false;
4997
4998 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4999 overflow. */
5000 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5001 cand->iv->step,
5002 fold_convert (TREE_TYPE (cand->iv->step), a));
5003 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5004 return false;
5005
5006 /* Determine the new comparison operator. */
5007 comp = step < 0 ? GT_EXPR : LT_EXPR;
5008 if (*comp_p == NE_EXPR)
5009 *comp_p = comp;
5010 else if (*comp_p == EQ_EXPR)
5011 *comp_p = invert_tree_comparison (comp, false);
5012 else
5013 gcc_unreachable ();
5014
5015 return true;
5016 }
5017
5018 /* Check whether it is possible to express the condition in USE by comparison
5019 of candidate CAND. If so, store the value compared with to BOUND, and the
5020 comparison operator to COMP. */
5021
5022 static bool
5023 may_eliminate_iv (struct ivopts_data *data,
5024 struct iv_use *use, struct iv_cand *cand, tree *bound,
5025 enum tree_code *comp)
5026 {
5027 basic_block ex_bb;
5028 edge exit;
5029 tree period;
5030 struct loop *loop = data->current_loop;
5031 aff_tree bnd;
5032 struct tree_niter_desc *desc = NULL;
5033
5034 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5035 return false;
5036
5037 /* For now this works only for exits that dominate the loop latch.
5038 TODO: extend to other conditions inside loop body. */
5039 ex_bb = gimple_bb (use->stmt);
5040 if (use->stmt != last_stmt (ex_bb)
5041 || gimple_code (use->stmt) != GIMPLE_COND
5042 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5043 return false;
5044
5045 exit = EDGE_SUCC (ex_bb, 0);
5046 if (flow_bb_inside_loop_p (loop, exit->dest))
5047 exit = EDGE_SUCC (ex_bb, 1);
5048 if (flow_bb_inside_loop_p (loop, exit->dest))
5049 return false;
5050
5051 desc = niter_for_exit (data, exit);
5052 if (!desc)
5053 return false;
5054
5055 /* Determine whether we can use the variable to test the exit condition.
5056 This is the case iff the period of the induction variable is greater
5057 than the number of iterations for which the exit condition is true. */
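/* For example, an unsigned char iv stepping by 1 has period 255, so it
   cannot be used to test the exit of a loop that runs 300 iterations.  */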
5058 period = iv_period (cand->iv);
5059
5060 /* If the number of iterations is constant, compare against it directly. */
5061 if (TREE_CODE (desc->niter) == INTEGER_CST)
5062 {
5063 /* See cand_value_at. */
5064 if (stmt_after_increment (loop, cand, use->stmt))
5065 {
5066 if (!tree_int_cst_lt (desc->niter, period))
5067 return false;
5068 }
5069 else
5070 {
5071 if (tree_int_cst_lt (period, desc->niter))
5072 return false;
5073 }
5074 }
5075
5076 /* If not, and if this is the only possible exit of the loop, see whether
5077 we can get a conservative estimate on the number of iterations of the
5078 entire loop and compare against that instead. */
5079 else
5080 {
5081 widest_int period_value, max_niter;
5082
5083 max_niter = desc->max;
5084 if (stmt_after_increment (loop, cand, use->stmt))
5085 max_niter += 1;
5086 period_value = wi::to_widest (period);
5087 if (wi::gtu_p (max_niter, period_value))
5088 {
5089 /* See if we can take advantage of inferred loop bound information. */
5090 if (data->loop_single_exit_p)
5091 {
5092 if (!max_loop_iterations (loop, &max_niter))
5093 return false;
5094 /* The loop bound is already adjusted by adding 1. */
5095 if (wi::gtu_p (max_niter, period_value))
5096 return false;
5097 }
5098 else
5099 return false;
5100 }
5101 }
5102
5103 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5104
5105 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5106 aff_combination_to_tree (&bnd));
5107 *comp = iv_elimination_compare (data, use);
5108
5109 /* It is unlikely that computing the number of iterations using division
5110 would be more profitable than keeping the original induction variable. */
5111 if (expression_expensive_p (*bound))
5112 return false;
5113
5114 /* Sometimes it is possible to handle the situation that the number of
5115 iterations may be zero (unless additional assumptions hold) by using
5116 < instead of != in the exit condition.
5117
5118 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5119 base the exit condition on it. However, that is often too
5120 expensive. */
5121 if (!integer_zerop (desc->may_be_zero))
5122 return iv_elimination_compare_lt (data, cand, comp, desc);
5123
5124 return true;
5125 }
5126
5127 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5128 be copied if it is used in the loop body and DATA->body_includes_call. */
5129
5130 static int
5131 parm_decl_cost (struct ivopts_data *data, tree bound)
5132 {
5133 tree sbound = bound;
5134 STRIP_NOPS (sbound);
5135
5136 if (TREE_CODE (sbound) == SSA_NAME
5137 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5138 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5139 && data->body_includes_call)
5140 return COSTS_N_INSNS (1);
5141
5142 return 0;
5143 }
5144
5145 /* Determines cost of basing replacement of USE on CAND in a condition. */
5146
5147 static bool
5148 determine_use_iv_cost_condition (struct ivopts_data *data,
5149 struct iv_use *use, struct iv_cand *cand)
5150 {
5151 tree bound = NULL_TREE;
5152 struct iv *cmp_iv;
5153 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5154 comp_cost elim_cost, express_cost, cost, bound_cost;
5155 bool ok;
5156 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5157 tree *control_var, *bound_cst;
5158 enum tree_code comp = ERROR_MARK;
5159
5160 /* Only consider real candidates. */
5161 if (!cand->iv)
5162 {
5163 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5164 ERROR_MARK, -1);
5165 return false;
5166 }
5167
5168 /* Try iv elimination. */
5169 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5170 {
5171 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5172 if (elim_cost.cost == 0)
5173 elim_cost.cost = parm_decl_cost (data, bound);
5174 else if (TREE_CODE (bound) == INTEGER_CST)
5175 elim_cost.cost = 0;
5176 /* If we replace a loop condition 'i < n' with 'p < base + n',
5177 depends_on_elim will have 'base' and 'n' set, which implies
5178 that both 'base' and 'n' will be live during the loop. More likely,
5179 'base + n' will be loop invariant, resulting in only one live value
5180 during the loop. So in that case we clear depends_on_elim and set
5181 elim_inv_expr_id instead. */
5182 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5183 {
5184 elim_inv_expr_id = get_expr_id (data, bound);
5185 bitmap_clear (depends_on_elim);
5186 }
5187 /* The bound is a loop invariant, so it will be only computed
5188 once. */
5189 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5190 }
5191 else
5192 elim_cost = infinite_cost;
5193
5194 /* Try expressing the original giv. If it is compared with an invariant,
5195 note that we cannot get rid of it. */
5196 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5197 NULL, &cmp_iv);
5198 gcc_assert (ok);
5199
5200 /* When the condition is a comparison of the candidate IV against
5201 zero, prefer this IV.
5202
5203 TODO: The constant that we're subtracting from the cost should
5204 be target-dependent. This information should be added to the
5205 target costs for each backend. */
5206 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5207 && integer_zerop (*bound_cst)
5208 && (operand_equal_p (*control_var, cand->var_after, 0)
5209 || operand_equal_p (*control_var, cand->var_before, 0)))
5210 elim_cost.cost -= 1;
5211
5212 express_cost = get_computation_cost (data, use, cand, false,
5213 &depends_on_express, NULL,
5214 &express_inv_expr_id);
5215 fd_ivopts_data = data;
5216 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5217
5218 /* Count the cost of the original bound as well. */
5219 bound_cost = force_var_cost (data, *bound_cst, NULL);
5220 if (bound_cost.cost == 0)
5221 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5222 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5223 bound_cost.cost = 0;
5224 express_cost.cost += bound_cost.cost;
5225
5226 /* Choose the better approach, preferring the eliminated IV. */
5227 if (compare_costs (elim_cost, express_cost) <= 0)
5228 {
5229 cost = elim_cost;
5230 depends_on = depends_on_elim;
5231 depends_on_elim = NULL;
5232 inv_expr_id = elim_inv_expr_id;
5233 }
5234 else
5235 {
5236 cost = express_cost;
5237 depends_on = depends_on_express;
5238 depends_on_express = NULL;
5239 bound = NULL_TREE;
5240 comp = ERROR_MARK;
5241 inv_expr_id = express_inv_expr_id;
5242 }
5243
5244 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5245
5246 if (depends_on_elim)
5247 BITMAP_FREE (depends_on_elim);
5248 if (depends_on_express)
5249 BITMAP_FREE (depends_on_express);
5250
5251 return !infinite_cost_p (cost);
5252 }
5253
5254 /* Determines cost of basing replacement of USE on CAND. Returns false
5255 if USE cannot be based on CAND. */
5256
5257 static bool
5258 determine_use_iv_cost (struct ivopts_data *data,
5259 struct iv_use *use, struct iv_cand *cand)
5260 {
5261 switch (use->type)
5262 {
5263 case USE_NONLINEAR_EXPR:
5264 return determine_use_iv_cost_generic (data, use, cand);
5265
5266 case USE_ADDRESS:
5267 return determine_use_iv_cost_address (data, use, cand);
5268
5269 case USE_COMPARE:
5270 return determine_use_iv_cost_condition (data, use, cand);
5271
5272 default:
5273 gcc_unreachable ();
5274 }
5275 }
5276
5277 /* Return true if get_computation_cost indicates that autoincrement is
5278 a possibility for the pair of USE and CAND, false otherwise. */
5279
5280 static bool
5281 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5282 struct iv_cand *cand)
5283 {
5284 bitmap depends_on;
5285 bool can_autoinc;
5286 comp_cost cost;
5287
5288 if (use->type != USE_ADDRESS)
5289 return false;
5290
5291 cost = get_computation_cost (data, use, cand, true, &depends_on,
5292 &can_autoinc, NULL);
5293
5294 BITMAP_FREE (depends_on);
5295
5296 return !infinite_cost_p (cost) && can_autoinc;
5297 }
5298
5299 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5300 use that allows autoincrement, and set their AINC_USE if possible. */
5301
5302 static void
5303 set_autoinc_for_original_candidates (struct ivopts_data *data)
5304 {
5305 unsigned i, j;
5306
5307 for (i = 0; i < n_iv_cands (data); i++)
5308 {
5309 struct iv_cand *cand = iv_cand (data, i);
5310 struct iv_use *closest_before = NULL;
5311 struct iv_use *closest_after = NULL;
5312 if (cand->pos != IP_ORIGINAL)
5313 continue;
5314
5315 for (j = 0; j < n_iv_uses (data); j++)
5316 {
5317 struct iv_use *use = iv_use (data, j);
5318 unsigned uid = gimple_uid (use->stmt);
5319
5320 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5321 continue;
5322
5323 if (uid < gimple_uid (cand->incremented_at)
5324 && (closest_before == NULL
5325 || uid > gimple_uid (closest_before->stmt)))
5326 closest_before = use;
5327
5328 if (uid > gimple_uid (cand->incremented_at)
5329 && (closest_after == NULL
5330 || uid < gimple_uid (closest_after->stmt)))
5331 closest_after = use;
5332 }
5333
5334 if (closest_before != NULL
5335 && autoinc_possible_for_pair (data, closest_before, cand))
5336 cand->ainc_use = closest_before;
5337 else if (closest_after != NULL
5338 && autoinc_possible_for_pair (data, closest_after, cand))
5339 cand->ainc_use = closest_after;
5340 }
5341 }
5342
5343 /* Finds the candidates for the induction variables. */
5344
5345 static void
5346 find_iv_candidates (struct ivopts_data *data)
5347 {
5348 /* Add commonly used ivs. */
5349 add_standard_iv_candidates (data);
5350
5351 /* Add old induction variables. */
5352 add_old_ivs_candidates (data);
5353
5354 /* Add induction variables derived from uses. */
5355 add_derived_ivs_candidates (data);
5356
5357 set_autoinc_for_original_candidates (data);
5358
5359 /* Record the important candidates. */
5360 record_important_candidates (data);
5361 }
5362
5363 /* Determines costs of basing the use of the iv on an iv candidate. */
5364
5365 static void
5366 determine_use_iv_costs (struct ivopts_data *data)
5367 {
5368 unsigned i, j;
5369 struct iv_use *use;
5370 struct iv_cand *cand;
5371 bitmap to_clear = BITMAP_ALLOC (NULL);
5372
5373 alloc_use_cost_map (data);
5374
5375 for (i = 0; i < n_iv_uses (data); i++)
5376 {
5377 use = iv_use (data, i);
5378
5379 if (data->consider_all_candidates)
5380 {
5381 for (j = 0; j < n_iv_cands (data); j++)
5382 {
5383 cand = iv_cand (data, j);
5384 determine_use_iv_cost (data, use, cand);
5385 }
5386 }
5387 else
5388 {
5389 bitmap_iterator bi;
5390
5391 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5392 {
5393 cand = iv_cand (data, j);
5394 if (!determine_use_iv_cost (data, use, cand))
5395 bitmap_set_bit (to_clear, j);
5396 }
5397
5398 /* Remove the candidates for which the cost is infinite from
5399 the list of related candidates. */
5400 bitmap_and_compl_into (use->related_cands, to_clear);
5401 bitmap_clear (to_clear);
5402 }
5403 }
5404
5405 BITMAP_FREE (to_clear);
5406
5407 if (dump_file && (dump_flags & TDF_DETAILS))
5408 {
5409 fprintf (dump_file, "Use-candidate costs:\n");
5410
5411 for (i = 0; i < n_iv_uses (data); i++)
5412 {
5413 use = iv_use (data, i);
5414
5415 fprintf (dump_file, "Use %d:\n", i);
5416 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5417 for (j = 0; j < use->n_map_members; j++)
5418 {
5419 if (!use->cost_map[j].cand
5420 || infinite_cost_p (use->cost_map[j].cost))
5421 continue;
5422
5423 fprintf (dump_file, " %d\t%d\t%d\t",
5424 use->cost_map[j].cand->id,
5425 use->cost_map[j].cost.cost,
5426 use->cost_map[j].cost.complexity);
5427 if (use->cost_map[j].depends_on)
5428 bitmap_print (dump_file,
5429 use->cost_map[j].depends_on, "","");
5430 if (use->cost_map[j].inv_expr_id != -1)
5431 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5432 fprintf (dump_file, "\n");
5433 }
5434
5435 fprintf (dump_file, "\n");
5436 }
5437 fprintf (dump_file, "\n");
5438 }
5439 }
5440
5441 /* Determines cost of the candidate CAND. */
5442
5443 static void
5444 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5445 {
5446 comp_cost cost_base;
5447 unsigned cost, cost_step;
5448 tree base;
5449
5450 if (!cand->iv)
5451 {
5452 cand->cost = 0;
5453 return;
5454 }
5455
5456 /* There are two costs associated with the candidate -- its increment
5457 and its initialization. The second is almost negligible for any loop
5458 that rolls enough, so we give it only very little weight. */
5459
5460 base = cand->iv->base;
5461 cost_base = force_var_cost (data, base, NULL);
5462 /* It will be exceptional that the iv register happens to be initialized with
5463 the proper value at no cost. In general, there will at least be a regcopy
5464 or a const set. */
5465 if (cost_base.cost == 0)
5466 cost_base.cost = COSTS_N_INSNS (1);
5467 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5468
5469 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5470
5471 /* Prefer the original ivs unless we may gain something by replacing it.
5472 The reason is to make debugging simpler; so this is not relevant for
5473 artificial ivs created by other optimization passes. */
5474 if (cand->pos != IP_ORIGINAL
5475 || !SSA_NAME_VAR (cand->var_before)
5476 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5477 cost++;
5478
5479 /* Prefer not to insert statements into latch unless there are some
5480 already (so that we do not create unnecessary jumps). */
5481 if (cand->pos == IP_END
5482 && empty_block_p (ip_end_pos (data->current_loop)))
5483 cost++;
5484
5485 cand->cost = cost;
5486 cand->cost_step = cost_step;
5487 }
5488
5489 /* Determines costs of computation of the candidates. */
5490
5491 static void
5492 determine_iv_costs (struct ivopts_data *data)
5493 {
5494 unsigned i;
5495
5496 if (dump_file && (dump_flags & TDF_DETAILS))
5497 {
5498 fprintf (dump_file, "Candidate costs:\n");
5499 fprintf (dump_file, " cand\tcost\n");
5500 }
5501
5502 for (i = 0; i < n_iv_cands (data); i++)
5503 {
5504 struct iv_cand *cand = iv_cand (data, i);
5505
5506 determine_iv_cost (data, cand);
5507
5508 if (dump_file && (dump_flags & TDF_DETAILS))
5509 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5510 }
5511
5512 if (dump_file && (dump_flags & TDF_DETAILS))
5513 fprintf (dump_file, "\n");
5514 }
5515
5516 /* Calculates cost for having SIZE induction variables. */
5517
5518 static unsigned
5519 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5520 {
5521 /* We add size to the cost, so that we prefer eliminating ivs
5522 if possible. */
5523 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5524 data->body_includes_call);
5525 }
5526
5527 /* For each size of the induction variable set determine the penalty. */
5528
5529 static void
5530 determine_set_costs (struct ivopts_data *data)
5531 {
5532 unsigned j, n;
5533 gphi *phi;
5534 gphi_iterator psi;
5535 tree op;
5536 struct loop *loop = data->current_loop;
5537 bitmap_iterator bi;
5538
5539 if (dump_file && (dump_flags & TDF_DETAILS))
5540 {
5541 fprintf (dump_file, "Global costs:\n");
5542 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5543 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5544 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5545 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5546 }
5547
5548 n = 0;
5549 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5550 {
5551 phi = psi.phi ();
5552 op = PHI_RESULT (phi);
5553
5554 if (virtual_operand_p (op))
5555 continue;
5556
5557 if (get_iv (data, op))
5558 continue;
5559
5560 n++;
5561 }
5562
5563 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5564 {
5565 struct version_info *info = ver_info (data, j);
5566
5567 if (info->inv_id && info->has_nonlin_use)
5568 n++;
5569 }
5570
5571 data->regs_used = n;
5572 if (dump_file && (dump_flags & TDF_DETAILS))
5573 fprintf (dump_file, " regs_used %d\n", n);
5574
5575 if (dump_file && (dump_flags & TDF_DETAILS))
5576 {
5577 fprintf (dump_file, " cost for size:\n");
5578 fprintf (dump_file, " ivs\tcost\n");
5579 for (j = 0; j <= 2 * target_avail_regs; j++)
5580 fprintf (dump_file, " %d\t%d\n", j,
5581 ivopts_global_cost_for_size (data, j));
5582 fprintf (dump_file, "\n");
5583 }
5584 }
5585
5586 /* Returns true if A is a cheaper cost pair than B. */
5587
5588 static bool
5589 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5590 {
5591 int cmp;
5592
5593 if (!a)
5594 return false;
5595
5596 if (!b)
5597 return true;
5598
5599 cmp = compare_costs (a->cost, b->cost);
5600 if (cmp < 0)
5601 return true;
5602
5603 if (cmp > 0)
5604 return false;
5605
5606 /* In case the costs are the same, prefer the cheaper candidate. */
5607 if (a->cand->cost < b->cand->cost)
5608 return true;
5609
5610 return false;
5611 }
5612
5613
5614 /* Returns the candidate by which USE is expressed in IVS. */
5615
5616 static struct cost_pair *
5617 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5618 {
5619 return ivs->cand_for_use[use->id];
5620 }
5621
5622 /* Computes the cost field of IVS structure. */
5623
5624 static void
5625 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5626 {
5627 comp_cost cost = ivs->cand_use_cost;
5628
5629 cost.cost += ivs->cand_cost;
5630
5631 cost.cost += ivopts_global_cost_for_size (data,
5632 ivs->n_regs + ivs->num_used_inv_expr);
5633
5634 ivs->cost = cost;
5635 }
5636
5637 /* Remove the invariants in set INVS from set IVS. */
5638
5639 static void
5640 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5641 {
5642 bitmap_iterator bi;
5643 unsigned iid;
5644
5645 if (!invs)
5646 return;
5647
5648 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5649 {
5650 ivs->n_invariant_uses[iid]--;
5651 if (ivs->n_invariant_uses[iid] == 0)
5652 ivs->n_regs--;
5653 }
5654 }
5655
5656 /* Set USE not to be expressed by any candidate in IVS. */
5657
5658 static void
5659 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5660 struct iv_use *use)
5661 {
5662 unsigned uid = use->id, cid;
5663 struct cost_pair *cp;
5664
5665 cp = ivs->cand_for_use[uid];
5666 if (!cp)
5667 return;
5668 cid = cp->cand->id;
5669
5670 ivs->bad_uses++;
5671 ivs->cand_for_use[uid] = NULL;
5672 ivs->n_cand_uses[cid]--;
5673
5674 if (ivs->n_cand_uses[cid] == 0)
5675 {
5676 bitmap_clear_bit (ivs->cands, cid);
5677 /* Do not count the pseudocandidates. */
5678 if (cp->cand->iv)
5679 ivs->n_regs--;
5680 ivs->n_cands--;
5681 ivs->cand_cost -= cp->cand->cost;
5682
5683 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5684 }
5685
5686 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5687
5688 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5689
5690 if (cp->inv_expr_id != -1)
5691 {
5692 ivs->used_inv_expr[cp->inv_expr_id]--;
5693 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5694 ivs->num_used_inv_expr--;
5695 }
5696 iv_ca_recount_cost (data, ivs);
5697 }
5698
5699 /* Add invariants in set INVS to set IVS. */
5700
5701 static void
5702 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5703 {
5704 bitmap_iterator bi;
5705 unsigned iid;
5706
5707 if (!invs)
5708 return;
5709
5710 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5711 {
5712 ivs->n_invariant_uses[iid]++;
5713 if (ivs->n_invariant_uses[iid] == 1)
5714 ivs->n_regs++;
5715 }
5716 }
5717
5718 /* Set cost pair for USE in set IVS to CP. */
5719
5720 static void
5721 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5722 struct iv_use *use, struct cost_pair *cp)
5723 {
5724 unsigned uid = use->id, cid;
5725
5726 if (ivs->cand_for_use[uid] == cp)
5727 return;
5728
5729 if (ivs->cand_for_use[uid])
5730 iv_ca_set_no_cp (data, ivs, use);
5731
5732 if (cp)
5733 {
5734 cid = cp->cand->id;
5735
5736 ivs->bad_uses--;
5737 ivs->cand_for_use[uid] = cp;
5738 ivs->n_cand_uses[cid]++;
5739 if (ivs->n_cand_uses[cid] == 1)
5740 {
5741 bitmap_set_bit (ivs->cands, cid);
5742 /* Do not count the pseudocandidates. */
5743 if (cp->cand->iv)
5744 ivs->n_regs++;
5745 ivs->n_cands++;
5746 ivs->cand_cost += cp->cand->cost;
5747
5748 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5749 }
5750
5751 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5752 iv_ca_set_add_invariants (ivs, cp->depends_on);
5753
5754 if (cp->inv_expr_id != -1)
5755 {
5756 ivs->used_inv_expr[cp->inv_expr_id]++;
5757 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5758 ivs->num_used_inv_expr++;
5759 }
5760 iv_ca_recount_cost (data, ivs);
5761 }
5762 }
5763
5764 /* Extend set IVS by expressing USE by some of the candidates in it
5765 if possible. Consider all important candidates if candidates in
5766 set IVS don't give any result. */
5767
5768 static void
5769 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5770 struct iv_use *use)
5771 {
5772 struct cost_pair *best_cp = NULL, *cp;
5773 bitmap_iterator bi;
5774 unsigned i;
5775 struct iv_cand *cand;
5776
5777 gcc_assert (ivs->upto >= use->id);
5778 ivs->upto++;
5779 ivs->bad_uses++;
5780
5781 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5782 {
5783 cand = iv_cand (data, i);
5784 cp = get_use_iv_cost (data, use, cand);
5785 if (cheaper_cost_pair (cp, best_cp))
5786 best_cp = cp;
5787 }
5788
5789 if (best_cp == NULL)
5790 {
5791 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5792 {
5793 cand = iv_cand (data, i);
5794 cp = get_use_iv_cost (data, use, cand);
5795 if (cheaper_cost_pair (cp, best_cp))
5796 best_cp = cp;
5797 }
5798 }
5799
5800 iv_ca_set_cp (data, ivs, use, best_cp);
5801 }
5802
5803 /* Get cost for assignment IVS. */
5804
5805 static comp_cost
5806 iv_ca_cost (struct iv_ca *ivs)
5807 {
5808 /* This was a conditional expression but it triggered a bug in
5809 Sun C 5.5. */
5810 if (ivs->bad_uses)
5811 return infinite_cost;
5812 else
5813 return ivs->cost;
5814 }
5815
5816 /* Returns true if all dependences of CP are among invariants in IVS. */
5817
5818 static bool
5819 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5820 {
5821 unsigned i;
5822 bitmap_iterator bi;
5823
5824 if (!cp->depends_on)
5825 return true;
5826
5827 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5828 {
5829 if (ivs->n_invariant_uses[i] == 0)
5830 return false;
5831 }
5832
5833 return true;
5834 }
5835
5836 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5837 it before NEXT_CHANGE. */
5838
5839 static struct iv_ca_delta *
5840 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5841 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5842 {
5843 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5844
5845 change->use = use;
5846 change->old_cp = old_cp;
5847 change->new_cp = new_cp;
5848 change->next_change = next_change;
5849
5850 return change;
5851 }
5852
5853 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5854 are rewritten. */
5855
5856 static struct iv_ca_delta *
5857 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5858 {
5859 struct iv_ca_delta *last;
5860
5861 if (!l2)
5862 return l1;
5863
5864 if (!l1)
5865 return l2;
5866
5867 for (last = l1; last->next_change; last = last->next_change)
5868 continue;
5869 last->next_change = l2;
5870
5871 return l1;
5872 }
5873
5874 /* Reverse the list of changes DELTA, forming the inverse to it. */
5875
5876 static struct iv_ca_delta *
5877 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5878 {
5879 struct iv_ca_delta *act, *next, *prev = NULL;
5880
5881 for (act = delta; act; act = next)
5882 {
5883 next = act->next_change;
5884 act->next_change = prev;
5885 prev = act;
5886
5887 std::swap (act->old_cp, act->new_cp);
5888 }
5889
5890 return prev;
5891 }
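
/* For instance (a hypothetical chain), reversing

     [u1: a -> b] -> [u2: c -> d] -> NULL

   yields

     [u2: d -> c] -> [u1: b -> a] -> NULL

   -- the order of the changes is flipped and each old/new pair is
   swapped, so committing the result undoes the original delta.  */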
5892
5893 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5894 reverted instead. */
5895
5896 static void
5897 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5898 struct iv_ca_delta *delta, bool forward)
5899 {
5900 struct cost_pair *from, *to;
5901 struct iv_ca_delta *act;
5902
5903 if (!forward)
5904 delta = iv_ca_delta_reverse (delta);
5905
5906 for (act = delta; act; act = act->next_change)
5907 {
5908 from = act->old_cp;
5909 to = act->new_cp;
5910 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5911 iv_ca_set_cp (data, ivs, act->use, to);
5912 }
5913
5914 if (!forward)
5915 iv_ca_delta_reverse (delta);
5916 }
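
/* The typical calling pattern (as in iv_ca_extend below) applies a
   delta speculatively, measures the resulting cost, and reverts:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   which lets the search evaluate a change without copying the whole
   assignment.  */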
5917
5918 /* Returns true if CAND is used in IVS. */
5919
5920 static bool
5921 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5922 {
5923 return ivs->n_cand_uses[cand->id] > 0;
5924 }
5925
5926 /* Returns number of induction variable candidates in the set IVS. */
5927
5928 static unsigned
5929 iv_ca_n_cands (struct iv_ca *ivs)
5930 {
5931 return ivs->n_cands;
5932 }
5933
5934 /* Free the list of changes DELTA. */
5935
5936 static void
5937 iv_ca_delta_free (struct iv_ca_delta **delta)
5938 {
5939 struct iv_ca_delta *act, *next;
5940
5941 for (act = *delta; act; act = next)
5942 {
5943 next = act->next_change;
5944 free (act);
5945 }
5946
5947 *delta = NULL;
5948 }
5949
5950 /* Allocates a new iv candidate assignment. */
5951
5952 static struct iv_ca *
5953 iv_ca_new (struct ivopts_data *data)
5954 {
5955 struct iv_ca *nw = XNEW (struct iv_ca);
5956
5957 nw->upto = 0;
5958 nw->bad_uses = 0;
5959 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5960 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5961 nw->cands = BITMAP_ALLOC (NULL);
5962 nw->n_cands = 0;
5963 nw->n_regs = 0;
5964 nw->cand_use_cost = no_cost;
5965 nw->cand_cost = 0;
5966 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5967 nw->cost = no_cost;
5968 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5969 nw->num_used_inv_expr = 0;
5970
5971 return nw;
5972 }
5973
5974 /* Free memory occupied by the set IVS. */
5975
5976 static void
5977 iv_ca_free (struct iv_ca **ivs)
5978 {
5979 free ((*ivs)->cand_for_use);
5980 free ((*ivs)->n_cand_uses);
5981 BITMAP_FREE ((*ivs)->cands);
5982 free ((*ivs)->n_invariant_uses);
5983 free ((*ivs)->used_inv_expr);
5984 free (*ivs);
5985 *ivs = NULL;
5986 }
5987
5988 /* Dumps IVS to FILE. */
5989
5990 static void
5991 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5992 {
5993 const char *pref = " invariants ";
5994 unsigned i;
5995 comp_cost cost = iv_ca_cost (ivs);
5996
5997 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5998 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5999 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6000 bitmap_print (file, ivs->cands, " candidates: ","\n");
6001
6002 for (i = 0; i < ivs->upto; i++)
6003 {
6004 struct iv_use *use = iv_use (data, i);
6005 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6006 if (cp)
6007 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6008 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6009 else
6010 fprintf (file, " use:%d --> ??\n", use->id);
6011 }
6012
6013 for (i = 1; i <= data->max_inv_id; i++)
6014 if (ivs->n_invariant_uses[i])
6015 {
6016 fprintf (file, "%s%d", pref, i);
6017 pref = ", ";
6018 }
6019 fprintf (file, "\n\n");
6020 }
6021
6022 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6023 new set, and store differences in DELTA. Number of induction variables
6024 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6025 true, the function tries to find a solution with minimal iv candidates. */
6026
6027 static comp_cost
6028 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6029 struct iv_cand *cand, struct iv_ca_delta **delta,
6030 unsigned *n_ivs, bool min_ncand)
6031 {
6032 unsigned i;
6033 comp_cost cost;
6034 struct iv_use *use;
6035 struct cost_pair *old_cp, *new_cp;
6036
6037 *delta = NULL;
6038 for (i = 0; i < ivs->upto; i++)
6039 {
6040 use = iv_use (data, i);
6041 old_cp = iv_ca_cand_for_use (ivs, use);
6042
6043 if (old_cp
6044 && old_cp->cand == cand)
6045 continue;
6046
6047 new_cp = get_use_iv_cost (data, use, cand);
6048 if (!new_cp)
6049 continue;
6050
6051 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6052 continue;
6053
6054 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6055 continue;
6056
6057 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6058 }
6059
6060 iv_ca_delta_commit (data, ivs, *delta, true);
6061 cost = iv_ca_cost (ivs);
6062 if (n_ivs)
6063 *n_ivs = iv_ca_n_cands (ivs);
6064 iv_ca_delta_commit (data, ivs, *delta, false);
6065
6066 return cost;
6067 }
6068
6069 /* Try narrowing set IVS by removing CAND. Return the cost of
6070 the new set and store the differences in DELTA. START is
6071 the candidate with which we start narrowing. */
6072
6073 static comp_cost
6074 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6075 struct iv_cand *cand, struct iv_cand *start,
6076 struct iv_ca_delta **delta)
6077 {
6078 unsigned i, ci;
6079 struct iv_use *use;
6080 struct cost_pair *old_cp, *new_cp, *cp;
6081 bitmap_iterator bi;
6082 struct iv_cand *cnd;
6083 comp_cost cost, best_cost, acost;
6084
6085 *delta = NULL;
6086 for (i = 0; i < n_iv_uses (data); i++)
6087 {
6088 use = iv_use (data, i);
6089
6090 old_cp = iv_ca_cand_for_use (ivs, use);
6091 if (old_cp->cand != cand)
6092 continue;
6093
6094 best_cost = iv_ca_cost (ivs);
6095 /* Start narrowing with START. */
6096 new_cp = get_use_iv_cost (data, use, start);
6097
6098 if (data->consider_all_candidates)
6099 {
6100 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6101 {
6102 if (ci == cand->id || (start && ci == start->id))
6103 continue;
6104
6105 cnd = iv_cand (data, ci);
6106
6107 cp = get_use_iv_cost (data, use, cnd);
6108 if (!cp)
6109 continue;
6110
6111 iv_ca_set_cp (data, ivs, use, cp);
6112 acost = iv_ca_cost (ivs);
6113
6114 if (compare_costs (acost, best_cost) < 0)
6115 {
6116 best_cost = acost;
6117 new_cp = cp;
6118 }
6119 }
6120 }
6121 else
6122 {
6123 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6124 {
6125 if (ci == cand->id || (start && ci == start->id))
6126 continue;
6127
6128 cnd = iv_cand (data, ci);
6129
6130 cp = get_use_iv_cost (data, use, cnd);
6131 if (!cp)
6132 continue;
6133
6134 iv_ca_set_cp (data, ivs, use, cp);
6135 acost = iv_ca_cost (ivs);
6136
6137 if (compare_costs (acost, best_cost) < 0)
6138 {
6139 best_cost = acost;
6140 new_cp = cp;
6141 }
6142 }
6143 }
6144 /* Restore the old cp for this use. */
6145 iv_ca_set_cp (data, ivs, use, old_cp);
6146
6147 if (!new_cp)
6148 {
6149 iv_ca_delta_free (delta);
6150 return infinite_cost;
6151 }
6152
6153 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6154 }
6155
6156 iv_ca_delta_commit (data, ivs, *delta, true);
6157 cost = iv_ca_cost (ivs);
6158 iv_ca_delta_commit (data, ivs, *delta, false);
6159
6160 return cost;
6161 }
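
/* A hypothetical run: with ivs = {c1, c2, c3} and CAND = c2, every use
   currently served by c2 is re-assigned to whichever remaining
   candidate (trying START first) gives the lowest set cost.  If some
   use of c2 has no usable alternative, infinite_cost is returned and
   no delta is produced.  */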
6162
6163 /* Try optimizing the set of candidates IVS by removing candidates different
6164 from EXCEPT_CAND from it. Return cost of the new set, and store
6165 differences in DELTA. */
6166
6167 static comp_cost
6168 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6169 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6170 {
6171 bitmap_iterator bi;
6172 struct iv_ca_delta *act_delta, *best_delta;
6173 unsigned i;
6174 comp_cost best_cost, acost;
6175 struct iv_cand *cand;
6176
6177 best_delta = NULL;
6178 best_cost = iv_ca_cost (ivs);
6179
6180 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6181 {
6182 cand = iv_cand (data, i);
6183
6184 if (cand == except_cand)
6185 continue;
6186
6187 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6188
6189 if (compare_costs (acost, best_cost) < 0)
6190 {
6191 best_cost = acost;
6192 iv_ca_delta_free (&best_delta);
6193 best_delta = act_delta;
6194 }
6195 else
6196 iv_ca_delta_free (&act_delta);
6197 }
6198
6199 if (!best_delta)
6200 {
6201 *delta = NULL;
6202 return best_cost;
6203 }
6204
6205 /* Recurse to possibly remove other unnecessary ivs. */
6206 iv_ca_delta_commit (data, ivs, best_delta, true);
6207 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6208 iv_ca_delta_commit (data, ivs, best_delta, false);
6209 *delta = iv_ca_delta_join (best_delta, *delta);
6210 return best_cost;
6211 }
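
/* The recursion matters: removing one candidate re-homes its uses,
   which can in turn make another candidate removable.  E.g.
   (hypothetically) pruning c3 may move its only use onto c1, after
   which c2 serves no use that c1 cannot serve as cheaply, so the
   recursive call drops c2 as well.  */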
6212
6213 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6214 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6215 the corresponding cost_pair; otherwise just return BEST_CP. */
6216
6217 static struct cost_pair*
6218 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6219 unsigned int cand_idx, struct iv_cand *old_cand,
6220 struct cost_pair *best_cp)
6221 {
6222 struct iv_cand *cand;
6223 struct cost_pair *cp;
6224
6225 gcc_assert (old_cand != NULL && best_cp != NULL);
6226 if (cand_idx == old_cand->id)
6227 return best_cp;
6228
6229 cand = iv_cand (data, cand_idx);
6230 cp = get_use_iv_cost (data, use, cand);
6231 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6232 return cp;
6233
6234 return best_cp;
6235 }
6236
6237 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6238 which are used by more than one iv use. For each of those candidates,
6239 this function tries to represent iv uses under that candidate using
6240 other ones with lower local cost, then tries to prune the new set.
6241 If the new set has lower cost, it returns the new cost after recording
6242 candidate replacement in list DELTA. */
6243
6244 static comp_cost
6245 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6246 struct iv_ca_delta **delta)
6247 {
6248 bitmap_iterator bi, bj;
6249 unsigned int i, j, k;
6250 struct iv_use *use;
6251 struct iv_cand *cand;
6252 comp_cost orig_cost, acost;
6253 struct iv_ca_delta *act_delta, *tmp_delta;
6254 struct cost_pair *old_cp, *best_cp = NULL;
6255
6256 *delta = NULL;
6257 orig_cost = iv_ca_cost (ivs);
6258
6259 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6260 {
6261 if (ivs->n_cand_uses[i] == 1
6262 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6263 continue;
6264
6265 cand = iv_cand (data, i);
6266
6267 act_delta = NULL;
6268 /* Represent uses under current candidate using other ones with
6269 lower local cost. */
6270 for (j = 0; j < ivs->upto; j++)
6271 {
6272 use = iv_use (data, j);
6273 old_cp = iv_ca_cand_for_use (ivs, use);
6274
6275 if (old_cp->cand != cand)
6276 continue;
6277
6278 best_cp = old_cp;
6279 if (data->consider_all_candidates)
6280 for (k = 0; k < n_iv_cands (data); k++)
6281 best_cp = cheaper_cost_with_cand (data, use, k,
6282 old_cp->cand, best_cp);
6283 else
6284 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6285 best_cp = cheaper_cost_with_cand (data, use, k,
6286 old_cp->cand, best_cp);
6287
6288 if (best_cp == old_cp)
6289 continue;
6290
6291 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6292 }
6293 /* No need for further pruning. */
6294 if (!act_delta)
6295 continue;
6296
6297 /* Prune the new candidate set. */
6298 iv_ca_delta_commit (data, ivs, act_delta, true);
6299 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6300 iv_ca_delta_commit (data, ivs, act_delta, false);
6301 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6302
6303 if (compare_costs (acost, orig_cost) < 0)
6304 {
6305 *delta = act_delta;
6306 return acost;
6307 }
6308 else
6309 iv_ca_delta_free (&act_delta);
6310 }
6311
6312 return orig_cost;
6313 }
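
/* A hypothetical scenario this catches: candidate c serves uses u1 and
   u2, each of which has an individually cheaper candidate (c1 for u1,
   c2 for u2), yet neither single replacement beats the current set.
   Replacing both uses at once and then pruning c can yield a cheaper
   set that the one-change-at-a-time search in try_improve_iv_set
   would never reach.  */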
6314
6315 /* Tries to extend the set IVS in the best possible way in order
6316 to express the USE. If ORIGINALP is true, prefer candidates from
6317 the original set of IVs, otherwise favor important candidates not
6318 based on any memory object. */
6319
6320 static bool
6321 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6322 struct iv_use *use, bool originalp)
6323 {
6324 comp_cost best_cost, act_cost;
6325 unsigned i;
6326 bitmap_iterator bi;
6327 struct iv_cand *cand;
6328 struct iv_ca_delta *best_delta = NULL, *act_delta;
6329 struct cost_pair *cp;
6330
6331 iv_ca_add_use (data, ivs, use);
6332 best_cost = iv_ca_cost (ivs);
6333 cp = iv_ca_cand_for_use (ivs, use);
6334 if (cp)
6335 {
6336 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6337 iv_ca_set_no_cp (data, ivs, use);
6338 }
6339
6340 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6341 first try important candidates not based on any memory object. Only if
6342 this fails, try the specific ones. Rationale -- in loops with many
6343 variables the best choice often is to use just one generic biv. If we
6344 added here many ivs specific to the uses, the optimization algorithm later
6345 would be likely to get stuck in a local minimum, thus causing us to create
6346 too many ivs. The approach from few ivs to more seems more likely to be
6347 successful -- starting from few ivs, replacing an expensive use by a
6348 specific iv should always be a win. */
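  /* E.g. (hypothetically) in a loop walking a[i], b[i] and c[i], a single
     generic biv i with the three addresses expressed from it is usually
     cheaper than three separate pointer ivs, which is why the generic
     important candidates are tried first.  */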
6349 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6350 {
6351 cand = iv_cand (data, i);
6352
6353 if (originalp && cand->pos != IP_ORIGINAL)
6354 continue;
6355
6356 if (!originalp && cand->iv->base_object != NULL_TREE)
6357 continue;
6358
6359 if (iv_ca_cand_used_p (ivs, cand))
6360 continue;
6361
6362 cp = get_use_iv_cost (data, use, cand);
6363 if (!cp)
6364 continue;
6365
6366 iv_ca_set_cp (data, ivs, use, cp);
6367 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6368 true);
6369 iv_ca_set_no_cp (data, ivs, use);
6370 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6371
6372 if (compare_costs (act_cost, best_cost) < 0)
6373 {
6374 best_cost = act_cost;
6375
6376 iv_ca_delta_free (&best_delta);
6377 best_delta = act_delta;
6378 }
6379 else
6380 iv_ca_delta_free (&act_delta);
6381 }
6382
6383 if (infinite_cost_p (best_cost))
6384 {
6385 for (i = 0; i < use->n_map_members; i++)
6386 {
6387 cp = use->cost_map + i;
6388 cand = cp->cand;
6389 if (!cand)
6390 continue;
6391
6392 /* Already tried this. */
6393 if (cand->important)
6394 {
6395 if (originalp && cand->pos == IP_ORIGINAL)
6396 continue;
6397 if (!originalp && cand->iv->base_object == NULL_TREE)
6398 continue;
6399 }
6400
6401 if (iv_ca_cand_used_p (ivs, cand))
6402 continue;
6403
6404 act_delta = NULL;
6405 iv_ca_set_cp (data, ivs, use, cp);
6406 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6407 iv_ca_set_no_cp (data, ivs, use);
6408 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6409 cp, act_delta);
6410
6411 if (compare_costs (act_cost, best_cost) < 0)
6412 {
6413 best_cost = act_cost;
6414
6415 if (best_delta)
6416 iv_ca_delta_free (&best_delta);
6417 best_delta = act_delta;
6418 }
6419 else
6420 iv_ca_delta_free (&act_delta);
6421 }
6422 }
6423
6424 iv_ca_delta_commit (data, ivs, best_delta, true);
6425 iv_ca_delta_free (&best_delta);
6426
6427 return !infinite_cost_p (best_cost);
6428 }
6429
6430 /* Finds an initial assignment of candidates to uses. */
6431
6432 static struct iv_ca *
6433 get_initial_solution (struct ivopts_data *data, bool originalp)
6434 {
6435 struct iv_ca *ivs = iv_ca_new (data);
6436 unsigned i;
6437
6438 for (i = 0; i < n_iv_uses (data); i++)
6439 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6440 {
6441 iv_ca_free (&ivs);
6442 return NULL;
6443 }
6444
6445 return ivs;
6446 }
6447
6448 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6449 points to a bool variable; if it is true, this function tries to break
6450 the local optimal fixed-point by replacing candidates in IVS. */
6451
6452 static bool
6453 try_improve_iv_set (struct ivopts_data *data,
6454 struct iv_ca *ivs, bool *try_replace_p)
6455 {
6456 unsigned i, n_ivs;
6457 comp_cost acost, best_cost = iv_ca_cost (ivs);
6458 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6459 struct iv_cand *cand;
6460
6461 /* Try extending the set of induction variables by one. */
6462 for (i = 0; i < n_iv_cands (data); i++)
6463 {
6464 cand = iv_cand (data, i);
6465
6466 if (iv_ca_cand_used_p (ivs, cand))
6467 continue;
6468
6469 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6470 if (!act_delta)
6471 continue;
6472
6473 /* If we successfully added the candidate and the set is small enough,
6474 try optimizing it by removing other candidates. */
6475 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6476 {
6477 iv_ca_delta_commit (data, ivs, act_delta, true);
6478 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6479 iv_ca_delta_commit (data, ivs, act_delta, false);
6480 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6481 }
6482
6483 if (compare_costs (acost, best_cost) < 0)
6484 {
6485 best_cost = acost;
6486 iv_ca_delta_free (&best_delta);
6487 best_delta = act_delta;
6488 }
6489 else
6490 iv_ca_delta_free (&act_delta);
6491 }
6492
6493 if (!best_delta)
6494 {
6495 /* Try removing the candidates from the set instead. */
6496 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6497
6498 if (!best_delta && *try_replace_p)
6499 {
6500 *try_replace_p = false;
6501 /* So far the candidate selecting algorithm tends to choose fewer
6502 IVs so that it can handle loops that have many variables but for
6503 which the best choice is often to use only one general biv. One
6504 weakness is that it can't handle the opposite case, in which
6505 different candidates should be chosen with respect to each use.
6506 To solve the problem, we replace candidates in the manner described
6507 in the comments of iv_ca_replace, thus giving the general algorithm
6508 a chance to break the local optimal fixed-point in these cases. */
6509 best_cost = iv_ca_replace (data, ivs, &best_delta);
6510 }
6511
6512 if (!best_delta)
6513 return false;
6514 }
6515
6516 iv_ca_delta_commit (data, ivs, best_delta, true);
6517 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6518 iv_ca_delta_free (&best_delta);
6519 return true;
6520 }
6521
6522 /* Attempts to find the optimal set of induction variables. We use a simple
6523 greedy heuristic -- we try to replace at most one candidate in the selected
6524 solution and remove the unused ivs while this improves the cost. */
6525
6526 static struct iv_ca *
6527 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6528 {
6529 struct iv_ca *set;
6530 bool try_replace_p = true;
6531
6532 /* Get the initial solution. */
6533 set = get_initial_solution (data, originalp);
6534 if (!set)
6535 {
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6537 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6538 return NULL;
6539 }
6540
6541 if (dump_file && (dump_flags & TDF_DETAILS))
6542 {
6543 fprintf (dump_file, "Initial set of candidates:\n");
6544 iv_ca_dump (data, dump_file, set);
6545 }
6546
6547 while (try_improve_iv_set (data, set, &try_replace_p))
6548 {
6549 if (dump_file && (dump_flags & TDF_DETAILS))
6550 {
6551 fprintf (dump_file, "Improved to:\n");
6552 iv_ca_dump (data, dump_file, set);
6553 }
6554 }
6555
6556 return set;
6557 }
6558
6559 static struct iv_ca *
6560 find_optimal_iv_set (struct ivopts_data *data)
6561 {
6562 unsigned i;
6563 struct iv_ca *set, *origset;
6564 struct iv_use *use;
6565 comp_cost cost, origcost;
6566
6567 /* Determine the cost based on a strategy that starts with original IVs,
6568 and try again using a strategy that prefers candidates not based
6569 on any memory object. */
6570 origset = find_optimal_iv_set_1 (data, true);
6571 set = find_optimal_iv_set_1 (data, false);
6572
6573 if (!origset && !set)
6574 return NULL;
6575
6576 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6577 cost = set ? iv_ca_cost (set) : infinite_cost;
6578
6579 if (dump_file && (dump_flags & TDF_DETAILS))
6580 {
6581 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6582 origcost.cost, origcost.complexity);
6583 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6584 cost.cost, cost.complexity);
6585 }
6586
6587 /* Choose the one with the best cost. */
6588 if (compare_costs (origcost, cost) <= 0)
6589 {
6590 if (set)
6591 iv_ca_free (&set);
6592 set = origset;
6593 }
6594 else if (origset)
6595 iv_ca_free (&origset);
6596
6597 for (i = 0; i < n_iv_uses (data); i++)
6598 {
6599 use = iv_use (data, i);
6600 use->selected = iv_ca_cand_for_use (set, use)->cand;
6601 }
6602
6603 return set;
6604 }
6605
6606 /* Creates a new induction variable corresponding to CAND. */
6607
6608 static void
6609 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6610 {
6611 gimple_stmt_iterator incr_pos;
6612 tree base;
6613 bool after = false;
6614
6615 if (!cand->iv)
6616 return;
6617
6618 switch (cand->pos)
6619 {
6620 case IP_NORMAL:
6621 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6622 break;
6623
6624 case IP_END:
6625 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6626 after = true;
6627 break;
6628
6629 case IP_AFTER_USE:
6630 after = true;
6631 /* fall through */
6632 case IP_BEFORE_USE:
6633 incr_pos = gsi_for_stmt (cand->incremented_at);
6634 break;
6635
6636 case IP_ORIGINAL:
6637 /* Mark that the iv is preserved. */
6638 name_info (data, cand->var_before)->preserve_biv = true;
6639 name_info (data, cand->var_after)->preserve_biv = true;
6640
6641 /* Rewrite the increment so that it uses var_before directly. */
6642 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6643 return;
6644 }
6645
6646 gimple_add_tmp_var (cand->var_before);
6647
6648 base = unshare_expr (cand->iv->base);
6649
6650 create_iv (base, unshare_expr (cand->iv->step),
6651 cand->var_before, data->current_loop,
6652 &incr_pos, after, &cand->var_before, &cand->var_after);
6653 }
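
/* For a candidate with base 0 and step 4 at IP_NORMAL, create_iv emits
   GIMPLE along these lines (a sketch; the names are made up):

     # ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
     ...
     ivtmp_2 = ivtmp_1 + 4;

   and stores the names before and after the increment back into
   cand->var_before and cand->var_after.  */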
6654
6655 /* Creates new induction variables described in SET. */
6656
6657 static void
6658 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6659 {
6660 unsigned i;
6661 struct iv_cand *cand;
6662 bitmap_iterator bi;
6663
6664 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6665 {
6666 cand = iv_cand (data, i);
6667 create_new_iv (data, cand);
6668 }
6669
6670 if (dump_file && (dump_flags & TDF_DETAILS))
6671 {
6672 fprintf (dump_file, "Selected IV set for loop %d",
6673 data->current_loop->num);
6674 if (data->loop_loc != UNKNOWN_LOCATION)
6675 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6676 LOCATION_LINE (data->loop_loc));
6677 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6678 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6679 {
6680 cand = iv_cand (data, i);
6681 dump_cand (dump_file, cand);
6682 }
6683 fprintf (dump_file, "\n");
6684 }
6685 }
6686
6687 /* Rewrites USE (definition of iv used in a nonlinear expression)
6688 using candidate CAND. */
6689
6690 static void
6691 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6692 struct iv_use *use, struct iv_cand *cand)
6693 {
6694 tree comp;
6695 tree op, tgt;
6696 gassign *ass;
6697 gimple_stmt_iterator bsi;
6698
6699 /* An important special case -- if we are asked to express value of
6700 the original iv by itself, just exit; there is no need to
6701 introduce a new computation (that might also need casting the
6702 variable to unsigned and back). */
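  /* Concretely (a hypothetical case): if CAND is the original biv i and
     USE is its own increment i_4 = i_1 + 1, rewriting would only recreate
     the same statement, so we return early below when the other operand
     is loop invariant.  */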
6703 if (cand->pos == IP_ORIGINAL
6704 && cand->incremented_at == use->stmt)
6705 {
6706 enum tree_code stmt_code;
6707
6708 gcc_assert (is_gimple_assign (use->stmt));
6709 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6710
6711 /* Check whether we may leave the computation unchanged.
6712 This is the case only if it does not rely on other
6713 computations in the loop -- otherwise, the computation
6714 we rely upon may be removed in remove_unused_ivs,
6715 thus leading to ICE. */
6716 stmt_code = gimple_assign_rhs_code (use->stmt);
6717 if (stmt_code == PLUS_EXPR
6718 || stmt_code == MINUS_EXPR
6719 || stmt_code == POINTER_PLUS_EXPR)
6720 {
6721 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6722 op = gimple_assign_rhs2 (use->stmt);
6723 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6724 op = gimple_assign_rhs1 (use->stmt);
6725 else
6726 op = NULL_TREE;
6727 }
6728 else
6729 op = NULL_TREE;
6730
6731 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6732 return;
6733 }
6734
6735 comp = get_computation (data->current_loop, use, cand);
6736 gcc_assert (comp != NULL_TREE);
6737
6738 switch (gimple_code (use->stmt))
6739 {
6740 case GIMPLE_PHI:
6741 tgt = PHI_RESULT (use->stmt);
6742
6743 /* If we should keep the biv, do not replace it. */
6744 if (name_info (data, tgt)->preserve_biv)
6745 return;
6746
6747 bsi = gsi_after_labels (gimple_bb (use->stmt));
6748 break;
6749
6750 case GIMPLE_ASSIGN:
6751 tgt = gimple_assign_lhs (use->stmt);
6752 bsi = gsi_for_stmt (use->stmt);
6753 break;
6754
6755 default:
6756 gcc_unreachable ();
6757 }
6758
6759 if (!valid_gimple_rhs_p (comp)
6760 || (gimple_code (use->stmt) != GIMPLE_PHI
6761 /* We can't allow re-allocating the stmt as it might be pointed
6762 to still. */
6763 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6764 >= gimple_num_ops (gsi_stmt (bsi)))))
6765 {
6766 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6767 true, GSI_SAME_STMT);
6768 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6769 {
6770 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6771 /* As this isn't a plain copy we have to reset alignment
6772 information. */
6773 if (SSA_NAME_PTR_INFO (comp))
6774 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6775 }
6776 }
6777
6778 if (gimple_code (use->stmt) == GIMPLE_PHI)
6779 {
6780 ass = gimple_build_assign (tgt, comp);
6781 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6782
6783 bsi = gsi_for_stmt (use->stmt);
6784 remove_phi_node (&bsi, false);
6785 }
6786 else
6787 {
6788 gimple_assign_set_rhs_from_tree (&bsi, comp);
6789 use->stmt = gsi_stmt (bsi);
6790 }
6791 }
6792
6793 /* Performs a peephole optimization to reorder the iv update statement with
6794 a mem ref to enable instruction combining in later phases. The mem ref uses
6795 the iv value before the update, so the reordering transformation requires
6796 adjustment of the offset. CAND is the selected IV_CAND.
6797
6798 Example:
6799
6800 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6801 iv2 = iv1 + 1;
6802
6803 if (t < val) (1)
6804 goto L;
6805 goto Head;
6806
6807
6808 directly propagating t over to (1) will introduce an overlapping live range,
6809 thus increasing register pressure. This peephole transforms it into:
6810
6811
6812 iv2 = iv1 + 1;
6813 t = MEM_REF (base, iv2, 8, 8);
6814 if (t < val)
6815 goto L;
6816 goto Head;
6817 */
6818
6819 static void
6820 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6821 {
6822 tree var_after;
6823 gimple iv_update, stmt;
6824 basic_block bb;
6825 gimple_stmt_iterator gsi, gsi_iv;
6826
6827 if (cand->pos != IP_NORMAL)
6828 return;
6829
6830 var_after = cand->var_after;
6831 iv_update = SSA_NAME_DEF_STMT (var_after);
6832
6833 bb = gimple_bb (iv_update);
6834 gsi = gsi_last_nondebug_bb (bb);
6835 stmt = gsi_stmt (gsi);
6836
6837 /* Only handle conditional statements for now. */
6838 if (gimple_code (stmt) != GIMPLE_COND)
6839 return;
6840
6841 gsi_prev_nondebug (&gsi);
6842 stmt = gsi_stmt (gsi);
6843 if (stmt != iv_update)
6844 return;
6845
6846 gsi_prev_nondebug (&gsi);
6847 if (gsi_end_p (gsi))
6848 return;
6849
6850 stmt = gsi_stmt (gsi);
6851 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6852 return;
6853
6854 if (stmt != use->stmt)
6855 return;
6856
6857 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6858 return;
6859
6860 if (dump_file && (dump_flags & TDF_DETAILS))
6861 {
6862 fprintf (dump_file, "Reordering \n");
6863 print_gimple_stmt (dump_file, iv_update, 0, 0);
6864 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6865 fprintf (dump_file, "\n");
6866 }
6867
6868 gsi = gsi_for_stmt (use->stmt);
6869 gsi_iv = gsi_for_stmt (iv_update);
6870 gsi_move_before (&gsi_iv, &gsi);
6871
6872 cand->pos = IP_BEFORE_USE;
6873 cand->incremented_at = use->stmt;
6874 }
6875
6876 /* Rewrites USE (address that is an iv) using candidate CAND. */
6877
6878 static void
6879 rewrite_use_address_1 (struct ivopts_data *data,
6880 struct iv_use *use, struct iv_cand *cand)
6881 {
6882 aff_tree aff;
6883 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6884 tree base_hint = NULL_TREE;
6885 tree ref, iv;
6886 bool ok;
6887
6888 adjust_iv_update_pos (cand, use);
6889 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6890 gcc_assert (ok);
6891 unshare_aff_combination (&aff);
6892
6893 /* To avoid undefined overflow problems, all IV candidates use unsigned
6894 integer types. The drawback is that this makes it impossible for
6895 create_mem_ref to distinguish an IV that is based on a memory object
6896 from one that represents simply an offset.
6897
6898 To work around this problem, we pass a hint to create_mem_ref that
6899 indicates which variable (if any) in aff is an IV based on a memory
6900 object. Note that we only consider the candidate. If this is not
6901 based on an object, the base of the reference is in some subexpression
6902 of the use -- but these will use pointer types, so they are recognized
6903 by the create_mem_ref heuristics anyway. */
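  /* E.g. (hypothetically) for a use *p_5 where the candidate represents
     (unsigned long) base_3 + 4 * i, the affine combination alone cannot
     show that base_3 is the memory object; passing the candidate's
     variable as the hint lets create_mem_ref build a valid memory
     reference based on it.  */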
6904 if (cand->iv->base_object)
6905 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6906
6907 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6908 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6909 reference_alias_ptr_type (*use->op_p),
6910 iv, base_hint, data->speed);
6911 copy_ref_info (ref, *use->op_p);
6912 *use->op_p = ref;
6913 }
6914
6915 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6916 first use of a group, rewrites sub uses in the group too. */
6917
6918 static void
6919 rewrite_use_address (struct ivopts_data *data,
6920 struct iv_use *use, struct iv_cand *cand)
6921 {
6922 struct iv_use *next;
6923
6924 gcc_assert (use->sub_id == 0);
6925 rewrite_use_address_1 (data, use, cand);
6926 update_stmt (use->stmt);
6927
6928 for (next = use->next; next != NULL; next = next->next)
6929 {
6930 rewrite_use_address_1 (data, next, cand);
6931 update_stmt (next->stmt);
6932 }
6933
6934 return;
6935 }
6936
6937 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6938 candidate CAND. */
6939
6940 static void
6941 rewrite_use_compare (struct ivopts_data *data,
6942 struct iv_use *use, struct iv_cand *cand)
6943 {
6944 tree comp, *var_p, op, bound;
6945 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6946 enum tree_code compare;
6947 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6948 bool ok;
6949
6950 bound = cp->value;
6951 if (bound)
6952 {
6953 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6954 tree var_type = TREE_TYPE (var);
6955 gimple_seq stmts;
6956
6957 if (dump_file && (dump_flags & TDF_DETAILS))
6958 {
6959 fprintf (dump_file, "Replacing exit test: ");
6960 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6961 }
6962 compare = cp->comp;
6963 bound = unshare_expr (fold_convert (var_type, bound));
6964 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6965 if (stmts)
6966 gsi_insert_seq_on_edge_immediate (
6967 loop_preheader_edge (data->current_loop),
6968 stmts);
6969
6970 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6971 gimple_cond_set_lhs (cond_stmt, var);
6972 gimple_cond_set_code (cond_stmt, compare);
6973 gimple_cond_set_rhs (cond_stmt, op);
6974 return;
6975 }
6976
6977 /* The induction variable elimination failed; just express the original
6978 giv. */
6979 comp = get_computation (data->current_loop, use, cand);
6980 gcc_assert (comp != NULL_TREE);
6981
6982 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6983 gcc_assert (ok);
6984
6985 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6986 true, GSI_SAME_STMT);
6987 }
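
/* For example (a sketch, not from a real dump), an exit test

     if (i_3 < n_7)

   may be replaced by

     if (ivtmp_9 != _11)

   where _11 is computed once in the preheader and the comparison code
   and bound come from CP->comp and CP->value above.  */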
6988
6989 /* Rewrites USE using candidate CAND. */
6990
6991 static void
6992 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6993 {
6994 switch (use->type)
6995 {
6996 case USE_NONLINEAR_EXPR:
6997 rewrite_use_nonlinear_expr (data, use, cand);
6998 break;
6999
7000 case USE_ADDRESS:
7001 rewrite_use_address (data, use, cand);
7002 break;
7003
7004 case USE_COMPARE:
7005 rewrite_use_compare (data, use, cand);
7006 break;
7007
7008 default:
7009 gcc_unreachable ();
7010 }
7011
7012 update_stmt (use->stmt);
7013 }
7014
7015 /* Rewrite the uses using the selected induction variables. */
7016
7017 static void
7018 rewrite_uses (struct ivopts_data *data)
7019 {
7020 unsigned i;
7021 struct iv_cand *cand;
7022 struct iv_use *use;
7023
7024 for (i = 0; i < n_iv_uses (data); i++)
7025 {
7026 use = iv_use (data, i);
7027 cand = use->selected;
7028 gcc_assert (cand);
7029
7030 rewrite_use (data, use, cand);
7031 }
7032 }
7033
7034 /* Removes the ivs that are not used after rewriting. */
7035
7036 static void
7037 remove_unused_ivs (struct ivopts_data *data)
7038 {
7039 unsigned j;
7040 bitmap_iterator bi;
7041 bitmap toremove = BITMAP_ALLOC (NULL);
7042
7043 /* Figure out an order in which to release SSA DEFs so that we don't
7044 release something that we'd have to propagate into a debug stmt
7045 afterwards. */
7046 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7047 {
7048 struct version_info *info;
7049
7050 info = ver_info (data, j);
7051 if (info->iv
7052 && !integer_zerop (info->iv->step)
7053 && !info->inv_id
7054 && !info->iv->have_use_for
7055 && !info->preserve_biv)
7056 {
7057 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7058
7059 tree def = info->iv->ssa_name;
7060
7061 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7062 {
7063 imm_use_iterator imm_iter;
7064 use_operand_p use_p;
7065 gimple stmt;
7066 int count = 0;
7067
7068 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7069 {
7070 if (!gimple_debug_bind_p (stmt))
7071 continue;
7072
7073 /* We just want to determine whether to do nothing
7074 (count == 0), to substitute the computed
7075 expression into a single use of the SSA DEF by
7076 itself (count == 1), or to use a debug temp
7077 because the SSA DEF is used multiple times or as
7078 part of a larger expression (count > 1). */
7079 count++;
7080 if (gimple_debug_bind_get_value (stmt) != def)
7081 count++;
7082
7083 if (count > 1)
7084 BREAK_FROM_IMM_USE_STMT (imm_iter);
7085 }
7086
7087 if (!count)
7088 continue;
7089
7090 struct iv_use dummy_use;
7091 struct iv_cand *best_cand = NULL, *cand;
7092 unsigned i, best_pref = 0, cand_pref;
7093
7094 memset (&dummy_use, 0, sizeof (dummy_use));
7095 dummy_use.iv = info->iv;
7096 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7097 {
7098 cand = iv_use (data, i)->selected;
7099 if (cand == best_cand)
7100 continue;
7101 cand_pref = operand_equal_p (cand->iv->step,
7102 info->iv->step, 0)
7103 ? 4 : 0;
7104 cand_pref
7105 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7106 == TYPE_MODE (TREE_TYPE (info->iv->base))
7107 ? 2 : 0;
7108 cand_pref
7109 += TREE_CODE (cand->iv->base) == INTEGER_CST
7110 ? 1 : 0;
7111 if (best_cand == NULL || best_pref < cand_pref)
7112 {
7113 best_cand = cand;
7114 best_pref = cand_pref;
7115 }
7116 }
7117
7118 if (!best_cand)
7119 continue;
7120
7121 tree comp = get_computation_at (data->current_loop,
7122 &dummy_use, best_cand,
7123 SSA_NAME_DEF_STMT (def));
7124 if (!comp)
7125 continue;
7126
7127 if (count > 1)
7128 {
7129 tree vexpr = make_node (DEBUG_EXPR_DECL);
7130 DECL_ARTIFICIAL (vexpr) = 1;
7131 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7132 if (SSA_NAME_VAR (def))
7133 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7134 else
7135 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7136 gdebug *def_temp
7137 = gimple_build_debug_bind (vexpr, comp, NULL);
7138 gimple_stmt_iterator gsi;
7139
7140 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7141 gsi = gsi_after_labels (gimple_bb
7142 (SSA_NAME_DEF_STMT (def)));
7143 else
7144 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7145
7146 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7147 comp = vexpr;
7148 }
7149
7150 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7151 {
7152 if (!gimple_debug_bind_p (stmt))
7153 continue;
7154
7155 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7156 SET_USE (use_p, comp);
7157
7158 update_stmt (stmt);
7159 }
7160 }
7161 }
7162 }
7163
7164 release_defs_bitset (toremove);
7165
7166 BITMAP_FREE (toremove);
7167 }
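
/* Debug-info illustration (hypothetical): if a removed iv had a single
   debug bind such as  # DEBUG i => i_3,  the computed replacement
   expression is substituted into it directly (the count == 1 case).
   With several binds, a shared temporary is introduced instead:

     DEBUG D#1 => ivtmp_7 * 4 + base_5
     # DEBUG i => D#1

   matching the count > 1 path above.  */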
7168
7169 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7170 for hash_map::traverse. */
7171
7172 bool
7173 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7174 {
7175 free (value);
7176 return true;
7177 }
7178
7179 /* Frees data allocated by the optimization of a single loop. */
7180
7181 static void
7182 free_loop_data (struct ivopts_data *data)
7183 {
7184 unsigned i, j;
7185 bitmap_iterator bi;
7186 tree obj;
7187
7188 if (data->niters)
7189 {
7190 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7191 delete data->niters;
7192 data->niters = NULL;
7193 }
7194
7195 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7196 {
7197 struct version_info *info;
7198
7199 info = ver_info (data, i);
7200 free (info->iv);
7201 info->iv = NULL;
7202 info->has_nonlin_use = false;
7203 info->preserve_biv = false;
7204 info->inv_id = 0;
7205 }
7206 bitmap_clear (data->relevant);
7207 bitmap_clear (data->important_candidates);
7208
7209 for (i = 0; i < n_iv_uses (data); i++)
7210 {
7211 struct iv_use *use = iv_use (data, i);
7212 struct iv_use *pre = use, *sub = use->next;
7213
7214 while (sub)
7215 {
7216 gcc_assert (sub->related_cands == NULL);
7217 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7218
7219 free (sub->iv);
7220 pre = sub;
7221 sub = sub->next;
7222 free (pre);
7223 }
7224
7225 free (use->iv);
7226 BITMAP_FREE (use->related_cands);
7227 for (j = 0; j < use->n_map_members; j++)
7228 if (use->cost_map[j].depends_on)
7229 BITMAP_FREE (use->cost_map[j].depends_on);
7230 free (use->cost_map);
7231 free (use);
7232 }
7233 data->iv_uses.truncate (0);
7234
7235 for (i = 0; i < n_iv_cands (data); i++)
7236 {
7237 struct iv_cand *cand = iv_cand (data, i);
7238
7239 free (cand->iv);
7240 if (cand->depends_on)
7241 BITMAP_FREE (cand->depends_on);
7242 free (cand);
7243 }
7244 data->iv_candidates.truncate (0);
7245
7246 if (data->version_info_size < num_ssa_names)
7247 {
7248 data->version_info_size = 2 * num_ssa_names;
7249 free (data->version_info);
7250 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7251 }
7252
7253 data->max_inv_id = 0;
7254
7255 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7256 SET_DECL_RTL (obj, NULL_RTX);
7257
7258 decl_rtl_to_reset.truncate (0);
7259
7260 data->inv_expr_tab->empty ();
7261 data->inv_expr_id = 0;
7262 }
7263
7264 /* Finalizes data structures used by the iv optimization pass, releasing
7265 all memory held in DATA. */
7266
7267 static void
7268 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7269 {
7270 free_loop_data (data);
7271 free (data->version_info);
7272 BITMAP_FREE (data->relevant);
7273 BITMAP_FREE (data->important_candidates);
7274
7275 decl_rtl_to_reset.release ();
7276 data->iv_uses.release ();
7277 data->iv_candidates.release ();
7278 delete data->inv_expr_tab;
7279 data->inv_expr_tab = NULL;
7280 free_affine_expand_cache (&data->name_expansion_cache);
7281 }
7282
7283 /* Returns true if the loop body BODY includes any function calls. */
7284
7285 static bool
7286 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7287 {
7288 gimple_stmt_iterator gsi;
7289 unsigned i;
7290
7291 for (i = 0; i < num_nodes; i++)
7292 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7293 {
7294 gimple stmt = gsi_stmt (gsi);
7295 if (is_gimple_call (stmt)
7296 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7297 return true;
7298 }
7299 return false;
7300 }
7301
7302 /* Optimizes the LOOP. Returns true if anything changed. */
7303
7304 static bool
7305 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7306 {
7307 bool changed = false;
7308 struct iv_ca *iv_ca;
7309 edge exit = single_dom_exit (loop);
7310 basic_block *body;
7311
7312 gcc_assert (!data->niters);
7313 data->current_loop = loop;
7314 data->loop_loc = find_loop_location (loop);
7315 data->speed = optimize_loop_for_speed_p (loop);
7316
7317 if (dump_file && (dump_flags & TDF_DETAILS))
7318 {
7319 fprintf (dump_file, "Processing loop %d", loop->num);
7320 if (data->loop_loc != UNKNOWN_LOCATION)
7321 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7322 LOCATION_LINE (data->loop_loc));
7323 fprintf (dump_file, "\n");
7324
7325 if (exit)
7326 {
7327 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7328 exit->src->index, exit->dest->index);
7329 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7330 fprintf (dump_file, "\n");
7331 }
7332
7333 fprintf (dump_file, "\n");
7334 }
7335
7336 body = get_loop_body (loop);
7337 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7338 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7339 free (body);
7340
7341 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7342
7343 /* For each ssa name determines whether it behaves as an induction variable
7344 in some loop. */
7345 if (!find_induction_variables (data))
7346 goto finish;
7347
7348 /* Finds interesting uses (item 1). */
7349 find_interesting_uses (data);
7350 group_address_uses (data);
7351 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7352 goto finish;
7353
7354 /* Finds candidates for the induction variables (item 2). */
7355 find_iv_candidates (data);
7356
7357 /* Calculates the costs (item 3, part 1). */
7358 determine_iv_costs (data);
7359 determine_use_iv_costs (data);
7360 determine_set_costs (data);
7361
7362 /* Find the optimal set of induction variables (item 3, part 2). */
7363 iv_ca = find_optimal_iv_set (data);
7364 if (!iv_ca)
7365 goto finish;
7366 changed = true;
7367
7368 /* Create the new induction variables (item 4, part 1). */
7369 create_new_ivs (data, iv_ca);
7370 iv_ca_free (&iv_ca);
7371
7372 /* Rewrite the uses (item 4, part 2). */
7373 rewrite_uses (data);
7374
7375 /* Remove the ivs that are unused after rewriting. */
7376 remove_unused_ivs (data);
7377
7378 /* We have changed the structure of induction variables; it might happen
7379 that definitions in the scev database refer to some of them that were
7380 eliminated. */
7381 scev_reset ();
7382
7383 finish:
7384 free_loop_data (data);
7385
7386 return changed;
7387 }
7388
7389 /* Main entry point. Optimizes induction variables in loops. */
7390
7391 void
7392 tree_ssa_iv_optimize (void)
7393 {
7394 struct loop *loop;
7395 struct ivopts_data data;
7396
7397 tree_ssa_iv_optimize_init (&data);
7398
7399 /* Optimize the loops starting with the innermost ones. */
7400 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7401 {
7402 if (dump_file && (dump_flags & TDF_DETAILS))
7403 flow_loop_dump (loop, dump_file, NULL, 1);
7404
7405 tree_ssa_iv_optimize_loop (&data, loop);
7406 }
7407
7408 tree_ssa_iv_optimize_finalize (&data);
7409 }