Multiple exit loop handling in ivopts. Regression tested on x86-64/linux
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
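/* As an illustration of the transformation this pass aims for (a
   hypothetical example, not taken from this file): given

     void f (int *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = 0;
     }

   the loop has a single biv `i', used both in the address `a[i]' and in
   the exit test `i < n'.  Selecting a pointer candidate conceptually
   rewrites the loop to

     int *p = a, *end = a + n;
     for (; p != end; p++)
       *p = 0;

   which strength-reduces the address computation and allows `i' itself
   to be eliminated.  */
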
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "output.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "tree-pass.h"
80 #include "ggc.h"
81 #include "insn-config.h"
82 #include "recog.h"
83 #include "pointer-set.h"
84 #include "hashtab.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
88 #include "params.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
91 #include "target.h"
92 #include "tree-ssa-propagate.h"
93
94 /* FIXME: Expressions are expanded to RTL in this pass to determine the
95 cost of different addressing modes. This should be moved to a TBD
96 interface between the GIMPLE and RTL worlds. */
97 #include "expr.h"
98
99 /* The infinite cost. */
100 #define INFTY 10000000
101
102 /* The expected number of loop iterations. TODO -- use profiling instead of
103 this. */
104 #define AVG_LOOP_NITER(LOOP) 5
105
106
107 /* Representation of the induction variable. */
108 struct iv
109 {
110 tree base; /* Initial value of the iv. */
111 tree base_object; /* A memory object to which the induction variable points. */
112 tree step; /* Step of the iv (constant only). */
113 tree ssa_name; /* The ssa name with the value. */
114 bool biv_p; /* Is it a biv? */
115 bool have_use_for; /* Do we already have a use for it? */
116 unsigned use_id; /* If so, the identifier of that use. */
117 };
118
119 /* Per-ssa version information (induction variable descriptions, etc.). */
120 struct version_info
121 {
122 tree name; /* The ssa name. */
123 struct iv *iv; /* Induction variable description. */
124 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
125 an expression that is not an induction variable. */
126 bool preserve_biv; /* For the original biv, whether to preserve it. */
127 unsigned inv_id; /* Id of an invariant. */
128 };
129
130 /* Types of uses. */
131 enum use_type
132 {
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
136 };
137
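/* For illustration (hypothetical source, not part of this pass): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the occurrence of `i' inside `a[i]' gives rise to a USE_ADDRESS, the
   exit test `i < n' to a USE_COMPARE, and a statement such as
   `x = i * 3;' in the loop body would give rise to a
   USE_NONLINEAR_EXPR.  */
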
138 /* Cost of a computation. */
139 typedef struct
140 {
141 int cost; /* The runtime cost. */
142 unsigned complexity; /* The estimate of the complexity of the code for
143 the computation (in no concrete units --
144 complexity field should be larger for more
145 complex expressions and addressing modes). */
146 } comp_cost;
147
148 static const comp_cost zero_cost = {0, 0};
149 static const comp_cost infinite_cost = {INFTY, INFTY};
150
151 /* The candidate - cost pair. */
152 struct cost_pair
153 {
154 struct iv_cand *cand; /* The candidate. */
155 comp_cost cost; /* The cost. */
156 bitmap depends_on; /* The list of invariants that have to be
157 preserved. */
158 tree value; /* For final value elimination, the expression for
159 the final value of the iv. For iv elimination,
160 the new bound to compare with. */
161 };
162
163 /* Use. */
164 struct iv_use
165 {
166 unsigned id; /* The id of the use. */
167 enum use_type type; /* Type of the use. */
168 struct iv *iv; /* The induction variable it is based on. */
169 gimple stmt; /* Statement in that it occurs. */
170 tree *op_p; /* The place where it occurs. */
171 bitmap related_cands; /* The set of "related" iv candidates, plus the common
172 important ones. */
173
174 unsigned n_map_members; /* Number of candidates in the cost_map list. */
175 struct cost_pair *cost_map;
176 /* The costs w.r.t. the iv candidates. */
177
178 struct iv_cand *selected;
179 /* The selected candidate. */
180 };
181
182 /* The position where the iv is computed. */
183 enum iv_position
184 {
185 IP_NORMAL, /* At the end, just before the exit condition. */
186 IP_END, /* At the end of the latch block. */
187 IP_BEFORE_USE, /* Immediately before a specific use. */
188 IP_AFTER_USE, /* Immediately after a specific use. */
189 IP_ORIGINAL /* The original biv. */
190 };
191
192 /* The induction variable candidate. */
193 struct iv_cand
194 {
195 unsigned id; /* The number of the candidate. */
196 bool important; /* Whether this is an "important" candidate, i.e. such
197 that it should be considered by all uses. */
198 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
199 gimple incremented_at;/* For original biv, the statement where it is
200 incremented. */
201 tree var_before; /* The variable used for it before increment. */
202 tree var_after; /* The variable used for it after increment. */
203 struct iv *iv; /* The value of the candidate. NULL for
204 "pseudocandidate" used to indicate the possibility
205 to replace the final value of an iv by direct
206 computation of the value. */
207 unsigned cost; /* Cost of the candidate. */
208 unsigned cost_step; /* Cost of the candidate's increment operation. */
209 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
210 where it is incremented. */
211 bitmap depends_on; /* The list of invariants that are used in step of the
212 biv. */
213 };
214
215 /* The data used by the induction variable optimizations. */
216
217 typedef struct iv_use *iv_use_p;
218 DEF_VEC_P(iv_use_p);
219 DEF_VEC_ALLOC_P(iv_use_p,heap);
220
221 typedef struct iv_cand *iv_cand_p;
222 DEF_VEC_P(iv_cand_p);
223 DEF_VEC_ALLOC_P(iv_cand_p,heap);
224
225 struct ivopts_data
226 {
227 /* The currently optimized loop. */
228 struct loop *current_loop;
229
230 /* Numbers of iterations for all exits of the current loop. */
231 struct pointer_map_t *niters;
232
233 /* Number of registers used in it. */
234 unsigned regs_used;
235
236 /* The size of version_info array allocated. */
237 unsigned version_info_size;
238
239 /* The array of information for the ssa names. */
240 struct version_info *version_info;
241
242 /* The bitmap of indices in version_info whose value was changed. */
243 bitmap relevant;
244
245 /* The uses of induction variables. */
246 VEC(iv_use_p,heap) *iv_uses;
247
248 /* The candidates. */
249 VEC(iv_cand_p,heap) *iv_candidates;
250
251 /* A bitmap of important candidates. */
252 bitmap important_candidates;
253
254 /* The maximum invariant id. */
255 unsigned max_inv_id;
256
257 /* Whether to consider just related and important candidates when replacing a
258 use. */
259 bool consider_all_candidates;
260
261 /* Are we optimizing for speed? */
262 bool speed;
263
264 /* Whether the loop body includes any function calls. */
265 bool body_includes_call;
266 };
267
268 /* An assignment of iv candidates to uses. */
269
270 struct iv_ca
271 {
272 /* The number of uses covered by the assignment. */
273 unsigned upto;
274
275 /* Number of uses that cannot be expressed by the candidates in the set. */
276 unsigned bad_uses;
277
278 /* Candidate assigned to a use, together with the related costs. */
279 struct cost_pair **cand_for_use;
280
281 /* Number of times each candidate is used. */
282 unsigned *n_cand_uses;
283
284 /* The candidates used. */
285 bitmap cands;
286
287 /* The number of candidates in the set. */
288 unsigned n_cands;
289
290 /* Total number of registers needed. */
291 unsigned n_regs;
292
293 /* Total cost of expressing uses. */
294 comp_cost cand_use_cost;
295
296 /* Total cost of candidates. */
297 unsigned cand_cost;
298
299 /* Number of times each invariant is used. */
300 unsigned *n_invariant_uses;
301
302 /* Total cost of the assignment. */
303 comp_cost cost;
304 };
305
306 /* Difference of two iv candidate assignments. */
307
308 struct iv_ca_delta
309 {
310 /* Changed use. */
311 struct iv_use *use;
312
313 /* An old assignment (for rollback purposes). */
314 struct cost_pair *old_cp;
315
316 /* A new assignment. */
317 struct cost_pair *new_cp;
318
319 /* Next change in the list. */
320 struct iv_ca_delta *next_change;
321 };
322
323 /* Bound on the number of candidates below which all candidates are considered. */
324
325 #define CONSIDER_ALL_CANDIDATES_BOUND \
326 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
327
328 /* If there are more iv occurrences, we just give up (it is quite unlikely that
329 optimizing such a loop would help, and it would take ages). */
330
331 #define MAX_CONSIDERED_USES \
332 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
333
334 /* If there are at most this number of ivs in the set, always try removing
335 unnecessary ivs from the set. */
336
337 #define ALWAYS_PRUNE_CAND_SET_BOUND \
338 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
339
340 /* The list of trees for which the decl_rtl field must be reset is stored
341 here. */
342
343 static VEC(tree,heap) *decl_rtl_to_reset;
344
345 /* Number of uses recorded in DATA. */
346
347 static inline unsigned
348 n_iv_uses (struct ivopts_data *data)
349 {
350 return VEC_length (iv_use_p, data->iv_uses);
351 }
352
353 /* Ith use recorded in DATA. */
354
355 static inline struct iv_use *
356 iv_use (struct ivopts_data *data, unsigned i)
357 {
358 return VEC_index (iv_use_p, data->iv_uses, i);
359 }
360
361 /* Number of candidates recorded in DATA. */
362
363 static inline unsigned
364 n_iv_cands (struct ivopts_data *data)
365 {
366 return VEC_length (iv_cand_p, data->iv_candidates);
367 }
368
369 /* Ith candidate recorded in DATA. */
370
371 static inline struct iv_cand *
372 iv_cand (struct ivopts_data *data, unsigned i)
373 {
374 return VEC_index (iv_cand_p, data->iv_candidates, i);
375 }
376
377 /* The single loop exit if it dominates the latch, NULL otherwise. */
378
379 edge
380 single_dom_exit (struct loop *loop)
381 {
382 edge exit = single_exit (loop);
383
384 if (!exit)
385 return NULL;
386
387 if (!just_once_each_iteration_p (loop, exit->src))
388 return NULL;
389
390 return exit;
391 }
392
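/* Illustration (hypothetical loops): in

     while (i < n) { i++; }

   the exit test executes on every iteration and dominates the latch, so
   that exit edge is returned.  If the only way out of the loop is a
   `break' hidden in one branch of an `if' inside the body, the exit does
   not dominate the latch and NULL is returned even though the exit is
   unique.  */
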
393 /* Dumps information about the induction variable IV to FILE. */
394
395 extern void dump_iv (FILE *, struct iv *);
396 void
397 dump_iv (FILE *file, struct iv *iv)
398 {
399 if (iv->ssa_name)
400 {
401 fprintf (file, "ssa name ");
402 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
403 fprintf (file, "\n");
404 }
405
406 fprintf (file, " type ");
407 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
408 fprintf (file, "\n");
409
410 if (iv->step)
411 {
412 fprintf (file, " base ");
413 print_generic_expr (file, iv->base, TDF_SLIM);
414 fprintf (file, "\n");
415
416 fprintf (file, " step ");
417 print_generic_expr (file, iv->step, TDF_SLIM);
418 fprintf (file, "\n");
419 }
420 else
421 {
422 fprintf (file, " invariant ");
423 print_generic_expr (file, iv->base, TDF_SLIM);
424 fprintf (file, "\n");
425 }
426
427 if (iv->base_object)
428 {
429 fprintf (file, " base object ");
430 print_generic_expr (file, iv->base_object, TDF_SLIM);
431 fprintf (file, "\n");
432 }
433
434 if (iv->biv_p)
435 fprintf (file, " is a biv\n");
436 }
437
438 /* Dumps information about the USE to FILE. */
439
440 extern void dump_use (FILE *, struct iv_use *);
441 void
442 dump_use (FILE *file, struct iv_use *use)
443 {
444 fprintf (file, "use %d\n", use->id);
445
446 switch (use->type)
447 {
448 case USE_NONLINEAR_EXPR:
449 fprintf (file, " generic\n");
450 break;
451
452 case USE_ADDRESS:
453 fprintf (file, " address\n");
454 break;
455
456 case USE_COMPARE:
457 fprintf (file, " compare\n");
458 break;
459
460 default:
461 gcc_unreachable ();
462 }
463
464 fprintf (file, " in statement ");
465 print_gimple_stmt (file, use->stmt, 0, 0);
466 fprintf (file, "\n");
467
468 fprintf (file, " at position ");
469 if (use->op_p)
470 print_generic_expr (file, *use->op_p, TDF_SLIM);
471 fprintf (file, "\n");
472
473 dump_iv (file, use->iv);
474
475 if (use->related_cands)
476 {
477 fprintf (file, " related candidates ");
478 dump_bitmap (file, use->related_cands);
479 }
480 }
481
482 /* Dumps information about the uses to FILE. */
483
484 extern void dump_uses (FILE *, struct ivopts_data *);
485 void
486 dump_uses (FILE *file, struct ivopts_data *data)
487 {
488 unsigned i;
489 struct iv_use *use;
490
491 for (i = 0; i < n_iv_uses (data); i++)
492 {
493 use = iv_use (data, i);
494
495 dump_use (file, use);
496 fprintf (file, "\n");
497 }
498 }
499
500 /* Dumps information about induction variable candidate CAND to FILE. */
501
502 extern void dump_cand (FILE *, struct iv_cand *);
503 void
504 dump_cand (FILE *file, struct iv_cand *cand)
505 {
506 struct iv *iv = cand->iv;
507
508 fprintf (file, "candidate %d%s\n",
509 cand->id, cand->important ? " (important)" : "");
510
511 if (cand->depends_on)
512 {
513 fprintf (file, " depends on ");
514 dump_bitmap (file, cand->depends_on);
515 }
516
517 if (!iv)
518 {
519 fprintf (file, " final value replacement\n");
520 return;
521 }
522
523 switch (cand->pos)
524 {
525 case IP_NORMAL:
526 fprintf (file, " incremented before exit test\n");
527 break;
528
529 case IP_BEFORE_USE:
530 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
531 break;
532
533 case IP_AFTER_USE:
534 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
535 break;
536
537 case IP_END:
538 fprintf (file, " incremented at end\n");
539 break;
540
541 case IP_ORIGINAL:
542 fprintf (file, " original biv\n");
543 break;
544 }
545
546 dump_iv (file, iv);
547 }
548
549 /* Returns the info for ssa version VER. */
550
551 static inline struct version_info *
552 ver_info (struct ivopts_data *data, unsigned ver)
553 {
554 return data->version_info + ver;
555 }
556
557 /* Returns the info for ssa name NAME. */
558
559 static inline struct version_info *
560 name_info (struct ivopts_data *data, tree name)
561 {
562 return ver_info (data, SSA_NAME_VERSION (name));
563 }
564
565 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
566 emitted in LOOP. */
567
568 static bool
569 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
570 {
571 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
572
573 gcc_assert (bb);
574
575 if (sbb == loop->latch)
576 return true;
577
578 if (sbb != bb)
579 return false;
580
581 return stmt == last_stmt (bb);
582 }
583
584 /* Returns true if STMT is after the place where the original induction
585 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
586 if the positions are identical. */
587
588 static bool
589 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
590 {
591 basic_block cand_bb = gimple_bb (cand->incremented_at);
592 basic_block stmt_bb = gimple_bb (stmt);
593
594 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
595 return false;
596
597 if (stmt_bb != cand_bb)
598 return true;
599
600 if (true_if_equal
601 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
602 return true;
603 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
604 }
605
606 /* Returns true if STMT is after the place where the induction variable
607 CAND is incremented in LOOP. */
608
609 static bool
610 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
611 {
612 switch (cand->pos)
613 {
614 case IP_END:
615 return false;
616
617 case IP_NORMAL:
618 return stmt_after_ip_normal_pos (loop, stmt);
619
620 case IP_ORIGINAL:
621 case IP_AFTER_USE:
622 return stmt_after_inc_pos (cand, stmt, false);
623
624 case IP_BEFORE_USE:
625 return stmt_after_inc_pos (cand, stmt, true);
626
627 default:
628 gcc_unreachable ();
629 }
630 }
631
632 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
633
634 static bool
635 abnormal_ssa_name_p (tree exp)
636 {
637 if (!exp)
638 return false;
639
640 if (TREE_CODE (exp) != SSA_NAME)
641 return false;
642
643 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
644 }
645
646 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
647 abnormal phi node. Callback for for_each_index. */
648
649 static bool
650 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
651 void *data ATTRIBUTE_UNUSED)
652 {
653 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
654 {
655 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
656 return false;
657 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
658 return false;
659 }
660
661 return !abnormal_ssa_name_p (*index);
662 }
663
664 /* Returns true if EXPR contains an ssa name that occurs in an
665 abnormal phi node. */
666
667 bool
668 contains_abnormal_ssa_name_p (tree expr)
669 {
670 enum tree_code code;
671 enum tree_code_class codeclass;
672
673 if (!expr)
674 return false;
675
676 code = TREE_CODE (expr);
677 codeclass = TREE_CODE_CLASS (code);
678
679 if (code == SSA_NAME)
680 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
681
682 if (code == INTEGER_CST
683 || is_gimple_min_invariant (expr))
684 return false;
685
686 if (code == ADDR_EXPR)
687 return !for_each_index (&TREE_OPERAND (expr, 0),
688 idx_contains_abnormal_ssa_name_p,
689 NULL);
690
691 if (code == COND_EXPR)
692 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
693 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
694 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
695
696 switch (codeclass)
697 {
698 case tcc_binary:
699 case tcc_comparison:
700 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
701 return true;
702
703 /* Fallthru. */
704 case tcc_unary:
705 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
706 return true;
707
708 break;
709
710 default:
711 gcc_unreachable ();
712 }
713
714 return false;
715 }
716
717 /* Returns the tree describing the number of iterations determined from
718 EXIT of DATA->current_loop, or NULL if something goes wrong. */
719
720 static tree
721 niter_for_exit (struct ivopts_data *data, edge exit,
722 struct tree_niter_desc **desc_p)
723 {
724 struct tree_niter_desc* desc = NULL;
725 tree niter;
726 void **slot;
727
728 if (!data->niters)
729 {
730 data->niters = pointer_map_create ();
731 slot = NULL;
732 }
733 else
734 slot = pointer_map_contains (data->niters, exit);
735
736 if (!slot)
737 {
738 /* Try to determine number of iterations. We must know it
739 unconditionally (i.e., without possibility of # of iterations
740 being zero). Also, we cannot safely work with ssa names that
741 appear in phi nodes on abnormal edges, so that we do not create
742 overlapping life ranges for them (PR 27283). */
743 desc = XNEW (struct tree_niter_desc);
744 if (number_of_iterations_exit (data->current_loop,
745 exit, desc, true)
746 && integer_zerop (desc->may_be_zero)
747 && !contains_abnormal_ssa_name_p (desc->niter))
748 niter = desc->niter;
749 else
750 niter = NULL_TREE;
751
752 desc->niter = niter;
753 slot = pointer_map_insert (data->niters, exit);
754 *slot = desc;
755 }
756 else
757 niter = ((struct tree_niter_desc *) *slot)->niter;
758
759 if (desc_p)
760 *desc_p = (struct tree_niter_desc *) *slot;
761 return niter;
762 }
763
764 /* Returns the tree describing the number of iterations determined from
765 single dominating exit of DATA->current_loop, or NULL if something
766 goes wrong. */
767
768 static tree
769 niter_for_single_dom_exit (struct ivopts_data *data)
770 {
771 edge exit = single_dom_exit (data->current_loop);
772
773 if (!exit)
774 return NULL;
775
776 return niter_for_exit (data, exit, NULL);
777 }
778
779 /* Initializes data structures used by the iv optimization pass, stored
780 in DATA. */
781
782 static void
783 tree_ssa_iv_optimize_init (struct ivopts_data *data)
784 {
785 data->version_info_size = 2 * num_ssa_names;
786 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
787 data->relevant = BITMAP_ALLOC (NULL);
788 data->important_candidates = BITMAP_ALLOC (NULL);
789 data->max_inv_id = 0;
790 data->niters = NULL;
791 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
792 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
793 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
794 }
795
796 /* Returns the memory object to which EXPR points. In case we are able to
797 determine that it does not point to any such object, NULL is returned. */
798
799 static tree
800 determine_base_object (tree expr)
801 {
802 enum tree_code code = TREE_CODE (expr);
803 tree base, obj;
804
805 /* If this is a pointer casted to any type, we need to determine
806 the base object for the pointer; so handle conversions before
807 throwing away non-pointer expressions. */
808 if (CONVERT_EXPR_P (expr))
809 return determine_base_object (TREE_OPERAND (expr, 0));
810
811 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
812 return NULL_TREE;
813
814 switch (code)
815 {
816 case INTEGER_CST:
817 return NULL_TREE;
818
819 case ADDR_EXPR:
820 obj = TREE_OPERAND (expr, 0);
821 base = get_base_address (obj);
822
823 if (!base)
824 return expr;
825
826 if (TREE_CODE (base) == MEM_REF)
827 return determine_base_object (TREE_OPERAND (base, 0));
828
829 return fold_convert (ptr_type_node,
830 build_fold_addr_expr (base));
831
832 case POINTER_PLUS_EXPR:
833 return determine_base_object (TREE_OPERAND (expr, 0));
834
835 case PLUS_EXPR:
836 case MINUS_EXPR:
837 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
838 gcc_unreachable ();
839
840 default:
841 return fold_convert (ptr_type_node, expr);
842 }
843 }
844
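/* Examples (illustrative only): for `&a[i]' where `a' is a declared
   array, the base object is `&a'; for `p + 4 * i' (a POINTER_PLUS_EXPR)
   it is the base object of `p'; and for an integer constant cast to a
   pointer, NULL_TREE is returned since it points to no object.  */
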
845 /* Allocates an induction variable with the given initial value BASE and
846 step STEP. */
847
848 static struct iv *
849 alloc_iv (tree base, tree step)
850 {
851 struct iv *iv = XCNEW (struct iv);
852 gcc_assert (step != NULL_TREE);
853
854 iv->base = base;
855 iv->base_object = determine_base_object (base);
856 iv->step = step;
857 iv->biv_p = false;
858 iv->have_use_for = false;
859 iv->use_id = 0;
860 iv->ssa_name = NULL_TREE;
861
862 return iv;
863 }
864
865 /* Sets STEP and BASE for induction variable IV. */
866
867 static void
868 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
869 {
870 struct version_info *info = name_info (data, iv);
871
872 gcc_assert (!info->iv);
873
874 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
875 info->iv = alloc_iv (base, step);
876 info->iv->ssa_name = iv;
877 }
878
879 /* Finds induction variable declaration for VAR. */
880
881 static struct iv *
882 get_iv (struct ivopts_data *data, tree var)
883 {
884 basic_block bb;
885 tree type = TREE_TYPE (var);
886
887 if (!POINTER_TYPE_P (type)
888 && !INTEGRAL_TYPE_P (type))
889 return NULL;
890
891 if (!name_info (data, var)->iv)
892 {
893 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
894
895 if (!bb
896 || !flow_bb_inside_loop_p (data->current_loop, bb))
897 set_iv (data, var, var, build_int_cst (type, 0));
898 }
899
900 return name_info (data, var)->iv;
901 }
902
903 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
904 not define a simple affine biv with nonzero step. */
905
906 static tree
907 determine_biv_step (gimple phi)
908 {
909 struct loop *loop = gimple_bb (phi)->loop_father;
910 tree name = PHI_RESULT (phi);
911 affine_iv iv;
912
913 if (!is_gimple_reg (name))
914 return NULL_TREE;
915
916 if (!simple_iv (loop, loop, name, &iv, true))
917 return NULL_TREE;
918
919 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
920 }
921
922 /* Finds basic ivs. */
923
924 static bool
925 find_bivs (struct ivopts_data *data)
926 {
927 gimple phi;
928 tree step, type, base;
929 bool found = false;
930 struct loop *loop = data->current_loop;
931 gimple_stmt_iterator psi;
932
933 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
934 {
935 phi = gsi_stmt (psi);
936
937 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
938 continue;
939
940 step = determine_biv_step (phi);
941 if (!step)
942 continue;
943
944 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
945 base = expand_simple_operations (base);
946 if (contains_abnormal_ssa_name_p (base)
947 || contains_abnormal_ssa_name_p (step))
948 continue;
949
950 type = TREE_TYPE (PHI_RESULT (phi));
951 base = fold_convert (type, base);
952 if (step)
953 {
954 if (POINTER_TYPE_P (type))
955 step = fold_convert (sizetype, step);
956 else
957 step = fold_convert (type, step);
958 }
959
960 set_iv (data, PHI_RESULT (phi), base, step);
961 found = true;
962 }
963
964 return found;
965 }
966
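/* A sketch of the SSA form this matches (ssa names invented for
   illustration): with the loop header containing

     i_1 = PHI <0 (preheader), i_2 (latch)>

   and the body containing `i_2 = i_1 + 1;', the phi result `i_1' is
   recorded as a biv with base 0 and step 1.  */
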
967 /* Marks basic ivs. */
968
969 static void
970 mark_bivs (struct ivopts_data *data)
971 {
972 gimple phi;
973 tree var;
974 struct iv *iv, *incr_iv;
975 struct loop *loop = data->current_loop;
976 basic_block incr_bb;
977 gimple_stmt_iterator psi;
978
979 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
980 {
981 phi = gsi_stmt (psi);
982
983 iv = get_iv (data, PHI_RESULT (phi));
984 if (!iv)
985 continue;
986
987 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
988 incr_iv = get_iv (data, var);
989 if (!incr_iv)
990 continue;
991
992 /* If the increment occurs in a subloop, ignore it. */
993 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
994 if (incr_bb->loop_father != data->current_loop
995 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
996 continue;
997
998 iv->biv_p = true;
999 incr_iv->biv_p = true;
1000 }
1001 }
1002
1003 /* Checks whether STMT defines a linear induction variable and stores its
1004 parameters to IV. */
1005
1006 static bool
1007 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1008 {
1009 tree lhs;
1010 struct loop *loop = data->current_loop;
1011
1012 iv->base = NULL_TREE;
1013 iv->step = NULL_TREE;
1014
1015 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1016 return false;
1017
1018 lhs = gimple_assign_lhs (stmt);
1019 if (TREE_CODE (lhs) != SSA_NAME)
1020 return false;
1021
1022 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1023 return false;
1024 iv->base = expand_simple_operations (iv->base);
1025
1026 if (contains_abnormal_ssa_name_p (iv->base)
1027 || contains_abnormal_ssa_name_p (iv->step))
1028 return false;
1029
1030 return true;
1031 }
1032
1033 /* Finds general ivs in statement STMT. */
1034
1035 static void
1036 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1037 {
1038 affine_iv iv;
1039
1040 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1041 return;
1042
1043 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1044 }
1045
1046 /* Finds general ivs in basic block BB. */
1047
1048 static void
1049 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1050 {
1051 gimple_stmt_iterator bsi;
1052
1053 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1054 find_givs_in_stmt (data, gsi_stmt (bsi));
1055 }
1056
1057 /* Finds general ivs. */
1058
1059 static void
1060 find_givs (struct ivopts_data *data)
1061 {
1062 struct loop *loop = data->current_loop;
1063 basic_block *body = get_loop_body_in_dom_order (loop);
1064 unsigned i;
1065
1066 for (i = 0; i < loop->num_nodes; i++)
1067 find_givs_in_bb (data, body[i]);
1068 free (body);
1069 }
1070
1071 /* For each ssa name defined in LOOP determines whether it is an induction
1072 variable and if so, its initial value and step. */
1073
1074 static bool
1075 find_induction_variables (struct ivopts_data *data)
1076 {
1077 unsigned i;
1078 bitmap_iterator bi;
1079
1080 if (!find_bivs (data))
1081 return false;
1082
1083 find_givs (data);
1084 mark_bivs (data);
1085
1086 if (dump_file && (dump_flags & TDF_DETAILS))
1087 {
1088 tree niter = niter_for_single_dom_exit (data);
1089
1090 if (niter)
1091 {
1092 fprintf (dump_file, " number of iterations ");
1093 print_generic_expr (dump_file, niter, TDF_SLIM);
1094 fprintf (dump_file, "\n\n");
1095 }
1096
1097 fprintf (dump_file, "Induction variables:\n\n");
1098
1099 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1100 {
1101 if (ver_info (data, i)->iv)
1102 dump_iv (dump_file, ver_info (data, i)->iv);
1103 }
1104 }
1105
1106 return true;
1107 }
1108
1109 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1110
1111 static struct iv_use *
1112 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1113 gimple stmt, enum use_type use_type)
1114 {
1115 struct iv_use *use = XCNEW (struct iv_use);
1116
1117 use->id = n_iv_uses (data);
1118 use->type = use_type;
1119 use->iv = iv;
1120 use->stmt = stmt;
1121 use->op_p = use_p;
1122 use->related_cands = BITMAP_ALLOC (NULL);
1123
1124 /* Reset the ssa name so that it is not shown in the dumps, if it was not
1125 reset by the caller. */
1126 iv->ssa_name = NULL_TREE;
1127
1128 if (dump_file && (dump_flags & TDF_DETAILS))
1129 dump_use (dump_file, use);
1130
1131 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1132
1133 return use;
1134 }
1135
1136 /* Checks whether OP is a loop-level invariant and if so, records it.
1137 NONLINEAR_USE is true if the invariant is used in a way we do not
1138 handle specially. */
1139
1140 static void
1141 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1142 {
1143 basic_block bb;
1144 struct version_info *info;
1145
1146 if (TREE_CODE (op) != SSA_NAME
1147 || !is_gimple_reg (op))
1148 return;
1149
1150 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1151 if (bb
1152 && flow_bb_inside_loop_p (data->current_loop, bb))
1153 return;
1154
1155 info = name_info (data, op);
1156 info->name = op;
1157 info->has_nonlin_use |= nonlinear_use;
1158 if (!info->inv_id)
1159 info->inv_id = ++data->max_inv_id;
1160 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1161 }
1162
1163 /* Checks whether the use OP is interesting and if so, records it. */
1164
1165 static struct iv_use *
1166 find_interesting_uses_op (struct ivopts_data *data, tree op)
1167 {
1168 struct iv *iv;
1169 struct iv *civ;
1170 gimple stmt;
1171 struct iv_use *use;
1172
1173 if (TREE_CODE (op) != SSA_NAME)
1174 return NULL;
1175
1176 iv = get_iv (data, op);
1177 if (!iv)
1178 return NULL;
1179
1180 if (iv->have_use_for)
1181 {
1182 use = iv_use (data, iv->use_id);
1183
1184 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1185 return use;
1186 }
1187
1188 if (integer_zerop (iv->step))
1189 {
1190 record_invariant (data, op, true);
1191 return NULL;
1192 }
1193 iv->have_use_for = true;
1194
1195 civ = XNEW (struct iv);
1196 *civ = *iv;
1197
1198 stmt = SSA_NAME_DEF_STMT (op);
1199 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1200 || is_gimple_assign (stmt));
1201
1202 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1203 iv->use_id = use->id;
1204
1205 return use;
1206 }
1207
1208 /* Given a condition in statement STMT, checks whether it is a compare
1209 of an induction variable and an invariant. If this is the case,
1210 CONTROL_VAR is set to location of the iv, BOUND to the location of
1211 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1212 induction variable descriptions, and true is returned. If this is not
1213 the case, CONTROL_VAR and BOUND are set to the arguments of the
1214 condition and false is returned. */
1215
1216 static bool
1217 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1218 tree **control_var, tree **bound,
1219 struct iv **iv_var, struct iv **iv_bound)
1220 {
1221 /* The objects returned when COND has constant operands. */
1222 static struct iv const_iv;
1223 static tree zero;
1224 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1225 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1226 bool ret = false;
1227
1228 if (gimple_code (stmt) == GIMPLE_COND)
1229 {
1230 op0 = gimple_cond_lhs_ptr (stmt);
1231 op1 = gimple_cond_rhs_ptr (stmt);
1232 }
1233 else
1234 {
1235 op0 = gimple_assign_rhs1_ptr (stmt);
1236 op1 = gimple_assign_rhs2_ptr (stmt);
1237 }
1238
1239 zero = integer_zero_node;
1240 const_iv.step = integer_zero_node;
1241
1242 if (TREE_CODE (*op0) == SSA_NAME)
1243 iv0 = get_iv (data, *op0);
1244 if (TREE_CODE (*op1) == SSA_NAME)
1245 iv1 = get_iv (data, *op1);
1246
1247 /* Exactly one of the compared values must be an iv, and the other one must
1248 be an invariant. */
1249 if (!iv0 || !iv1)
1250 goto end;
1251
1252 if (integer_zerop (iv0->step))
1253 {
1254 /* Control variable may be on the other side. */
1255 tmp_op = op0; op0 = op1; op1 = tmp_op;
1256 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1257 }
1258 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1259
1260 end:
1261 if (control_var)
1262 *control_var = op0;
1263 if (iv_var)
1264 *iv_var = iv0;
1265 if (bound)
1266 *bound = op1;
1267 if (iv_bound)
1268 *iv_bound = iv1;
1269
1270 return ret;
1271 }
1272
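/* For instance (hypothetical GIMPLE): given `if (i_1 < n_2)' where `i_1'
   is an iv with nonzero step and `n_2' is loop invariant, *CONTROL_VAR is
   set to point to `i_1', *BOUND to `n_2', and true is returned.  If both
   operands were invariant (zero step), false would be returned with the
   operands still stored.  */
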
1273 /* Checks whether the condition in STMT is interesting and if so,
1274 records it. */
1275
1276 static void
1277 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1278 {
1279 tree *var_p, *bound_p;
1280 struct iv *var_iv, *civ;
1281
1282 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1283 {
1284 find_interesting_uses_op (data, *var_p);
1285 find_interesting_uses_op (data, *bound_p);
1286 return;
1287 }
1288
1289 civ = XNEW (struct iv);
1290 *civ = *var_iv;
1291 record_use (data, NULL, civ, stmt, USE_COMPARE);
1292 }
1293
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295 i.e. if all its operands are defined outside of the LOOP. LOOP
1296 should not be the function body. */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301 basic_block def_bb;
1302 unsigned i, len;
1303
1304 gcc_assert (loop_depth (loop) > 0);
1305
1306 if (is_gimple_min_invariant (expr))
1307 return true;
1308
1309 if (TREE_CODE (expr) == SSA_NAME)
1310 {
1311 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1312 if (def_bb
1313 && flow_bb_inside_loop_p (loop, def_bb))
1314 return false;
1315
1316 return true;
1317 }
1318
1319 if (!EXPR_P (expr))
1320 return false;
1321
1322 len = TREE_OPERAND_LENGTH (expr);
1323 for (i = 0; i < len; i++)
1324 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1325 return false;
1326
1327 return true;
1328 }
1329
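/* E.g. (illustrative): if `n_1' is defined before the loop, both `n_1'
   and `n_1 * 4 + 8' are invariant in it, while any expression using an
   ssa name defined inside the loop is not.  */
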
1330 /* Returns true if statement STMT is obviously invariant in LOOP,
1331 i.e. if all its operands on the RHS are defined outside of the LOOP.
1332 LOOP should not be the function body. */
1333
1334 bool
1335 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1336 {
1337 unsigned i;
1338 tree lhs;
1339
1340 gcc_assert (loop_depth (loop) > 0);
1341
1342 lhs = gimple_get_lhs (stmt);
1343 for (i = 0; i < gimple_num_ops (stmt); i++)
1344 {
1345 tree op = gimple_op (stmt, i);
1346 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1347 return false;
1348 }
1349
1350 return true;
1351 }
1352
1353 /* Accumulates the steps of indices into DATA and replaces their values with the
1354 initial ones. Returns false when the value of the index cannot be determined.
1355 Callback for for_each_index. */
1356
1357 struct ifs_ivopts_data
1358 {
1359 struct ivopts_data *ivopts_data;
1360 gimple stmt;
1361 tree step;
1362 };
1363
1364 static bool
1365 idx_find_step (tree base, tree *idx, void *data)
1366 {
1367 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1368 struct iv *iv;
1369 tree step, iv_base, iv_step, lbound, off;
1370 struct loop *loop = dta->ivopts_data->current_loop;
1371
1372 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
1373 return false;
1374
1375 /* If base is a component ref, require that the offset of the reference
1376 be invariant. */
1377 if (TREE_CODE (base) == COMPONENT_REF)
1378 {
1379 off = component_ref_field_offset (base);
1380 return expr_invariant_in_loop_p (loop, off);
1381 }
1382
1383 /* If base is an array reference, first check whether we will be able to move the
1384 reference out of the loop (in order to take its address in strength
1385 reduction). In order for this to work we need both lower bound
1386 and step to be loop invariants. */
1387 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1388 {
1389 /* Moreover, for a range, the size needs to be invariant as well. */
1390 if (TREE_CODE (base) == ARRAY_RANGE_REF
1391 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1392 return false;
1393
1394 step = array_ref_element_size (base);
1395 lbound = array_ref_low_bound (base);
1396
1397 if (!expr_invariant_in_loop_p (loop, step)
1398 || !expr_invariant_in_loop_p (loop, lbound))
1399 return false;
1400 }
1401
1402 if (TREE_CODE (*idx) != SSA_NAME)
1403 return true;
1404
1405 iv = get_iv (dta->ivopts_data, *idx);
1406 if (!iv)
1407 return false;
1408
1409 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1410 *&x[0], which is not folded and does not trigger the
1411 ARRAY_REF path below. */
1412 *idx = iv->base;
1413
1414 if (integer_zerop (iv->step))
1415 return true;
1416
1417 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1418 {
1419 step = array_ref_element_size (base);
1420
1421 /* We only handle addresses whose step is an integer constant. */
1422 if (TREE_CODE (step) != INTEGER_CST)
1423 return false;
1424 }
1425 else
1426 /* The step for pointer arithmetic is already 1 byte. */
1427 step = size_one_node;
1428
1429 iv_base = iv->base;
1430 iv_step = iv->step;
1431 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1432 sizetype, &iv_base, &iv_step, dta->stmt,
1433 false))
1434 {
1435 /* The index might wrap. */
1436 return false;
1437 }
1438
1439 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1440 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1441
1442 return true;
1443 }
1444
1445 /* Records a use in index IDX. Callback for for_each_index. Ivopts data
1446 object is passed to it in DATA. */
1447
1448 static bool
1449 idx_record_use (tree base, tree *idx,
1450 void *vdata)
1451 {
1452 struct ivopts_data *data = (struct ivopts_data *) vdata;
1453 find_interesting_uses_op (data, *idx);
1454 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1455 {
1456 find_interesting_uses_op (data, array_ref_element_size (base));
1457 find_interesting_uses_op (data, array_ref_low_bound (base));
1458 }
1459 return true;
1460 }
1461
1462 /* If we can prove that TOP = cst * BOT for some constant cst,
1463 store cst to MUL and return true. Otherwise return false.
1464 The returned value is always sign-extended, regardless of the
1465 signedness of TOP and BOT. */
1466
1467 static bool
1468 constant_multiple_of (tree top, tree bot, double_int *mul)
1469 {
1470 tree mby;
1471 enum tree_code code;
1472 double_int res, p0, p1;
1473 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1474
1475 STRIP_NOPS (top);
1476 STRIP_NOPS (bot);
1477
1478 if (operand_equal_p (top, bot, 0))
1479 {
1480 *mul = double_int_one;
1481 return true;
1482 }
1483
1484 code = TREE_CODE (top);
1485 switch (code)
1486 {
1487 case MULT_EXPR:
1488 mby = TREE_OPERAND (top, 1);
1489 if (TREE_CODE (mby) != INTEGER_CST)
1490 return false;
1491
1492 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1493 return false;
1494
1495 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1496 precision);
1497 return true;
1498
1499 case PLUS_EXPR:
1500 case MINUS_EXPR:
1501 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1502 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1503 return false;
1504
1505 if (code == MINUS_EXPR)
1506 p1 = double_int_neg (p1);
1507 *mul = double_int_sext (double_int_add (p0, p1), precision);
1508 return true;
1509
1510 case INTEGER_CST:
1511 if (TREE_CODE (bot) != INTEGER_CST)
1512 return false;
1513
1514 p0 = double_int_sext (tree_to_double_int (top), precision);
1515 p1 = double_int_sext (tree_to_double_int (bot), precision);
1516 if (double_int_zero_p (p1))
1517 return false;
1518 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1519 precision);
1520 return double_int_zero_p (res);
1521
1522 default:
1523 return false;
1524 }
1525 }
1526
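/* Worked examples (illustrative): constant_multiple_of (x_1 * 12, x_1,
   &mul) sets MUL to 12; constant_multiple_of (8, 4, &mul) sets MUL to 2;
   and constant_multiple_of (10, 4, &mul) returns false because the
   division leaves a remainder.  */
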
1527 /* Returns true if memory reference REF with step STEP may be unaligned. */
1528
1529 static bool
1530 may_be_unaligned_p (tree ref, tree step)
1531 {
1532 tree base;
1533 tree base_type;
1534 HOST_WIDE_INT bitsize;
1535 HOST_WIDE_INT bitpos;
1536 tree toffset;
1537 enum machine_mode mode;
1538 int unsignedp, volatilep;
1539 unsigned base_align;
1540
1541 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1542 thus they are not misaligned. */
1543 if (TREE_CODE (ref) == TARGET_MEM_REF)
1544 return false;
1545
1546 /* The test below is basically a copy of what expr.c:normal_inner_ref
1547 does to check whether the object must be loaded by parts when
1548 STRICT_ALIGNMENT is true. */
1549 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1550 &unsignedp, &volatilep, true);
1551 base_type = TREE_TYPE (base);
1552 base_align = TYPE_ALIGN (base_type);
1553
1554 if (mode != BLKmode)
1555 {
1556 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1557
1558 if (base_align < mode_align
1559 || (bitpos % mode_align) != 0
1560 || (bitpos % BITS_PER_UNIT) != 0)
1561 return true;
1562
1563 if (toffset
1564 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1565 return true;
1566
1567 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1568 return true;
1569 }
1570
1571 return false;
1572 }
1573
1574 /* Return true if EXPR may be non-addressable. */
1575
1576 static bool
1577 may_be_nonaddressable_p (tree expr)
1578 {
1579 switch (TREE_CODE (expr))
1580 {
1581 case TARGET_MEM_REF:
1582 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1583 target, thus they are always addressable. */
1584 return false;
1585
1586 case COMPONENT_REF:
1587 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1588 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1589
1590 case VIEW_CONVERT_EXPR:
1591 /* This kind of view-conversions may wrap non-addressable objects
1592 and make them look addressable. After some processing the
1593 non-addressability may be uncovered again, causing ADDR_EXPRs
1594 of inappropriate objects to be built. */
1595 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1596 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1597 return true;
1598
1599 /* ... fall through ... */
1600
1601 case ARRAY_REF:
1602 case ARRAY_RANGE_REF:
1603 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1604
1605 CASE_CONVERT:
1606 return true;
1607
1608 default:
1609 break;
1610 }
1611
1612 return false;
1613 }
1614
1615 /* Finds addresses in *OP_P inside STMT. */
1616
1617 static void
1618 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1619 {
1620 tree base = *op_p, step = size_zero_node;
1621 struct iv *civ;
1622 struct ifs_ivopts_data ifs_ivopts_data;
1623
1624 /* Do not play with volatile memory references. A bit too conservative,
1625 perhaps, but safe. */
1626 if (gimple_has_volatile_ops (stmt))
1627 goto fail;
1628
1629 /* Ignore bitfields for now. Not really something terribly complicated
1630 to handle. TODO. */
1631 if (TREE_CODE (base) == BIT_FIELD_REF)
1632 goto fail;
1633
1634 base = unshare_expr (base);
1635
1636 if (TREE_CODE (base) == TARGET_MEM_REF)
1637 {
1638 tree type = build_pointer_type (TREE_TYPE (base));
1639 tree astep;
1640
1641 if (TMR_BASE (base)
1642 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1643 {
1644 civ = get_iv (data, TMR_BASE (base));
1645 if (!civ)
1646 goto fail;
1647
1648 TMR_BASE (base) = civ->base;
1649 step = civ->step;
1650 }
1651 if (TMR_INDEX (base)
1652 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1653 {
1654 civ = get_iv (data, TMR_INDEX (base));
1655 if (!civ)
1656 goto fail;
1657
1658 TMR_INDEX (base) = civ->base;
1659 astep = civ->step;
1660
1661 if (astep)
1662 {
1663 if (TMR_STEP (base))
1664 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1665
1666 step = fold_build2 (PLUS_EXPR, type, step, astep);
1667 }
1668 }
1669
1670 if (integer_zerop (step))
1671 goto fail;
1672 base = tree_mem_ref_addr (type, base);
1673 }
1674 else
1675 {
1676 ifs_ivopts_data.ivopts_data = data;
1677 ifs_ivopts_data.stmt = stmt;
1678 ifs_ivopts_data.step = size_zero_node;
1679 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1680 || integer_zerop (ifs_ivopts_data.step))
1681 goto fail;
1682 step = ifs_ivopts_data.step;
1683
1684 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1685
1686 /* Check that the base expression is addressable. This needs
1687 to be done after substituting bases of IVs into it. */
1688 if (may_be_nonaddressable_p (base))
1689 goto fail;
1690
1691 /* Moreover, on strict alignment platforms, check that it is
1692 sufficiently aligned. */
1693 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1694 goto fail;
1695
1696 base = build_fold_addr_expr (base);
1697
1698 /* Substituting bases of IVs into the base expression might
1699 have caused folding opportunities. */
1700 if (TREE_CODE (base) == ADDR_EXPR)
1701 {
1702 tree *ref = &TREE_OPERAND (base, 0);
1703 while (handled_component_p (*ref))
1704 ref = &TREE_OPERAND (*ref, 0);
1705 if (TREE_CODE (*ref) == MEM_REF)
1706 {
1707 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1708 TREE_OPERAND (*ref, 0),
1709 TREE_OPERAND (*ref, 1));
1710 if (tem)
1711 *ref = tem;
1712 }
1713 }
1714 }
1715
1716 civ = alloc_iv (base, step);
1717 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1718 return;
1719
1720 fail:
1721 for_each_index (op_p, idx_record_use, data);
1722 }
1723
1724 /* Finds and records invariants used in STMT. */
1725
1726 static void
1727 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1728 {
1729 ssa_op_iter iter;
1730 use_operand_p use_p;
1731 tree op;
1732
1733 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1734 {
1735 op = USE_FROM_PTR (use_p);
1736 record_invariant (data, op, false);
1737 }
1738 }
1739
1740 /* Finds interesting uses of induction variables in the statement STMT. */
1741
1742 static void
1743 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1744 {
1745 struct iv *iv;
1746 tree op, *lhs, *rhs;
1747 ssa_op_iter iter;
1748 use_operand_p use_p;
1749 enum tree_code code;
1750
1751 find_invariants_stmt (data, stmt);
1752
1753 if (gimple_code (stmt) == GIMPLE_COND)
1754 {
1755 find_interesting_uses_cond (data, stmt);
1756 return;
1757 }
1758
1759 if (is_gimple_assign (stmt))
1760 {
1761 lhs = gimple_assign_lhs_ptr (stmt);
1762 rhs = gimple_assign_rhs1_ptr (stmt);
1763
1764 if (TREE_CODE (*lhs) == SSA_NAME)
1765 {
1766 /* If the statement defines an induction variable, the uses are not
1767 interesting by themselves. */
1768
1769 iv = get_iv (data, *lhs);
1770
1771 if (iv && !integer_zerop (iv->step))
1772 return;
1773 }
1774
1775 code = gimple_assign_rhs_code (stmt);
1776 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1777 && (REFERENCE_CLASS_P (*rhs)
1778 || is_gimple_val (*rhs)))
1779 {
1780 if (REFERENCE_CLASS_P (*rhs))
1781 find_interesting_uses_address (data, stmt, rhs);
1782 else
1783 find_interesting_uses_op (data, *rhs);
1784
1785 if (REFERENCE_CLASS_P (*lhs))
1786 find_interesting_uses_address (data, stmt, lhs);
1787 return;
1788 }
1789 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1790 {
1791 find_interesting_uses_cond (data, stmt);
1792 return;
1793 }
1794
1795 /* TODO -- we should also handle address uses of type
1796
1797 memory = call (whatever);
1798
1799 and
1800
1801 call (memory). */
1802 }
1803
1804 if (gimple_code (stmt) == GIMPLE_PHI
1805 && gimple_bb (stmt) == data->current_loop->header)
1806 {
1807 iv = get_iv (data, PHI_RESULT (stmt));
1808
1809 if (iv && !integer_zerop (iv->step))
1810 return;
1811 }
1812
1813 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1814 {
1815 op = USE_FROM_PTR (use_p);
1816
1817 if (TREE_CODE (op) != SSA_NAME)
1818 continue;
1819
1820 iv = get_iv (data, op);
1821 if (!iv)
1822 continue;
1823
1824 find_interesting_uses_op (data, op);
1825 }
1826 }
1827
1828 /* Finds interesting uses of induction variables outside of loops
1829 on loop exit edge EXIT. */
1830
1831 static void
1832 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1833 {
1834 gimple phi;
1835 gimple_stmt_iterator psi;
1836 tree def;
1837
1838 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1839 {
1840 phi = gsi_stmt (psi);
1841 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1842 if (is_gimple_reg (def))
1843 find_interesting_uses_op (data, def);
1844 }
1845 }
1846
1847 /* Finds uses of the induction variables that are interesting. */
1848
1849 static void
1850 find_interesting_uses (struct ivopts_data *data)
1851 {
1852 basic_block bb;
1853 gimple_stmt_iterator bsi;
1854 basic_block *body = get_loop_body (data->current_loop);
1855 unsigned i;
1856 struct version_info *info;
1857 edge e;
1858
1859 if (dump_file && (dump_flags & TDF_DETAILS))
1860 fprintf (dump_file, "Uses:\n\n");
1861
1862 for (i = 0; i < data->current_loop->num_nodes; i++)
1863 {
1864 edge_iterator ei;
1865 bb = body[i];
1866
1867 FOR_EACH_EDGE (e, ei, bb->succs)
1868 if (e->dest != EXIT_BLOCK_PTR
1869 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1870 find_interesting_uses_outside (data, e);
1871
1872 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1873 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1874 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1875 if (!is_gimple_debug (gsi_stmt (bsi)))
1876 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1877 }
1878
1879 if (dump_file && (dump_flags & TDF_DETAILS))
1880 {
1881 bitmap_iterator bi;
1882
1883 fprintf (dump_file, "\n");
1884
1885 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1886 {
1887 info = ver_info (data, i);
1888 if (info->inv_id)
1889 {
1890 fprintf (dump_file, " ");
1891 print_generic_expr (dump_file, info->name, TDF_SLIM);
1892 fprintf (dump_file, " is invariant (%d)%s\n",
1893 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1894 }
1895 }
1896
1897 fprintf (dump_file, "\n");
1898 }
1899
1900 free (body);
1901 }
1902
1903 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1904 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1905 we are at the top-level of the processed address. */
1906
1907 static tree
1908 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1909 unsigned HOST_WIDE_INT *offset)
1910 {
1911 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1912 enum tree_code code;
1913 tree type, orig_type = TREE_TYPE (expr);
1914 unsigned HOST_WIDE_INT off0, off1, st;
1915 tree orig_expr = expr;
1916
1917 STRIP_NOPS (expr);
1918
1919 type = TREE_TYPE (expr);
1920 code = TREE_CODE (expr);
1921 *offset = 0;
1922
1923 switch (code)
1924 {
1925 case INTEGER_CST:
1926 if (!cst_and_fits_in_hwi (expr)
1927 || integer_zerop (expr))
1928 return orig_expr;
1929
1930 *offset = int_cst_value (expr);
1931 return build_int_cst (orig_type, 0);
1932
1933 case POINTER_PLUS_EXPR:
1934 case PLUS_EXPR:
1935 case MINUS_EXPR:
1936 op0 = TREE_OPERAND (expr, 0);
1937 op1 = TREE_OPERAND (expr, 1);
1938
1939 op0 = strip_offset_1 (op0, false, false, &off0);
1940 op1 = strip_offset_1 (op1, false, false, &off1);
1941
1942 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1943 if (op0 == TREE_OPERAND (expr, 0)
1944 && op1 == TREE_OPERAND (expr, 1))
1945 return orig_expr;
1946
1947 if (integer_zerop (op1))
1948 expr = op0;
1949 else if (integer_zerop (op0))
1950 {
1951 if (code == MINUS_EXPR)
1952 expr = fold_build1 (NEGATE_EXPR, type, op1);
1953 else
1954 expr = op1;
1955 }
1956 else
1957 expr = fold_build2 (code, type, op0, op1);
1958
1959 return fold_convert (orig_type, expr);
1960
1961 case MULT_EXPR:
1962 op1 = TREE_OPERAND (expr, 1);
1963 if (!cst_and_fits_in_hwi (op1))
1964 return orig_expr;
1965
1966 op0 = TREE_OPERAND (expr, 0);
1967 op0 = strip_offset_1 (op0, false, false, &off0);
1968 if (op0 == TREE_OPERAND (expr, 0))
1969 return orig_expr;
1970
1971 *offset = off0 * int_cst_value (op1);
1972 if (integer_zerop (op0))
1973 expr = op0;
1974 else
1975 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1976
1977 return fold_convert (orig_type, expr);
1978
1979 case ARRAY_REF:
1980 case ARRAY_RANGE_REF:
1981 if (!inside_addr)
1982 return orig_expr;
1983
1984 step = array_ref_element_size (expr);
1985 if (!cst_and_fits_in_hwi (step))
1986 break;
1987
1988 st = int_cst_value (step);
1989 op1 = TREE_OPERAND (expr, 1);
1990 op1 = strip_offset_1 (op1, false, false, &off1);
1991 *offset = off1 * st;
1992
1993 if (top_compref
1994 && integer_zerop (op1))
1995 {
1996 /* Strip the component reference completely. */
1997 op0 = TREE_OPERAND (expr, 0);
1998 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1999 *offset += off0;
2000 return op0;
2001 }
2002 break;
2003
2004 case COMPONENT_REF:
2005 if (!inside_addr)
2006 return orig_expr;
2007
2008 tmp = component_ref_field_offset (expr);
2009 if (top_compref
2010 && cst_and_fits_in_hwi (tmp))
2011 {
2012 /* Strip the component reference completely. */
2013 op0 = TREE_OPERAND (expr, 0);
2014 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2015 *offset = off0 + int_cst_value (tmp);
2016 return op0;
2017 }
2018 break;
2019
2020 case ADDR_EXPR:
2021 op0 = TREE_OPERAND (expr, 0);
2022 op0 = strip_offset_1 (op0, true, true, &off0);
2023 *offset += off0;
2024
2025 if (op0 == TREE_OPERAND (expr, 0))
2026 return orig_expr;
2027
2028 expr = build_fold_addr_expr (op0);
2029 return fold_convert (orig_type, expr);
2030
2031 case MEM_REF:
2032 /* ??? Offset operand? */
2033 inside_addr = false;
2034 break;
2035
2036 default:
2037 return orig_expr;
2038 }
2039
2040 /* Default handling of expressions for which we want to recurse into
2041 the first operand. */
2042 op0 = TREE_OPERAND (expr, 0);
2043 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2044 *offset += off0;
2045
2046 if (op0 == TREE_OPERAND (expr, 0)
2047 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2048 return orig_expr;
2049
2050 expr = copy_node (expr);
2051 TREE_OPERAND (expr, 0) = op0;
2052 if (op1)
2053 TREE_OPERAND (expr, 1) = op1;
2054
2055 /* Inside address, we might strip the top level component references,
2056 thus changing type of the expression. Handling of ADDR_EXPR
2057 will fix that. */
2058 expr = fold_convert (orig_type, expr);
2059
2060 return expr;
2061 }
2062
2063 /* Strips constant offsets from EXPR and stores them to OFFSET. */
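/* Illustrative examples (the exact trees depend on the front end):
   strip_offset (p + 16, &off) returns p with off == 16, and
   strip_offset (&a[i + 2], &off) returns &a[i] with
   off == 2 * sizeof (*a).  */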
2064
2065 static tree
2066 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2067 {
2068 return strip_offset_1 (expr, false, false, offset);
2069 }
2070
2071 /* Returns a variant of TYPE that can be used as a base for different uses.
2072 We return an unsigned type with the same precision, which avoids problems
2073 with overflows. */
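/* For instance (illustrative), 'int' maps to 'unsigned int', and a
   pointer type maps to the unsigned integer type of the same width.  */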
2074
2075 static tree
2076 generic_type_for (tree type)
2077 {
2078 if (POINTER_TYPE_P (type))
2079 return unsigned_type_for (type);
2080
2081 if (TYPE_UNSIGNED (type))
2082 return type;
2083
2084 return unsigned_type_for (type);
2085 }
2086
2087 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2088 the bitmap in which we should store them. */
2089
2090 static struct ivopts_data *fd_ivopts_data;
2091 static tree
2092 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2093 {
2094 bitmap *depends_on = (bitmap *) data;
2095 struct version_info *info;
2096
2097 if (TREE_CODE (*expr_p) != SSA_NAME)
2098 return NULL_TREE;
2099 info = name_info (fd_ivopts_data, *expr_p);
2100
2101 if (!info->inv_id || info->has_nonlin_use)
2102 return NULL_TREE;
2103
2104 if (!*depends_on)
2105 *depends_on = BITMAP_ALLOC (NULL);
2106 bitmap_set_bit (*depends_on, info->inv_id);
2107
2108 return NULL_TREE;
2109 }
2110
2111 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2112 position to POS. If USE is not NULL, the candidate is set as related to
2113 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2114 replacement of the final value of the iv by a direct computation. */
2115
2116 static struct iv_cand *
2117 add_candidate_1 (struct ivopts_data *data,
2118 tree base, tree step, bool important, enum iv_position pos,
2119 struct iv_use *use, gimple incremented_at)
2120 {
2121 unsigned i;
2122 struct iv_cand *cand = NULL;
2123 tree type, orig_type;
2124
2125 if (base)
2126 {
2127 orig_type = TREE_TYPE (base);
2128 type = generic_type_for (orig_type);
2129 if (type != orig_type)
2130 {
2131 base = fold_convert (type, base);
2132 step = fold_convert (type, step);
2133 }
2134 }
2135
2136 for (i = 0; i < n_iv_cands (data); i++)
2137 {
2138 cand = iv_cand (data, i);
2139
2140 if (cand->pos != pos)
2141 continue;
2142
2143 if (cand->incremented_at != incremented_at
2144 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2145 && cand->ainc_use != use))
2146 continue;
2147
2148 if (!cand->iv)
2149 {
2150 if (!base && !step)
2151 break;
2152
2153 continue;
2154 }
2155
2156 if (!base && !step)
2157 continue;
2158
2159 if (operand_equal_p (base, cand->iv->base, 0)
2160 && operand_equal_p (step, cand->iv->step, 0))
2161 break;
2162 }
2163
2164 if (i == n_iv_cands (data))
2165 {
2166 cand = XCNEW (struct iv_cand);
2167 cand->id = i;
2168
2169 if (!base && !step)
2170 cand->iv = NULL;
2171 else
2172 cand->iv = alloc_iv (base, step);
2173
2174 cand->pos = pos;
2175 if (pos != IP_ORIGINAL && cand->iv)
2176 {
2177 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2178 cand->var_after = cand->var_before;
2179 }
2180 cand->important = important;
2181 cand->incremented_at = incremented_at;
2182 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2183
2184 if (step
2185 && TREE_CODE (step) != INTEGER_CST)
2186 {
2187 fd_ivopts_data = data;
2188 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2189 }
2190
2191 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2192 cand->ainc_use = use;
2193 else
2194 cand->ainc_use = NULL;
2195
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 dump_cand (dump_file, cand);
2198 }
2199
2200 if (important && !cand->important)
2201 {
2202 cand->important = true;
2203 if (dump_file && (dump_flags & TDF_DETAILS))
2204 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2205 }
2206
2207 if (use)
2208 {
2209 bitmap_set_bit (use->related_cands, i);
2210 if (dump_file && (dump_flags & TDF_DETAILS))
2211 fprintf (dump_file, "Candidate %d is related to use %d\n",
2212 cand->id, use->id);
2213 }
2214
2215 return cand;
2216 }
2217
2218 /* Returns true if incrementing the induction variable at the end of the LOOP
2219 is allowed.
2220
2221 The purpose is to avoid splitting the latch edge with a biv increment,
2222 thus creating a jump, possibly confusing other optimization passes and
2223 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2224 IP_NORMAL_POS is not available (so we do not have a better alternative),
2225 or if the latch edge is already nonempty. */
2226
2227 static bool
2228 allow_ip_end_pos_p (struct loop *loop)
2229 {
2230 if (!ip_normal_pos (loop))
2231 return true;
2232
2233 if (!empty_block_p (ip_end_pos (loop)))
2234 return true;
2235
2236 return false;
2237 }
2238
2239 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2240 Important field is set to IMPORTANT. */
2241
2242 static void
2243 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2244 bool important, struct iv_use *use)
2245 {
2246 basic_block use_bb = gimple_bb (use->stmt);
2247 enum machine_mode mem_mode;
2248 unsigned HOST_WIDE_INT cstepi;
2249
2250 /* If we insert the increment in any position other than the standard
2251 ones, we must ensure that it is incremented once per iteration.
2252 It must not be in an inner nested loop, or one side of an if
2253 statement. */
2254 if (use_bb->loop_father != data->current_loop
2255 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2256 || stmt_could_throw_p (use->stmt)
2257 || !cst_and_fits_in_hwi (step))
2258 return;
2259
2260 cstepi = int_cst_value (step);
2261
2262 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2263 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2264 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2265 {
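/* The candidate starts at BASE - STEP, so that after the increment
   inserted just before the use it has the value BASE + STEP * i that
   the use expects, matching a pre-increment or pre-decrement access.  */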
2266 enum tree_code code = MINUS_EXPR;
2267 tree new_base;
2268 tree new_step = step;
2269
2270 if (POINTER_TYPE_P (TREE_TYPE (base)))
2271 {
2272 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2273 code = POINTER_PLUS_EXPR;
2274 }
2275 else
2276 new_step = fold_convert (TREE_TYPE (base), new_step);
2277 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2278 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2279 use->stmt);
2280 }
2281 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2282 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2283 {
2284 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2285 use->stmt);
2286 }
2287 }
2288
2289 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2290 position to POS. If USE is not NULL, the candidate is set as related to
2291 it. The candidate computation is scheduled on all available positions. */
2292
2293 static void
2294 add_candidate (struct ivopts_data *data,
2295 tree base, tree step, bool important, struct iv_use *use)
2296 {
2297 if (ip_normal_pos (data->current_loop))
2298 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2299 if (ip_end_pos (data->current_loop)
2300 && allow_ip_end_pos_p (data->current_loop))
2301 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2302
2303 if (use != NULL && use->type == USE_ADDRESS)
2304 add_autoinc_candidates (data, base, step, important, use);
2305 }
2306
2307 /* Add a standard "0 + 1 * iteration" iv candidate for a
2308 type with SIZE bits. */
2309
2310 static void
2311 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2312 unsigned int size)
2313 {
2314 tree type = lang_hooks.types.type_for_size (size, true);
2315 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2316 true, NULL);
2317 }
2318
2319 /* Adds standard iv candidates. */
2320
2321 static void
2322 add_standard_iv_candidates (struct ivopts_data *data)
2323 {
2324 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2325
2326 /* The same for a double-integer type if it is still fast enough. */
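/* E.g., on a target with 64-bit words and 32-bit int, this also adds
   a 64-bit candidate (illustrative).  */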
2327 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2328 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2329 }
2330
2331
2332 /* Adds candidates based on the old induction variable IV. */
2333
2334 static void
2335 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2336 {
2337 gimple phi;
2338 tree def;
2339 struct iv_cand *cand;
2340
2341 add_candidate (data, iv->base, iv->step, true, NULL);
2342
2343 /* The same, but with initial value zero. */
2344 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2345 add_candidate (data, size_int (0), iv->step, true, NULL);
2346 else
2347 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2348 iv->step, true, NULL);
2349
2350 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2351 if (gimple_code (phi) == GIMPLE_PHI)
2352 {
2353 /* Additionally record the possibility of leaving the original iv
2354 untouched. */
2355 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2356 cand = add_candidate_1 (data,
2357 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2358 SSA_NAME_DEF_STMT (def));
2359 cand->var_before = iv->ssa_name;
2360 cand->var_after = def;
2361 }
2362 }
2363
2364 /* Adds candidates based on the old induction variables. */
2365
2366 static void
2367 add_old_ivs_candidates (struct ivopts_data *data)
2368 {
2369 unsigned i;
2370 struct iv *iv;
2371 bitmap_iterator bi;
2372
2373 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2374 {
2375 iv = ver_info (data, i)->iv;
2376 if (iv && iv->biv_p && !integer_zerop (iv->step))
2377 add_old_iv_candidates (data, iv);
2378 }
2379 }
2380
2381 /* Adds candidates based on the value of the induction variable IV and USE. */
2382
2383 static void
2384 add_iv_value_candidates (struct ivopts_data *data,
2385 struct iv *iv, struct iv_use *use)
2386 {
2387 unsigned HOST_WIDE_INT offset;
2388 tree base;
2389 tree basetype;
2390
2391 add_candidate (data, iv->base, iv->step, false, use);
2392
2393 /* The same, but with initial value zero. Make such a variable important,
2394 since it is generic enough that possibly many uses may be based
2395 on it. */
2396 basetype = TREE_TYPE (iv->base);
2397 if (POINTER_TYPE_P (basetype))
2398 basetype = sizetype;
2399 add_candidate (data, build_int_cst (basetype, 0),
2400 iv->step, true, use);
2401
2402 /* Third, try removing the constant offset. Make sure to even
2403 add a candidate for &a[0] vs. (T *)&a. */
2404 base = strip_offset (iv->base, &offset);
2405 if (offset
2406 || base != iv->base)
2407 add_candidate (data, base, iv->step, false, use);
2408 }
2409
2410 /* Adds candidates based on the uses. */
2411
2412 static void
2413 add_derived_ivs_candidates (struct ivopts_data *data)
2414 {
2415 unsigned i;
2416
2417 for (i = 0; i < n_iv_uses (data); i++)
2418 {
2419 struct iv_use *use = iv_use (data, i);
2420
2421 if (!use)
2422 continue;
2423
2424 switch (use->type)
2425 {
2426 case USE_NONLINEAR_EXPR:
2427 case USE_COMPARE:
2428 case USE_ADDRESS:
2429 /* Just add the ivs based on the value of the iv used here. */
2430 add_iv_value_candidates (data, use->iv, use);
2431 break;
2432
2433 default:
2434 gcc_unreachable ();
2435 }
2436 }
2437 }
2438
2439 /* Record important candidates and add them to related_cands bitmaps
2440 if needed. */
2441
2442 static void
2443 record_important_candidates (struct ivopts_data *data)
2444 {
2445 unsigned i;
2446 struct iv_use *use;
2447
2448 for (i = 0; i < n_iv_cands (data); i++)
2449 {
2450 struct iv_cand *cand = iv_cand (data, i);
2451
2452 if (cand->important)
2453 bitmap_set_bit (data->important_candidates, i);
2454 }
2455
2456 data->consider_all_candidates = (n_iv_cands (data)
2457 <= CONSIDER_ALL_CANDIDATES_BOUND);
2458
2459 if (data->consider_all_candidates)
2460 {
2461 /* We will not need "related_cands" bitmaps in this case,
2462 so release them to decrease peak memory consumption. */
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 {
2465 use = iv_use (data, i);
2466 BITMAP_FREE (use->related_cands);
2467 }
2468 }
2469 else
2470 {
2471 /* Add important candidates to the related_cands bitmaps. */
2472 for (i = 0; i < n_iv_uses (data); i++)
2473 bitmap_ior_into (iv_use (data, i)->related_cands,
2474 data->important_candidates);
2475 }
2476 }
2477
2478 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2479 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2480 we allocate a simple list to every use. */
2481
2482 static void
2483 alloc_use_cost_map (struct ivopts_data *data)
2484 {
2485 unsigned i, size, s, j;
2486
2487 for (i = 0; i < n_iv_uses (data); i++)
2488 {
2489 struct iv_use *use = iv_use (data, i);
2490 bitmap_iterator bi;
2491
2492 if (data->consider_all_candidates)
2493 size = n_iv_cands (data);
2494 else
2495 {
2496 s = 0;
2497 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2498 {
2499 s++;
2500 }
2501
2502 /* Round up to a power of two, so that taking the remainder modulo it is fast. */
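/* E.g., s == 5 yields size == 8.  */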
2503 for (size = 1; size < s; size <<= 1)
2504 continue;
2505 }
2506
2507 use->n_map_members = size;
2508 use->cost_map = XCNEWVEC (struct cost_pair, size);
2509 }
2510 }
2511
2512 /* Returns description of computation cost of expression whose runtime
2513 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2514
2515 static comp_cost
2516 new_cost (unsigned runtime, unsigned complexity)
2517 {
2518 comp_cost cost;
2519
2520 cost.cost = runtime;
2521 cost.complexity = complexity;
2522
2523 return cost;
2524 }
2525
2526 /* Adds costs COST1 and COST2. */
2527
2528 static comp_cost
2529 add_costs (comp_cost cost1, comp_cost cost2)
2530 {
2531 cost1.cost += cost2.cost;
2532 cost1.complexity += cost2.complexity;
2533
2534 return cost1;
2535 }
2536 /* Subtracts cost COST2 from COST1. */
2537
2538 static comp_cost
2539 sub_costs (comp_cost cost1, comp_cost cost2)
2540 {
2541 cost1.cost -= cost2.cost;
2542 cost1.complexity -= cost2.complexity;
2543
2544 return cost1;
2545 }
2546
2547 /* Returns a negative number if COST1 < COST2, a positive number if
2548 COST1 > COST2, and 0 if COST1 = COST2. */
2549
2550 static int
2551 compare_costs (comp_cost cost1, comp_cost cost2)
2552 {
2553 if (cost1.cost == cost2.cost)
2554 return cost1.complexity - cost2.complexity;
2555
2556 return cost1.cost - cost2.cost;
2557 }
2558
2559 /* Returns true if COST is infinite. */
2560
2561 static bool
2562 infinite_cost_p (comp_cost cost)
2563 {
2564 return cost.cost == INFTY;
2565 }
2566
2567 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2568 on invariants DEPENDS_ON and that the value used in expressing it
2569 is VALUE. */
2570
2571 static void
2572 set_use_iv_cost (struct ivopts_data *data,
2573 struct iv_use *use, struct iv_cand *cand,
2574 comp_cost cost, bitmap depends_on, tree value)
2575 {
2576 unsigned i, s;
2577
2578 if (infinite_cost_p (cost))
2579 {
2580 BITMAP_FREE (depends_on);
2581 return;
2582 }
2583
2584 if (data->consider_all_candidates)
2585 {
2586 use->cost_map[cand->id].cand = cand;
2587 use->cost_map[cand->id].cost = cost;
2588 use->cost_map[cand->id].depends_on = depends_on;
2589 use->cost_map[cand->id].value = value;
2590 return;
2591 }
2592
2593 /* n_map_members is a power of two, so this computes modulo. */
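/* E.g., with n_map_members == 8, a candidate with id 11 starts probing
   at slot 11 & 7 == 3 and continues linearly from there.  */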
2594 s = cand->id & (use->n_map_members - 1);
2595 for (i = s; i < use->n_map_members; i++)
2596 if (!use->cost_map[i].cand)
2597 goto found;
2598 for (i = 0; i < s; i++)
2599 if (!use->cost_map[i].cand)
2600 goto found;
2601
2602 gcc_unreachable ();
2603
2604 found:
2605 use->cost_map[i].cand = cand;
2606 use->cost_map[i].cost = cost;
2607 use->cost_map[i].depends_on = depends_on;
2608 use->cost_map[i].value = value;
2609 }
2610
2611 /* Gets cost of (USE, CANDIDATE) pair. */
2612
2613 static struct cost_pair *
2614 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2615 struct iv_cand *cand)
2616 {
2617 unsigned i, s;
2618 struct cost_pair *ret;
2619
2620 if (!cand)
2621 return NULL;
2622
2623 if (data->consider_all_candidates)
2624 {
2625 ret = use->cost_map + cand->id;
2626 if (!ret->cand)
2627 return NULL;
2628
2629 return ret;
2630 }
2631
2632 /* n_map_members is a power of two, so this computes modulo. */
2633 s = cand->id & (use->n_map_members - 1);
2634 for (i = s; i < use->n_map_members; i++)
2635 if (use->cost_map[i].cand == cand)
2636 return use->cost_map + i;
2637
2638 for (i = 0; i < s; i++)
2639 if (use->cost_map[i].cand == cand)
2640 return use->cost_map + i;
2641
2642 return NULL;
2643 }
2644
2645 /* Returns an estimate of the cost of computing SEQ. */
2646
2647 static unsigned
2648 seq_cost (rtx seq, bool speed)
2649 {
2650 unsigned cost = 0;
2651 rtx set;
2652
2653 for (; seq; seq = NEXT_INSN (seq))
2654 {
2655 set = single_set (seq);
2656 if (set)
2657 cost += rtx_cost (set, SET, speed);
2658 else
2659 cost++;
2660 }
2661
2662 return cost;
2663 }
2664
2665 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2666 static rtx
2667 produce_memory_decl_rtl (tree obj, int *regno)
2668 {
2669 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2670 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2671 rtx x;
2672
2673 gcc_assert (obj);
2674 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2675 {
2676 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2677 x = gen_rtx_SYMBOL_REF (address_mode, name);
2678 SET_SYMBOL_REF_DECL (x, obj);
2679 x = gen_rtx_MEM (DECL_MODE (obj), x);
2680 set_mem_addr_space (x, as);
2681 targetm.encode_section_info (obj, x, true);
2682 }
2683 else
2684 {
2685 x = gen_raw_REG (address_mode, (*regno)++);
2686 x = gen_rtx_MEM (DECL_MODE (obj), x);
2687 set_mem_addr_space (x, as);
2688 }
2689
2690 return x;
2691 }
2692
2693 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2694 walk_tree. DATA contains the actual fake register number. */
2695
2696 static tree
2697 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2698 {
2699 tree obj = NULL_TREE;
2700 rtx x = NULL_RTX;
2701 int *regno = (int *) data;
2702
2703 switch (TREE_CODE (*expr_p))
2704 {
2705 case ADDR_EXPR:
2706 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2707 handled_component_p (*expr_p);
2708 expr_p = &TREE_OPERAND (*expr_p, 0))
2709 continue;
2710 obj = *expr_p;
2711 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2712 x = produce_memory_decl_rtl (obj, regno);
2713 break;
2714
2715 case SSA_NAME:
2716 *ws = 0;
2717 obj = SSA_NAME_VAR (*expr_p);
2718 if (!DECL_RTL_SET_P (obj))
2719 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2720 break;
2721
2722 case VAR_DECL:
2723 case PARM_DECL:
2724 case RESULT_DECL:
2725 *ws = 0;
2726 obj = *expr_p;
2727
2728 if (DECL_RTL_SET_P (obj))
2729 break;
2730
2731 if (DECL_MODE (obj) == BLKmode)
2732 x = produce_memory_decl_rtl (obj, regno);
2733 else
2734 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2735
2736 break;
2737
2738 default:
2739 break;
2740 }
2741
2742 if (x)
2743 {
2744 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2745 SET_DECL_RTL (obj, x);
2746 }
2747
2748 return NULL_TREE;
2749 }
2750
2751 /* Determines cost of the computation of EXPR. */
2752
2753 static unsigned
2754 computation_cost (tree expr, bool speed)
2755 {
2756 rtx seq, rslt;
2757 tree type = TREE_TYPE (expr);
2758 unsigned cost;
2759 /* Avoid using hard regs in ways which may be unsupported. */
2760 int regno = LAST_VIRTUAL_REGISTER + 1;
2761 struct cgraph_node *node = cgraph_node (current_function_decl);
2762 enum node_frequency real_frequency = node->frequency;
2763
2764 node->frequency = NODE_FREQUENCY_NORMAL;
2765 crtl->maybe_hot_insn_p = speed;
2766 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2767 start_sequence ();
2768 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2769 seq = get_insns ();
2770 end_sequence ();
2771 default_rtl_profile ();
2772 node->frequency = real_frequency;
2773
2774 cost = seq_cost (seq, speed);
2775 if (MEM_P (rslt))
2776 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2777 TYPE_ADDR_SPACE (type), speed);
2778
2779 return cost;
2780 }
2781
2782 /* Returns variable containing the value of candidate CAND at statement AT. */
2783
2784 static tree
2785 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2786 {
2787 if (stmt_after_increment (loop, cand, stmt))
2788 return cand->var_after;
2789 else
2790 return cand->var_before;
2791 }
2792
2793 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2794 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
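/* For example (illustrative), for a constant of a 16-bit type with
   value 0x8000 this returns 1.  */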
2795
2796 int
2797 tree_int_cst_sign_bit (const_tree t)
2798 {
2799 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2800 unsigned HOST_WIDE_INT w;
2801
2802 if (bitno < HOST_BITS_PER_WIDE_INT)
2803 w = TREE_INT_CST_LOW (t);
2804 else
2805 {
2806 w = TREE_INT_CST_HIGH (t);
2807 bitno -= HOST_BITS_PER_WIDE_INT;
2808 }
2809
2810 return (w >> bitno) & 1;
2811 }
2812
2813 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2814 same precision that is at least as wide as the precision of TYPE, stores
2815 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2816 type of A and B. */
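/* For instance (illustrative), if A is (unsigned int) x and B is
   (unsigned int) y with 64-bit x and y, the conversions are stripped
   and the 64-bit type of x is returned.  */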
2817
2818 static tree
2819 determine_common_wider_type (tree *a, tree *b)
2820 {
2821 tree wider_type = NULL;
2822 tree suba, subb;
2823 tree atype = TREE_TYPE (*a);
2824
2825 if (CONVERT_EXPR_P (*a))
2826 {
2827 suba = TREE_OPERAND (*a, 0);
2828 wider_type = TREE_TYPE (suba);
2829 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2830 return atype;
2831 }
2832 else
2833 return atype;
2834
2835 if (CONVERT_EXPR_P (*b))
2836 {
2837 subb = TREE_OPERAND (*b, 0);
2838 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2839 return atype;
2840 }
2841 else
2842 return atype;
2843
2844 *a = suba;
2845 *b = subb;
2846 return wider_type;
2847 }
2848
2849 /* Determines the expression by which USE is expressed from induction variable
2850 CAND at statement AT in LOOP. The expression is stored in a decomposed
2851 form into AFF. Returns false if USE cannot be expressed using CAND. */
2852
2853 static bool
2854 get_computation_aff (struct loop *loop,
2855 struct iv_use *use, struct iv_cand *cand, gimple at,
2856 struct affine_tree_combination *aff)
2857 {
2858 tree ubase = use->iv->base;
2859 tree ustep = use->iv->step;
2860 tree cbase = cand->iv->base;
2861 tree cstep = cand->iv->step, cstep_common;
2862 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2863 tree common_type, var;
2864 tree uutype;
2865 aff_tree cbase_aff, var_aff;
2866 double_int rat;
2867
2868 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2869 {
2870 /* We do not have a precision to express the values of use. */
2871 return false;
2872 }
2873
2874 var = var_at_stmt (loop, cand, at);
2875 uutype = unsigned_type_for (utype);
2876
2877 /* If the conversion is not noop, perform it. */
2878 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2879 {
2880 cstep = fold_convert (uutype, cstep);
2881 cbase = fold_convert (uutype, cbase);
2882 var = fold_convert (uutype, var);
2883 }
2884
2885 if (!constant_multiple_of (ustep, cstep, &rat))
2886 return false;
2887
2888 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2889 type, we achieve better folding by computing their difference in this
2890 wider type, and cast the result to UUTYPE. We do not need to worry about
2891 overflows, as all the arithmetic will in the end be performed in UUTYPE
2892 anyway. */
2893 common_type = determine_common_wider_type (&ubase, &cbase);
2894
2895 /* use = ubase - ratio * cbase + ratio * var. */
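/* This is use = ubase + ratio * (var - cbase) rearranged, with
   ratio = ustep / cstep as computed by constant_multiple_of above.  */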
2896 tree_to_aff_combination (ubase, common_type, aff);
2897 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2898 tree_to_aff_combination (var, uutype, &var_aff);
2899
2900 /* We need to shift the value if we are after the increment. */
2901 if (stmt_after_increment (loop, cand, at))
2902 {
2903 aff_tree cstep_aff;
2904
2905 if (common_type != uutype)
2906 cstep_common = fold_convert (common_type, cstep);
2907 else
2908 cstep_common = cstep;
2909
2910 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2911 aff_combination_add (&cbase_aff, &cstep_aff);
2912 }
2913
2914 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2915 aff_combination_add (aff, &cbase_aff);
2916 if (common_type != uutype)
2917 aff_combination_convert (aff, uutype);
2918
2919 aff_combination_scale (&var_aff, rat);
2920 aff_combination_add (aff, &var_aff);
2921
2922 return true;
2923 }
2924
2925 /* Determines the expression by which USE is expressed from induction variable
2926 CAND at statement AT in LOOP. The computation is unshared. */
2927
2928 static tree
2929 get_computation_at (struct loop *loop,
2930 struct iv_use *use, struct iv_cand *cand, gimple at)
2931 {
2932 aff_tree aff;
2933 tree type = TREE_TYPE (use->iv->base);
2934
2935 if (!get_computation_aff (loop, use, cand, at, &aff))
2936 return NULL_TREE;
2937 unshare_aff_combination (&aff);
2938 return fold_convert (type, aff_combination_to_tree (&aff));
2939 }
2940
2941 /* Determines the expression by which USE is expressed from induction variable
2942 CAND in LOOP. The computation is unshared. */
2943
2944 static tree
2945 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2946 {
2947 return get_computation_at (loop, use, cand, use->stmt);
2948 }
2949
2950 /* Adjust the cost COST for being in loop setup rather than loop body.
2951 If we're optimizing for space, the loop setup overhead is constant;
2952 if we're optimizing for speed, amortize it over the per-iteration cost. */
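/* E.g., if AVG_LOOP_NITER evaluates to 5, a setup cost of 10 is
   accounted as 2 when optimizing for speed (illustrative values).  */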
2953 static unsigned
2954 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2955 {
2956 if (cost == INFTY)
2957 return cost;
2958 else if (optimize_loop_for_speed_p (data->current_loop))
2959 return cost / AVG_LOOP_NITER (data->current_loop);
2960 else
2961 return cost;
2962 }
2963
2964 /* Returns cost of addition in MODE. */
2965
2966 static unsigned
2967 add_cost (enum machine_mode mode, bool speed)
2968 {
2969 static unsigned costs[NUM_MACHINE_MODES];
2970 rtx seq;
2971 unsigned cost;
2972
2973 if (costs[mode])
2974 return costs[mode];
2975
2976 start_sequence ();
2977 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2978 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2979 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2980 NULL_RTX);
2981 seq = get_insns ();
2982 end_sequence ();
2983
2984 cost = seq_cost (seq, speed);
2985 if (!cost)
2986 cost = 1;
2987
2988 costs[mode] = cost;
2989
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 fprintf (dump_file, "Addition in %s costs %d\n",
2992 GET_MODE_NAME (mode), cost);
2993 return cost;
2994 }
2995
2996 /* Entry in a hashtable of already known costs for multiplication. */
2997 struct mbc_entry
2998 {
2999 HOST_WIDE_INT cst; /* The constant to multiply by. */
3000 enum machine_mode mode; /* In mode. */
3001 unsigned cost; /* The cost. */
3002 };
3003
3004 /* Computes the hash value for ENTRY. */
3005
3006 static hashval_t
3007 mbc_entry_hash (const void *entry)
3008 {
3009 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3010
3011 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3012 }
3013
3014 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3015
3016 static int
3017 mbc_entry_eq (const void *entry1, const void *entry2)
3018 {
3019 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3020 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3021
3022 return (e1->mode == e2->mode
3023 && e1->cst == e2->cst);
3024 }
3025
3026 /* Returns cost of multiplication by constant CST in MODE. */
3027
3028 unsigned
3029 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3030 {
3031 static htab_t costs;
3032 struct mbc_entry **cached, act;
3033 rtx seq;
3034 unsigned cost;
3035
3036 if (!costs)
3037 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3038
3039 act.mode = mode;
3040 act.cst = cst;
3041 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3042 if (*cached)
3043 return (*cached)->cost;
3044
3045 *cached = XNEW (struct mbc_entry);
3046 (*cached)->mode = mode;
3047 (*cached)->cst = cst;
3048
3049 start_sequence ();
3050 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_int_mode (cst, mode), NULL_RTX, 0);
3052 seq = get_insns ();
3053 end_sequence ();
3054
3055 cost = seq_cost (seq, speed);
3056
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3059 (int) cst, GET_MODE_NAME (mode), cost);
3060
3061 (*cached)->cost = cost;
3062
3063 return cost;
3064 }
3065
3066 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3067 validity for a memory reference accessing memory of mode MODE in
3068 address space AS. */
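/* For instance, on x86 targets the allowed ratios are typically
   1, 2, 4 and 8 (illustrative).  */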
3069
3070 DEF_VEC_P (sbitmap);
3071 DEF_VEC_ALLOC_P (sbitmap, heap);
3072
3073 bool
3074 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3075 addr_space_t as)
3076 {
3077 #define MAX_RATIO 128
3078 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3079 static VEC (sbitmap, heap) *valid_mult_list;
3080 sbitmap valid_mult;
3081
3082 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3083 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3084
3085 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3086 if (!valid_mult)
3087 {
3088 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3089 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3090 rtx addr;
3091 HOST_WIDE_INT i;
3092
3093 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3094 sbitmap_zero (valid_mult);
3095 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3096 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3097 {
3098 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3099 if (memory_address_addr_space_p (mode, addr, as))
3100 SET_BIT (valid_mult, i + MAX_RATIO);
3101 }
3102
3103 if (dump_file && (dump_flags & TDF_DETAILS))
3104 {
3105 fprintf (dump_file, " allowed multipliers:");
3106 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3107 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3108 fprintf (dump_file, " %d", (int) i);
3109 fprintf (dump_file, "\n");
3110 fprintf (dump_file, "\n");
3111 }
3112
3113 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3114 }
3115
3116 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3117 return false;
3118
3119 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3120 }
3121
3122 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3123 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3124 variable is omitted. Compute the cost for a memory reference that accesses
3125 a memory location of mode MEM_MODE in address space AS.
3126
3127 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3128 size of MEM_MODE / RATIO) is available. To make this determination, we
3129 look at the size of the increment to be made, which is given in CSTEP.
3130 CSTEP may be zero if the step is unknown.
3131 STMT_AFTER_INC is true iff the statement we're looking at is after the
3132 increment of the original biv.
3133
3134 TODO -- there must be some better way. This all is quite crude. */
3135
3136 typedef struct
3137 {
3138 HOST_WIDE_INT min_offset, max_offset;
3139 unsigned costs[2][2][2][2];
3140 } *address_cost_data;
3141
3142 DEF_VEC_P (address_cost_data);
3143 DEF_VEC_ALLOC_P (address_cost_data, heap);
3144
3145 static comp_cost
3146 get_address_cost (bool symbol_present, bool var_present,
3147 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3148 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3149 addr_space_t as, bool speed,
3150 bool stmt_after_inc, bool *may_autoinc)
3151 {
3152 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3153 static VEC(address_cost_data, heap) *address_cost_data_list;
3154 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3155 address_cost_data data;
3156 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3157 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3158 unsigned cost, acost, complexity;
3159 bool offset_p, ratio_p, autoinc;
3160 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3161 unsigned HOST_WIDE_INT mask;
3162 unsigned bits;
3163
3164 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3165 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3166 data_index + 1);
3167
3168 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3169 if (!data)
3170 {
3171 HOST_WIDE_INT i;
3172 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3173 HOST_WIDE_INT rat, off;
3174 int old_cse_not_expected;
3175 unsigned sym_p, var_p, off_p, rat_p, add_c;
3176 rtx seq, addr, base;
3177 rtx reg0, reg1;
3178
3179 data = (address_cost_data) xcalloc (1, sizeof (*data));
3180
3181 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3182
3183 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3184 for (i = start; i <= 1 << 20; i <<= 1)
3185 {
3186 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3187 if (!memory_address_addr_space_p (mem_mode, addr, as))
3188 break;
3189 }
3190 data->max_offset = i == start ? 0 : i >> 1;
3191 off = data->max_offset;
3192
3193 for (i = start; i <= 1 << 20; i <<= 1)
3194 {
3195 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3196 if (!memory_address_addr_space_p (mem_mode, addr, as))
3197 break;
3198 }
3199 data->min_offset = i == start ? 0 : -(i >> 1);
3200
3201 if (dump_file && (dump_flags & TDF_DETAILS))
3202 {
3203 fprintf (dump_file, "get_address_cost:\n");
3204 fprintf (dump_file, " min offset %s %d\n",
3205 GET_MODE_NAME (mem_mode),
3206 (int) data->min_offset);
3207 fprintf (dump_file, " max offset %s %d\n",
3208 GET_MODE_NAME (mem_mode),
3209 (int) data->max_offset);
3210 }
3211
3212 rat = 1;
3213 for (i = 2; i <= MAX_RATIO; i++)
3214 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3215 {
3216 rat = i;
3217 break;
3218 }
3219
3220 /* Compute the cost of various addressing modes. */
3221 acost = 0;
3222 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3223 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3224
3225 if (HAVE_PRE_DECREMENT)
3226 {
3227 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3228 has_predec[mem_mode]
3229 = memory_address_addr_space_p (mem_mode, addr, as);
3230 }
3231 if (HAVE_POST_DECREMENT)
3232 {
3233 addr = gen_rtx_POST_DEC (address_mode, reg0);
3234 has_postdec[mem_mode]
3235 = memory_address_addr_space_p (mem_mode, addr, as);
3236 }
3237 if (HAVE_PRE_INCREMENT)
3238 {
3239 addr = gen_rtx_PRE_INC (address_mode, reg0);
3240 has_preinc[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
3243 if (HAVE_POST_INCREMENT)
3244 {
3245 addr = gen_rtx_POST_INC (address_mode, reg0);
3246 has_postinc[mem_mode]
3247 = memory_address_addr_space_p (mem_mode, addr, as);
3248 }
3249 for (i = 0; i < 16; i++)
3250 {
3251 sym_p = i & 1;
3252 var_p = (i >> 1) & 1;
3253 off_p = (i >> 2) & 1;
3254 rat_p = (i >> 3) & 1;
3255
3256 addr = reg0;
3257 if (rat_p)
3258 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3259 gen_int_mode (rat, address_mode));
3260
3261 if (var_p)
3262 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3263
3264 if (sym_p)
3265 {
3266 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3267 /* ??? We can run into trouble with some backends by presenting
3268 it with symbols which haven't been properly passed through
3269 targetm.encode_section_info. By setting the local bit, we
3270 enhance the probability of things working. */
3271 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3272
3273 if (off_p)
3274 base = gen_rtx_fmt_e (CONST, address_mode,
3275 gen_rtx_fmt_ee
3276 (PLUS, address_mode, base,
3277 gen_int_mode (off, address_mode)));
3278 }
3279 else if (off_p)
3280 base = gen_int_mode (off, address_mode);
3281 else
3282 base = NULL_RTX;
3283
3284 if (base)
3285 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3286
3287 start_sequence ();
3288 /* To avoid splitting addressing modes, pretend that no cse will
3289 follow. */
3290 old_cse_not_expected = cse_not_expected;
3291 cse_not_expected = true;
3292 addr = memory_address_addr_space (mem_mode, addr, as);
3293 cse_not_expected = old_cse_not_expected;
3294 seq = get_insns ();
3295 end_sequence ();
3296
3297 acost = seq_cost (seq, speed);
3298 acost += address_cost (addr, mem_mode, as, speed);
3299
3300 if (!acost)
3301 acost = 1;
3302 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3303 }
3304
3305 /* On some targets, it is quite expensive to load a symbol into a register,
3306 which makes addresses that contain symbols look much more expensive.
3307 However, the symbol will have to be loaded in any case before the
3308 loop (and quite likely we have it in a register already), so it does not
3309 make much sense to penalize them too heavily. So make some final
3310 tweaks for the SYMBOL_PRESENT modes:
3311
3312 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3313 var is cheaper, use this mode with small penalty.
3314 If VAR_PRESENT is true, try whether the mode with
3315 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3316 if this is the case, use it. */
3317 add_c = add_cost (address_mode, speed);
3318 for (i = 0; i < 8; i++)
3319 {
3320 var_p = i & 1;
3321 off_p = (i >> 1) & 1;
3322 rat_p = (i >> 2) & 1;
3323
3324 acost = data->costs[0][1][off_p][rat_p] + 1;
3325 if (var_p)
3326 acost += add_c;
3327
3328 if (acost < data->costs[1][var_p][off_p][rat_p])
3329 data->costs[1][var_p][off_p][rat_p] = acost;
3330 }
3331
3332 if (dump_file && (dump_flags & TDF_DETAILS))
3333 {
3334 fprintf (dump_file, "Address costs:\n");
3335
3336 for (i = 0; i < 16; i++)
3337 {
3338 sym_p = i & 1;
3339 var_p = (i >> 1) & 1;
3340 off_p = (i >> 2) & 1;
3341 rat_p = (i >> 3) & 1;
3342
3343 fprintf (dump_file, " ");
3344 if (sym_p)
3345 fprintf (dump_file, "sym + ");
3346 if (var_p)
3347 fprintf (dump_file, "var + ");
3348 if (off_p)
3349 fprintf (dump_file, "cst + ");
3350 if (rat_p)
3351 fprintf (dump_file, "rat * ");
3352
3353 acost = data->costs[sym_p][var_p][off_p][rat_p];
3354 fprintf (dump_file, "index costs %d\n", acost);
3355 }
3356 if (has_predec[mem_mode] || has_postdec[mem_mode]
3357 || has_preinc[mem_mode] || has_postinc[mem_mode])
3358 fprintf (dump_file, " May include autoinc/dec\n");
3359 fprintf (dump_file, "\n");
3360 }
3361
3362 VEC_replace (address_cost_data, address_cost_data_list,
3363 data_index, data);
3364 }
3365
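/* Sign-extend OFFSET to the width of the address mode; e.g., with a
   32-bit address mode, 0xfffffff0 becomes -16 (illustrative).  */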
3366 bits = GET_MODE_BITSIZE (address_mode);
3367 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3368 offset &= mask;
3369 if ((offset >> (bits - 1) & 1))
3370 offset |= ~mask;
3371 s_offset = offset;
3372
3373 autoinc = false;
3374 msize = GET_MODE_SIZE (mem_mode);
3375 autoinc_offset = offset;
3376 if (stmt_after_inc)
3377 autoinc_offset += ratio * cstep;
3378 if (symbol_present || var_present || ratio != 1)
3379 autoinc = false;
3380 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3381 && msize == cstep)
3382 || (has_postdec[mem_mode] && autoinc_offset == 0
3383 && msize == -cstep)
3384 || (has_preinc[mem_mode] && autoinc_offset == msize
3385 && msize == cstep)
3386 || (has_predec[mem_mode] && autoinc_offset == -msize
3387 && msize == -cstep))
3388 autoinc = true;
3389
3390 cost = 0;
3391 offset_p = (s_offset != 0
3392 && data->min_offset <= s_offset
3393 && s_offset <= data->max_offset);
3394 ratio_p = (ratio != 1
3395 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3396
3397 if (ratio != 1 && !ratio_p)
3398 cost += multiply_by_cost (ratio, address_mode, speed);
3399
3400 if (s_offset && !offset_p && !symbol_present)
3401 cost += add_cost (address_mode, speed);
3402
3403 if (may_autoinc)
3404 *may_autoinc = autoinc;
3405 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3406 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3407 return new_cost (cost + acost, complexity);
3408 }
3409
3410 /* Estimates cost of forcing expression EXPR into a variable. */
3411
3412 static comp_cost
3413 force_expr_to_var_cost (tree expr, bool speed)
3414 {
3415 static bool costs_initialized = false;
3416 static unsigned integer_cost [2];
3417 static unsigned symbol_cost [2];
3418 static unsigned address_cost [2];
3419 tree op0, op1;
3420 comp_cost cost0, cost1, cost;
3421 enum machine_mode mode;
3422
3423 if (!costs_initialized)
3424 {
3425 tree type = build_pointer_type (integer_type_node);
3426 tree var, addr;
3427 rtx x;
3428 int i;
3429
3430 var = create_tmp_var_raw (integer_type_node, "test_var");
3431 TREE_STATIC (var) = 1;
3432 x = produce_memory_decl_rtl (var, NULL);
3433 SET_DECL_RTL (var, x);
3434
3435 addr = build1 (ADDR_EXPR, type, var);
3436
3437
3438 for (i = 0; i < 2; i++)
3439 {
3440 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3441 2000), i);
3442
3443 symbol_cost[i] = computation_cost (addr, i) + 1;
3444
3445 address_cost[i]
3446 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3447 addr,
3448 build_int_cst (sizetype, 2000)), i) + 1;
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 {
3451 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3452 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3453 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3454 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3455 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3456 fprintf (dump_file, "\n");
3457 }
3458 }
3459
3460 costs_initialized = true;
3461 }
3462
3463 STRIP_NOPS (expr);
3464
3465 if (SSA_VAR_P (expr))
3466 return zero_cost;
3467
3468 if (is_gimple_min_invariant (expr))
3469 {
3470 if (TREE_CODE (expr) == INTEGER_CST)
3471 return new_cost (integer_cost [speed], 0);
3472
3473 if (TREE_CODE (expr) == ADDR_EXPR)
3474 {
3475 tree obj = TREE_OPERAND (expr, 0);
3476
3477 if (TREE_CODE (obj) == VAR_DECL
3478 || TREE_CODE (obj) == PARM_DECL
3479 || TREE_CODE (obj) == RESULT_DECL)
3480 return new_cost (symbol_cost [speed], 0);
3481 }
3482
3483 return new_cost (address_cost [speed], 0);
3484 }
3485
3486 switch (TREE_CODE (expr))
3487 {
3488 case POINTER_PLUS_EXPR:
3489 case PLUS_EXPR:
3490 case MINUS_EXPR:
3491 case MULT_EXPR:
3492 op0 = TREE_OPERAND (expr, 0);
3493 op1 = TREE_OPERAND (expr, 1);
3494 STRIP_NOPS (op0);
3495 STRIP_NOPS (op1);
3496
3497 if (is_gimple_val (op0))
3498 cost0 = zero_cost;
3499 else
3500 cost0 = force_expr_to_var_cost (op0, speed);
3501
3502 if (is_gimple_val (op1))
3503 cost1 = zero_cost;
3504 else
3505 cost1 = force_expr_to_var_cost (op1, speed);
3506
3507 break;
3508
3509 case NEGATE_EXPR:
3510 op0 = TREE_OPERAND (expr, 0);
3511 STRIP_NOPS (op0);
3512 op1 = NULL_TREE;
3513
3514 if (is_gimple_val (op0))
3515 cost0 = zero_cost;
3516 else
3517 cost0 = force_expr_to_var_cost (op0, speed);
3518
3519 cost1 = zero_cost;
3520 break;
3521
3522 default:
3523 /* Just an arbitrary value, FIXME. */
3524 return new_cost (target_spill_cost[speed], 0);
3525 }
3526
3527 mode = TYPE_MODE (TREE_TYPE (expr));
3528 switch (TREE_CODE (expr))
3529 {
3530 case POINTER_PLUS_EXPR:
3531 case PLUS_EXPR:
3532 case MINUS_EXPR:
3533 case NEGATE_EXPR:
3534 cost = new_cost (add_cost (mode, speed), 0);
3535 break;
3536
3537 case MULT_EXPR:
3538 if (cst_and_fits_in_hwi (op0))
3539 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3540 else if (cst_and_fits_in_hwi (op1))
3541 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3542 else
3543 return new_cost (target_spill_cost [speed], 0);
3544 break;
3545
3546 default:
3547 gcc_unreachable ();
3548 }
3549
3550 cost = add_costs (cost, cost0);
3551 cost = add_costs (cost, cost1);
3552
3553 /* Bound the cost by target_spill_cost. The parts of complicated
3554 computations often are either loop invariant or at least can
3555 be shared between several iv uses, so letting this grow without
3556 limits would not give reasonable results. */
3557 if (cost.cost > (int) target_spill_cost [speed])
3558 cost.cost = target_spill_cost [speed];
3559
3560 return cost;
3561 }
3562
3563 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3564 invariants the computation depends on. */
3565
3566 static comp_cost
3567 force_var_cost (struct ivopts_data *data,
3568 tree expr, bitmap *depends_on)
3569 {
3570 if (depends_on)
3571 {
3572 fd_ivopts_data = data;
3573 walk_tree (&expr, find_depends, depends_on, NULL);
3574 }
3575
3576 return force_expr_to_var_cost (expr, data->speed);
3577 }
3578
3579 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3580 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3581 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3582 invariants the computation depends on. */
3583
3584 static comp_cost
3585 split_address_cost (struct ivopts_data *data,
3586 tree addr, bool *symbol_present, bool *var_present,
3587 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3588 {
3589 tree core;
3590 HOST_WIDE_INT bitsize;
3591 HOST_WIDE_INT bitpos;
3592 tree toffset;
3593 enum machine_mode mode;
3594 int unsignedp, volatilep;
3595
3596 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3597 &unsignedp, &volatilep, false);
3598
3599 if (toffset != 0
3600 || bitpos % BITS_PER_UNIT != 0
3601 || TREE_CODE (core) != VAR_DECL)
3602 {
3603 *symbol_present = false;
3604 *var_present = true;
3605 fd_ivopts_data = data;
3606 walk_tree (&addr, find_depends, depends_on, NULL);
3607 return new_cost (target_spill_cost[data->speed], 0);
3608 }
3609
3610 *offset += bitpos / BITS_PER_UNIT;
3611 if (TREE_STATIC (core)
3612 || DECL_EXTERNAL (core))
3613 {
3614 *symbol_present = true;
3615 *var_present = false;
3616 return zero_cost;
3617 }
3618
3619 *symbol_present = false;
3620 *var_present = true;
3621 return zero_cost;
3622 }
3623
3624 /* Estimates cost of expressing difference of addresses E1 - E2 as
3625 var + symbol + offset. The value of offset is added to OFFSET,
3626 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3627 part is missing. DEPENDS_ON is a set of the invariants the computation
3628 depends on. */
3629
3630 static comp_cost
3631 ptr_difference_cost (struct ivopts_data *data,
3632 tree e1, tree e2, bool *symbol_present, bool *var_present,
3633 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634 {
3635 HOST_WIDE_INT diff = 0;
3636 aff_tree aff_e1, aff_e2;
3637 tree type;
3638
3639 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3640
3641 if (ptr_difference_const (e1, e2, &diff))
3642 {
3643 *offset += diff;
3644 *symbol_present = false;
3645 *var_present = false;
3646 return zero_cost;
3647 }
3648
3649 if (integer_zerop (e2))
3650 return split_address_cost (data, TREE_OPERAND (e1, 0),
3651 symbol_present, var_present, offset, depends_on);
3652
3653 *symbol_present = false;
3654 *var_present = true;
3655
3656 type = signed_type_for (TREE_TYPE (e1));
3657 tree_to_aff_combination (e1, type, &aff_e1);
3658 tree_to_aff_combination (e2, type, &aff_e2);
3659 aff_combination_scale (&aff_e2, double_int_minus_one);
3660 aff_combination_add (&aff_e1, &aff_e2);
3661
3662 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3663 }
3664
3665 /* Estimates cost of expressing difference E1 - E2 as
3666 var + symbol + offset. The value of offset is added to OFFSET,
3667 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3668 part is missing. DEPENDS_ON is a set of the invariants the computation
3669 depends on. */
3670
3671 static comp_cost
3672 difference_cost (struct ivopts_data *data,
3673 tree e1, tree e2, bool *symbol_present, bool *var_present,
3674 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3675 {
3676 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3677 unsigned HOST_WIDE_INT off1, off2;
3678 aff_tree aff_e1, aff_e2;
3679 tree type;
3680
3681 e1 = strip_offset (e1, &off1);
3682 e2 = strip_offset (e2, &off2);
3683 *offset += off1 - off2;
3684
3685 STRIP_NOPS (e1);
3686 STRIP_NOPS (e2);
3687
3688 if (TREE_CODE (e1) == ADDR_EXPR)
3689 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3690 offset, depends_on);
3691 *symbol_present = false;
3692
3693 if (operand_equal_p (e1, e2, 0))
3694 {
3695 *var_present = false;
3696 return zero_cost;
3697 }
3698
3699 *var_present = true;
3700
3701 if (integer_zerop (e2))
3702 return force_var_cost (data, e1, depends_on);
3703
3704 if (integer_zerop (e1))
3705 {
3706 comp_cost cost = force_var_cost (data, e2, depends_on);
3707 cost.cost += multiply_by_cost (-1, mode, data->speed);
3708 return cost;
3709 }
3710
3711 type = signed_type_for (TREE_TYPE (e1));
3712 tree_to_aff_combination (e1, type, &aff_e1);
3713 tree_to_aff_combination (e2, type, &aff_e2);
3714 aff_combination_scale (&aff_e2, double_int_minus_one);
3715 aff_combination_add (&aff_e1, &aff_e2);
3716
3717 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3718 }
3719
3720 /* Determines the cost of the computation by which USE is expressed
3721 from induction variable CAND. If ADDRESS_P is true, we just need
3722 to create an address from it, otherwise we want to get it into a
3723 register. A set of invariants we depend on is stored in
3724 DEPENDS_ON. AT is the statement at which the value is computed.
3725 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3726 addressing is likely. */
3727
3728 static comp_cost
3729 get_computation_cost_at (struct ivopts_data *data,
3730 struct iv_use *use, struct iv_cand *cand,
3731 bool address_p, bitmap *depends_on, gimple at,
3732 bool *can_autoinc)
3733 {
3734 tree ubase = use->iv->base, ustep = use->iv->step;
3735 tree cbase, cstep;
3736 tree utype = TREE_TYPE (ubase), ctype;
3737 unsigned HOST_WIDE_INT cstepi, offset = 0;
3738 HOST_WIDE_INT ratio, aratio;
3739 bool var_present, symbol_present, stmt_is_after_inc;
3740 comp_cost cost;
3741 double_int rat;
3742 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3743
3744 *depends_on = NULL;
3745
3746 /* Only consider real candidates. */
3747 if (!cand->iv)
3748 return infinite_cost;
3749
3750 cbase = cand->iv->base;
3751 cstep = cand->iv->step;
3752 ctype = TREE_TYPE (cbase);
3753
3754 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3755 {
3756 /* We do not have a precision to express the values of use. */
3757 return infinite_cost;
3758 }
3759
3760 if (address_p)
3761 {
3762 /* Do not try to express address of an object with computation based
3763 on address of a different object. This may cause problems in rtl
3764 level alias analysis (that does not expect this to be happening,
3765 as this is illegal in C), and would be unlikely to be useful
3766 anyway. */
3767 if (use->iv->base_object
3768 && cand->iv->base_object
3769 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3770 return infinite_cost;
3771 }
3772
3773 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3774 {
3775 /* TODO -- add direct handling of this case. */
3776 goto fallback;
3777 }
3778
3779 /* CSTEPI is removed from the offset in case the statement is after the
3780 increment. If the step is not constant, we use zero instead.
3781 This is a bit imprecise (there is the extra addition), but
3782 redundancy elimination is likely to transform the code so that
3783 it uses the value of the variable before the increment anyway,
3784 so this is not too unrealistic. */
3785 if (cst_and_fits_in_hwi (cstep))
3786 cstepi = int_cst_value (cstep);
3787 else
3788 cstepi = 0;
3789
3790 if (!constant_multiple_of (ustep, cstep, &rat))
3791 return infinite_cost;
3792
3793 if (double_int_fits_in_shwi_p (rat))
3794 ratio = double_int_to_shwi (rat);
3795 else
3796 return infinite_cost;
3797
3798 STRIP_NOPS (cbase);
3799 ctype = TREE_TYPE (cbase);
3800
3801 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3802 or ratio == 1, it is better to handle this like
3803
3804 ubase - ratio * cbase + ratio * var
3805
3806 (this also holds in the case ratio == -1, TODO). */
3807
3808 if (cst_and_fits_in_hwi (cbase))
3809 {
3810 offset = - ratio * int_cst_value (cbase);
3811 cost = difference_cost (data,
3812 ubase, build_int_cst (utype, 0),
3813 &symbol_present, &var_present, &offset,
3814 depends_on);
3815 }
3816 else if (ratio == 1)
3817 {
3818 cost = difference_cost (data,
3819 ubase, cbase,
3820 &symbol_present, &var_present, &offset,
3821 depends_on);
3822 }
3823 else if (address_p
3824 && !POINTER_TYPE_P (ctype)
3825 && multiplier_allowed_in_address_p
3826 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3827 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3828 {
3829 cbase
3830 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3831 cost = difference_cost (data,
3832 ubase, cbase,
3833 &symbol_present, &var_present, &offset,
3834 depends_on);
3835 }
3836 else
3837 {
3838 cost = force_var_cost (data, cbase, depends_on);
3839 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3840 cost = add_costs (cost,
3841 difference_cost (data,
3842 ubase, build_int_cst (utype, 0),
3843 &symbol_present, &var_present,
3844 &offset, depends_on));
3845 }
3846
3847 /* If we are after the increment, the value of the candidate is higher by
3848 one iteration. */
3849 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3850 if (stmt_is_after_inc)
3851 offset -= ratio * cstepi;
3852
3853 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3854 (symbol/var1/const parts may be omitted). If we are looking for an
3855 address, find the cost of addressing this. */
3856 if (address_p)
3857 return add_costs (cost,
3858 get_address_cost (symbol_present, var_present,
3859 offset, ratio, cstepi,
3860 TYPE_MODE (TREE_TYPE (utype)),
3861 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3862 speed, stmt_is_after_inc,
3863 can_autoinc));
3864
3865 /* Otherwise estimate the costs for computing the expression. */
3866 if (!symbol_present && !var_present && !offset)
3867 {
3868 if (ratio != 1)
3869 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3870 return cost;
3871 }
3872
3873 /* Symbol + offset should be compile-time computable, so consider that they
3874 are added once to the variable, if present. */
3875 if (var_present && (symbol_present || offset))
3876 cost.cost += adjust_setup_cost (data,
3877 add_cost (TYPE_MODE (ctype), speed));
3878
3879 /* Having an offset does not affect the runtime cost in case it is added
3880 to the symbol, but it increases complexity. */
3881 if (offset)
3882 cost.complexity++;
3883
3884 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3885
3886 aratio = ratio > 0 ? ratio : -ratio;
3887 if (aratio != 1)
3888 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3889 return cost;
3890
3891 fallback:
3892 if (can_autoinc)
3893 *can_autoinc = false;
3894
3895 {
3896 /* Just get the expression, expand it and measure the cost. */
3897 tree comp = get_computation_at (data->current_loop, use, cand, at);
3898
3899 if (!comp)
3900 return infinite_cost;
3901
3902 if (address_p)
3903 comp = build_simple_mem_ref (comp);
3904
3905 return new_cost (computation_cost (comp, speed), 0);
3906 }
3907 }
3908
3909 /* Determines the cost of the computation by which USE is expressed
3910 from induction variable CAND. If ADDRESS_P is true, we just need
3911 to create an address from it, otherwise we want to get it into a
3912 register. A set of invariants we depend on is stored in
3913 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3914 autoinc addressing is likely. */
3915
3916 static comp_cost
3917 get_computation_cost (struct ivopts_data *data,
3918 struct iv_use *use, struct iv_cand *cand,
3919 bool address_p, bitmap *depends_on, bool *can_autoinc)
3920 {
3921 return get_computation_cost_at (data,
3922 use, cand, address_p, depends_on, use->stmt,
3923 can_autoinc);
3924 }
3925
3926 /* Determines cost of basing replacement of USE on CAND in a generic
3927 expression. */
3928
3929 static bool
3930 determine_use_iv_cost_generic (struct ivopts_data *data,
3931 struct iv_use *use, struct iv_cand *cand)
3932 {
3933 bitmap depends_on;
3934 comp_cost cost;
3935
3936 /* The simple case first -- if we need to express value of the preserved
3937 original biv, the cost is 0. This also prevents us from counting the
3938 cost of increment twice -- once at this use and once in the cost of
3939 the candidate. */
3940 if (cand->pos == IP_ORIGINAL
3941 && cand->incremented_at == use->stmt)
3942 {
3943 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3944 return true;
3945 }
3946
3947 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3948 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3949
3950 return !infinite_cost_p (cost);
3951 }
3952
3953 /* Determines cost of basing replacement of USE on CAND in an address. */
3954
3955 static bool
3956 determine_use_iv_cost_address (struct ivopts_data *data,
3957 struct iv_use *use, struct iv_cand *cand)
3958 {
3959 bitmap depends_on;
3960 bool can_autoinc;
3961 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3962 &can_autoinc);
3963
3964 if (cand->ainc_use == use)
3965 {
3966 if (can_autoinc)
3967 cost.cost -= cand->cost_step;
3968 /* If we generated the candidate solely for exploiting autoincrement
3969 opportunities, and it turns out it can't be used, set the cost to
3970 infinity to make sure we ignore it. */
3971 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3972 cost = infinite_cost;
3973 }
3974 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3975
3976 return !infinite_cost_p (cost);
3977 }
3978
3979 /* Computes value of candidate CAND at position AT in iteration NITER, and
3980 stores it to VAL. */
3981
3982 static void
3983 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3984 aff_tree *val)
3985 {
3986 aff_tree step, delta, nit;
3987 struct iv *iv = cand->iv;
3988 tree type = TREE_TYPE (iv->base);
3989 tree steptype = type;
3990 if (POINTER_TYPE_P (type))
3991 steptype = sizetype;
3992
3993 tree_to_aff_combination (iv->step, steptype, &step);
3994 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3995 aff_combination_convert (&nit, steptype);
3996 aff_combination_mult (&nit, &step, &delta);
3997 if (stmt_after_increment (loop, cand, at))
3998 aff_combination_add (&delta, &step);
3999
4000 tree_to_aff_combination (iv->base, type, val);
4001 aff_combination_add (val, &delta);
4002 }
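/* E.g. (illustrative values): for a candidate with base 16 and step 4,
the value at iteration NITER is 16 + 4 * NITER when AT precedes the
increment, and 16 + 4 * (NITER + 1) after it -- the extra step added
above when stmt_after_increment returns true. */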
4003
4004 /* Returns period of induction variable iv. */
4005
4006 static tree
4007 iv_period (struct iv *iv)
4008 {
4009 tree step = iv->step, period, type;
4010 tree pow2div;
4011
4012 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4013
4014 type = unsigned_type_for (TREE_TYPE (step));
4015 /* Period of the iv is lcm (step, type_range)/step -1,
4016 i.e., N*type_range/step - 1. Since type range is power
4017 of two, N == step >> num_of_ending_zeros_binary (step),
4018 so the final result is
4019
4020 (type_range >> num_of_ending_zeros_binary (step)) - 1
4021
4022 */
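/* Worked example (illustrative numbers): in a 32-bit unsigned type with
step 12 = 0b1100, num_ending_zeros is 2, so the period is
(1 << 30) - 1 -- the candidate revisits a value only after that many
increments. */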
4023 pow2div = num_ending_zeros (step);
4024
4025 period = build_low_bits_mask (type,
4026 (TYPE_PRECISION (type)
4027 - tree_low_cst (pow2div, 1)));
4028
4029 return period;
4030 }
4031
4032 /* Returns the comparison operator used when eliminating the iv USE. */
4033
4034 static enum tree_code
4035 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4036 {
4037 struct loop *loop = data->current_loop;
4038 basic_block ex_bb;
4039 edge exit;
4040
4041 ex_bb = gimple_bb (use->stmt);
4042 exit = EDGE_SUCC (ex_bb, 0);
4043 if (flow_bb_inside_loop_p (loop, exit->dest))
4044 exit = EDGE_SUCC (ex_bb, 1);
4045
4046 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4047 }
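/* For instance, if the exit is taken on the true edge of the condition,
the eliminated test is rewritten as "var == bound" (EQ_EXPR); if the
loop instead continues on the true edge, NE_EXPR is used. */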
4048
4049 /* Check whether it is possible to express the condition in USE by a comparison
4050 of candidate CAND. If so, store the value it is compared with to BOUND. */
4051
4052 static bool
4053 may_eliminate_iv (struct ivopts_data *data,
4054 struct iv_use *use, struct iv_cand *cand, tree *bound)
4055 {
4056 basic_block ex_bb;
4057 edge exit;
4058 tree nit, period;
4059 struct loop *loop = data->current_loop;
4060 aff_tree bnd;
4061 struct tree_niter_desc *desc = NULL;
4062
4063 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4064 return false;
4065
4066 /* For now works only for exits that dominate the loop latch.
4067 TODO: extend to other conditions inside loop body. */
4068 ex_bb = gimple_bb (use->stmt);
4069 if (use->stmt != last_stmt (ex_bb)
4070 || gimple_code (use->stmt) != GIMPLE_COND
4071 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4072 return false;
4073
4074 exit = EDGE_SUCC (ex_bb, 0);
4075 if (flow_bb_inside_loop_p (loop, exit->dest))
4076 exit = EDGE_SUCC (ex_bb, 1);
4077 if (flow_bb_inside_loop_p (loop, exit->dest))
4078 return false;
4079
4080 nit = niter_for_exit (data, exit, &desc);
4081 if (!nit)
4082 return false;
4083
4084 /* Determine whether we can use the variable to test the exit condition.
4085 This is the case iff the period of the induction variable is greater
4086 than the number of iterations for which the exit condition is true. */
4087 period = iv_period (cand->iv);
4088
4089 /* If the number of iterations is constant, compare against it directly. */
4090 if (TREE_CODE (nit) == INTEGER_CST)
4091 {
4092 /* See cand_value_at. */
4093 if (stmt_after_increment (loop, cand, use->stmt))
4094 {
4095 if (!tree_int_cst_lt (nit, period))
4096 return false;
4097 }
4098 else
4099 {
4100 if (tree_int_cst_lt (period, nit))
4101 return false;
4102 }
4103 }
4104
4105 /* If not, and if this is the only possible exit of the loop, see whether
4106 we can get a conservative estimate on the number of iterations of the
4107 entire loop and compare against that instead. */
4108 else
4109 {
4110 double_int period_value, max_niter;
4111
4112 max_niter = desc->max;
4113 if (stmt_after_increment (loop, cand, use->stmt))
4114 max_niter = double_int_add (max_niter, double_int_one);
4115 period_value = tree_to_double_int (period);
4116 if (double_int_ucmp (max_niter, period_value) > 0)
4117 {
4118 /* See if we can take advantage of inferred loop bound information. */
4119 if (loop_only_exit_p (loop, exit))
4120 {
4121 if (!estimated_loop_iterations (loop, true, &max_niter))
4122 return false;
4123 /* The loop bound is already adjusted by adding 1. */
4124 if (double_int_ucmp (max_niter, period_value) > 0)
4125 return false;
4126 }
4127 else
4128 return false;
4129 }
4130 }
4131
4132 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4133
4134 *bound = aff_combination_to_tree (&bnd);
4135 /* It is unlikely that computing the number of iterations using division
4136 would be more profitable than keeping the original induction variable. */
4137 if (expression_expensive_p (*bound))
4138 return false;
4139 return true;
4140 }
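/* Example with assumed numbers: a candidate with a 16-bit unsigned iv
of step 1 has period 65535. An exit known to be taken after at most
1000 iterations therefore passes the test above, and the condition can
be replaced by an equality check against the value computed by
cand_value_at for niter == 1000. */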
4141
4142
4143 /* Determines cost of basing replacement of USE on CAND in a condition. */
4144
4145 static bool
4146 determine_use_iv_cost_condition (struct ivopts_data *data,
4147 struct iv_use *use, struct iv_cand *cand)
4148 {
4149 tree bound = NULL_TREE;
4150 struct iv *cmp_iv;
4151 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4152 comp_cost elim_cost, express_cost, cost;
4153 bool ok;
4154 tree *control_var, *bound_cst;
4155
4156 /* Only consider real candidates. */
4157 if (!cand->iv)
4158 {
4159 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4160 return false;
4161 }
4162
4163 /* Try iv elimination. */
4164 if (may_eliminate_iv (data, use, cand, &bound))
4165 {
4166 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4167 /* The bound is a loop invariant, so it will only be computed
4168 once. */
4169 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4170 }
4171 else
4172 elim_cost = infinite_cost;
4173
4174 /* Try expressing the original giv. If it is compared with an invariant,
4175 note that we cannot get rid of it. */
4176 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4177 NULL, &cmp_iv);
4178 gcc_assert (ok);
4179
4180 /* When the condition is a comparison of the candidate IV against
4181 zero, prefer this IV.
4182
4183 TODO: The constant that we're subtracting from the cost should
4184 be target-dependent. This information should be added to the
4185 target costs for each backend. */
4186 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4187 && integer_zerop (*bound_cst)
4188 && (operand_equal_p (*control_var, cand->var_after, 0)
4189 || operand_equal_p (*control_var, cand->var_before, 0)))
4190 elim_cost.cost -= 1;
4191
4192 express_cost = get_computation_cost (data, use, cand, false,
4193 &depends_on_express, NULL);
4194 fd_ivopts_data = data;
4195 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4196
4197 /* Choose the better approach, preferring the eliminated IV. */
4198 if (compare_costs (elim_cost, express_cost) <= 0)
4199 {
4200 cost = elim_cost;
4201 depends_on = depends_on_elim;
4202 depends_on_elim = NULL;
4203 }
4204 else
4205 {
4206 cost = express_cost;
4207 depends_on = depends_on_express;
4208 depends_on_express = NULL;
4209 bound = NULL_TREE;
4210 }
4211
4212 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4213
4214 if (depends_on_elim)
4215 BITMAP_FREE (depends_on_elim);
4216 if (depends_on_express)
4217 BITMAP_FREE (depends_on_express);
4218
4219 return !infinite_cost_p (cost);
4220 }
4221
4222 /* Determines cost of basing replacement of USE on CAND. Returns false
4223 if USE cannot be based on CAND. */
4224
4225 static bool
4226 determine_use_iv_cost (struct ivopts_data *data,
4227 struct iv_use *use, struct iv_cand *cand)
4228 {
4229 switch (use->type)
4230 {
4231 case USE_NONLINEAR_EXPR:
4232 return determine_use_iv_cost_generic (data, use, cand);
4233
4234 case USE_ADDRESS:
4235 return determine_use_iv_cost_address (data, use, cand);
4236
4237 case USE_COMPARE:
4238 return determine_use_iv_cost_condition (data, use, cand);
4239
4240 default:
4241 gcc_unreachable ();
4242 }
4243 }
4244
4245 /* Return true if get_computation_cost indicates that autoincrement is
4246 a possibility for the pair of USE and CAND, false otherwise. */
4247
4248 static bool
4249 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4250 struct iv_cand *cand)
4251 {
4252 bitmap depends_on;
4253 bool can_autoinc;
4254 comp_cost cost;
4255
4256 if (use->type != USE_ADDRESS)
4257 return false;
4258
4259 cost = get_computation_cost (data, use, cand, true, &depends_on,
4260 &can_autoinc);
4261
4262 BITMAP_FREE (depends_on);
4263
4264 return !infinite_cost_p (cost) && can_autoinc;
4265 }
4266
4267 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4268 use that allows autoincrement, and set their AINC_USE if possible. */
4269
4270 static void
4271 set_autoinc_for_original_candidates (struct ivopts_data *data)
4272 {
4273 unsigned i, j;
4274
4275 for (i = 0; i < n_iv_cands (data); i++)
4276 {
4277 struct iv_cand *cand = iv_cand (data, i);
4278 struct iv_use *closest = NULL;
4279 if (cand->pos != IP_ORIGINAL)
4280 continue;
4281 for (j = 0; j < n_iv_uses (data); j++)
4282 {
4283 struct iv_use *use = iv_use (data, j);
4284 unsigned uid = gimple_uid (use->stmt);
4285 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4286 || uid > gimple_uid (cand->incremented_at))
4287 continue;
4288 if (closest == NULL || uid > gimple_uid (closest->stmt))
4289 closest = use;
4290 }
4291 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4292 continue;
4293 cand->ainc_use = closest;
4294 }
4295 }
4296
4297 /* Finds the candidates for the induction variables. */
4298
4299 static void
4300 find_iv_candidates (struct ivopts_data *data)
4301 {
4302 /* Add commonly used ivs. */
4303 add_standard_iv_candidates (data);
4304
4305 /* Add old induction variables. */
4306 add_old_ivs_candidates (data);
4307
4308 /* Add induction variables derived from uses. */
4309 add_derived_ivs_candidates (data);
4310
4311 set_autoinc_for_original_candidates (data);
4312
4313 /* Record the important candidates. */
4314 record_important_candidates (data);
4315 }
4316
4317 /* Determines costs of basing the use of the iv on an iv candidate. */
4318
4319 static void
4320 determine_use_iv_costs (struct ivopts_data *data)
4321 {
4322 unsigned i, j;
4323 struct iv_use *use;
4324 struct iv_cand *cand;
4325 bitmap to_clear = BITMAP_ALLOC (NULL);
4326
4327 alloc_use_cost_map (data);
4328
4329 for (i = 0; i < n_iv_uses (data); i++)
4330 {
4331 use = iv_use (data, i);
4332
4333 if (data->consider_all_candidates)
4334 {
4335 for (j = 0; j < n_iv_cands (data); j++)
4336 {
4337 cand = iv_cand (data, j);
4338 determine_use_iv_cost (data, use, cand);
4339 }
4340 }
4341 else
4342 {
4343 bitmap_iterator bi;
4344
4345 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4346 {
4347 cand = iv_cand (data, j);
4348 if (!determine_use_iv_cost (data, use, cand))
4349 bitmap_set_bit (to_clear, j);
4350 }
4351
4352 /* Remove the candidates for which the cost is infinite from
4353 the list of related candidates. */
4354 bitmap_and_compl_into (use->related_cands, to_clear);
4355 bitmap_clear (to_clear);
4356 }
4357 }
4358
4359 BITMAP_FREE (to_clear);
4360
4361 if (dump_file && (dump_flags & TDF_DETAILS))
4362 {
4363 fprintf (dump_file, "Use-candidate costs:\n");
4364
4365 for (i = 0; i < n_iv_uses (data); i++)
4366 {
4367 use = iv_use (data, i);
4368
4369 fprintf (dump_file, "Use %d:\n", i);
4370 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4371 for (j = 0; j < use->n_map_members; j++)
4372 {
4373 if (!use->cost_map[j].cand
4374 || infinite_cost_p (use->cost_map[j].cost))
4375 continue;
4376
4377 fprintf (dump_file, " %d\t%d\t%d\t",
4378 use->cost_map[j].cand->id,
4379 use->cost_map[j].cost.cost,
4380 use->cost_map[j].cost.complexity);
4381 if (use->cost_map[j].depends_on)
4382 bitmap_print (dump_file,
4383 use->cost_map[j].depends_on, "","");
4384 fprintf (dump_file, "\n");
4385 }
4386
4387 fprintf (dump_file, "\n");
4388 }
4389 fprintf (dump_file, "\n");
4390 }
4391 }
4392
4393 /* Determines cost of the candidate CAND. */
4394
4395 static void
4396 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4397 {
4398 comp_cost cost_base;
4399 unsigned cost, cost_step;
4400 tree base;
4401
4402 if (!cand->iv)
4403 {
4404 cand->cost = 0;
4405 return;
4406 }
4407
4408 /* There are two costs associated with the candidate -- its increment
4409 and its initialization. The second is almost negligible for any loop
4410 that rolls enough, so we take it into account only a little. */
4411
4412 base = cand->iv->base;
4413 cost_base = force_var_cost (data, base, NULL);
4414 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4415
4416 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4417
4418 /* Prefer the original ivs unless we may gain something by replacing them.
4419 The reason is to make debugging simpler; this is not relevant for
4420 artificial ivs created by other optimization passes. */
4421 if (cand->pos != IP_ORIGINAL
4422 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4423 cost++;
4424
4425 /* Prefer not to insert statements into the latch unless there are some
4426 already (so that we do not create unnecessary jumps). */
4427 if (cand->pos == IP_END
4428 && empty_block_p (ip_end_pos (data->current_loop)))
4429 cost++;
4430
4431 cand->cost = cost;
4432 cand->cost_step = cost_step;
4433 }
4434
4435 /* Determines costs of computation of the candidates. */
4436
4437 static void
4438 determine_iv_costs (struct ivopts_data *data)
4439 {
4440 unsigned i;
4441
4442 if (dump_file && (dump_flags & TDF_DETAILS))
4443 {
4444 fprintf (dump_file, "Candidate costs:\n");
4445 fprintf (dump_file, " cand\tcost\n");
4446 }
4447
4448 for (i = 0; i < n_iv_cands (data); i++)
4449 {
4450 struct iv_cand *cand = iv_cand (data, i);
4451
4452 determine_iv_cost (data, cand);
4453
4454 if (dump_file && (dump_flags & TDF_DETAILS))
4455 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4456 }
4457
4458 if (dump_file && (dump_flags & TDF_DETAILS))
4459 fprintf (dump_file, "\n");
4460 }
4461
4462 /* Calculates cost for having SIZE induction variables. */
4463
4464 static unsigned
4465 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4466 {
4467 /* We add size to the cost, so that we prefer eliminating ivs
4468 if possible. */
4469 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4470 data->body_includes_call);
4471 }
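/* E.g., assuming estimate_reg_pressure_cost returns 0 while SIZE plus
DATA->regs_used still fits in the available registers, the penalty
reduces to SIZE itself, which is what breaks ties in favor of smaller
iv sets. */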
4472
4473 /* For each size of the induction variable set determine the penalty. */
4474
4475 static void
4476 determine_set_costs (struct ivopts_data *data)
4477 {
4478 unsigned j, n;
4479 gimple phi;
4480 gimple_stmt_iterator psi;
4481 tree op;
4482 struct loop *loop = data->current_loop;
4483 bitmap_iterator bi;
4484
4485 if (dump_file && (dump_flags & TDF_DETAILS))
4486 {
4487 fprintf (dump_file, "Global costs:\n");
4488 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4489 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4490 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4491 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4492 }
4493
4494 n = 0;
4495 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4496 {
4497 phi = gsi_stmt (psi);
4498 op = PHI_RESULT (phi);
4499
4500 if (!is_gimple_reg (op))
4501 continue;
4502
4503 if (get_iv (data, op))
4504 continue;
4505
4506 n++;
4507 }
4508
4509 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4510 {
4511 struct version_info *info = ver_info (data, j);
4512
4513 if (info->inv_id && info->has_nonlin_use)
4514 n++;
4515 }
4516
4517 data->regs_used = n;
4518 if (dump_file && (dump_flags & TDF_DETAILS))
4519 fprintf (dump_file, " regs_used %d\n", n);
4520
4521 if (dump_file && (dump_flags & TDF_DETAILS))
4522 {
4523 fprintf (dump_file, " cost for size:\n");
4524 fprintf (dump_file, " ivs\tcost\n");
4525 for (j = 0; j <= 2 * target_avail_regs; j++)
4526 fprintf (dump_file, " %d\t%d\n", j,
4527 ivopts_global_cost_for_size (data, j));
4528 fprintf (dump_file, "\n");
4529 }
4530 }
4531
4532 /* Returns true if A is a cheaper cost pair than B. */
4533
4534 static bool
4535 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4536 {
4537 int cmp;
4538
4539 if (!a)
4540 return false;
4541
4542 if (!b)
4543 return true;
4544
4545 cmp = compare_costs (a->cost, b->cost);
4546 if (cmp < 0)
4547 return true;
4548
4549 if (cmp > 0)
4550 return false;
4551
4552 /* In case the costs are the same, prefer the cheaper candidate. */
4553 if (a->cand->cost < b->cand->cost)
4554 return true;
4555
4556 return false;
4557 }
4558
4559 /* Computes the cost field of IVS structure. */
4560
4561 static void
4562 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4563 {
4564 comp_cost cost = ivs->cand_use_cost;
4565 cost.cost += ivs->cand_cost;
4566 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4567
4568 ivs->cost = cost;
4569 }
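/* The total is therefore

use costs of the selected pairs + candidate costs + size penalty,

with the complexity component coming only from the use costs. */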
4570
4571 /* Remove invariants in set INVS from set IVS. */
4572
4573 static void
4574 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4575 {
4576 bitmap_iterator bi;
4577 unsigned iid;
4578
4579 if (!invs)
4580 return;
4581
4582 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4583 {
4584 ivs->n_invariant_uses[iid]--;
4585 if (ivs->n_invariant_uses[iid] == 0)
4586 ivs->n_regs--;
4587 }
4588 }
4589
4590 /* Set USE not to be expressed by any candidate in IVS. */
4591
4592 static void
4593 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4594 struct iv_use *use)
4595 {
4596 unsigned uid = use->id, cid;
4597 struct cost_pair *cp;
4598
4599 cp = ivs->cand_for_use[uid];
4600 if (!cp)
4601 return;
4602 cid = cp->cand->id;
4603
4604 ivs->bad_uses++;
4605 ivs->cand_for_use[uid] = NULL;
4606 ivs->n_cand_uses[cid]--;
4607
4608 if (ivs->n_cand_uses[cid] == 0)
4609 {
4610 bitmap_clear_bit (ivs->cands, cid);
4611 /* Do not count the pseudocandidates. */
4612 if (cp->cand->iv)
4613 ivs->n_regs--;
4614 ivs->n_cands--;
4615 ivs->cand_cost -= cp->cand->cost;
4616
4617 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4618 }
4619
4620 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4621
4622 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4623 iv_ca_recount_cost (data, ivs);
4624 }
4625
4626 /* Add invariants in set INVS to set IVS. */
4627
4628 static void
4629 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4630 {
4631 bitmap_iterator bi;
4632 unsigned iid;
4633
4634 if (!invs)
4635 return;
4636
4637 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4638 {
4639 ivs->n_invariant_uses[iid]++;
4640 if (ivs->n_invariant_uses[iid] == 1)
4641 ivs->n_regs++;
4642 }
4643 }
4644
4645 /* Set cost pair for USE in set IVS to CP. */
4646
4647 static void
4648 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4649 struct iv_use *use, struct cost_pair *cp)
4650 {
4651 unsigned uid = use->id, cid;
4652
4653 if (ivs->cand_for_use[uid] == cp)
4654 return;
4655
4656 if (ivs->cand_for_use[uid])
4657 iv_ca_set_no_cp (data, ivs, use);
4658
4659 if (cp)
4660 {
4661 cid = cp->cand->id;
4662
4663 ivs->bad_uses--;
4664 ivs->cand_for_use[uid] = cp;
4665 ivs->n_cand_uses[cid]++;
4666 if (ivs->n_cand_uses[cid] == 1)
4667 {
4668 bitmap_set_bit (ivs->cands, cid);
4669 /* Do not count the pseudocandidates. */
4670 if (cp->cand->iv)
4671 ivs->n_regs++;
4672 ivs->n_cands++;
4673 ivs->cand_cost += cp->cand->cost;
4674
4675 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4676 }
4677
4678 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4679 iv_ca_set_add_invariants (ivs, cp->depends_on);
4680 iv_ca_recount_cost (data, ivs);
4681 }
4682 }
4683
4684 /* Extend set IVS by expressing USE by some of the candidates in it
4685 if possible. */
4686
4687 static void
4688 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4689 struct iv_use *use)
4690 {
4691 struct cost_pair *best_cp = NULL, *cp;
4692 bitmap_iterator bi;
4693 unsigned i;
4694
4695 gcc_assert (ivs->upto >= use->id);
4696
4697 if (ivs->upto == use->id)
4698 {
4699 ivs->upto++;
4700 ivs->bad_uses++;
4701 }
4702
4703 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4704 {
4705 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4706
4707 if (cheaper_cost_pair (cp, best_cp))
4708 best_cp = cp;
4709 }
4710
4711 iv_ca_set_cp (data, ivs, use, best_cp);
4712 }
4713
4714 /* Get cost for assignment IVS. */
4715
4716 static comp_cost
4717 iv_ca_cost (struct iv_ca *ivs)
4718 {
4719 /* This was a conditional expression but it triggered a bug in
4720 Sun C 5.5. */
4721 if (ivs->bad_uses)
4722 return infinite_cost;
4723 else
4724 return ivs->cost;
4725 }
4726
4727 /* Returns true if all dependences of CP are among invariants in IVS. */
4728
4729 static bool
4730 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4731 {
4732 unsigned i;
4733 bitmap_iterator bi;
4734
4735 if (!cp->depends_on)
4736 return true;
4737
4738 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4739 {
4740 if (ivs->n_invariant_uses[i] == 0)
4741 return false;
4742 }
4743
4744 return true;
4745 }
4746
4747 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
4748 it before NEXT_CHANGE. */
4749
4750 static struct iv_ca_delta *
4751 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4752 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4753 {
4754 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4755
4756 change->use = use;
4757 change->old_cp = old_cp;
4758 change->new_cp = new_cp;
4759 change->next_change = next_change;
4760
4761 return change;
4762 }
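/* A delta built this way is a singly-linked list of (USE, OLD_CP,
NEW_CP) records; iv_ca_delta_commit walks it to apply the swaps, and
iv_ca_delta_reverse below turns the list into its own inverse so the
same walk can undo them. */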
4763
4764 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4765 are rewritten. */
4766
4767 static struct iv_ca_delta *
4768 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4769 {
4770 struct iv_ca_delta *last;
4771
4772 if (!l2)
4773 return l1;
4774
4775 if (!l1)
4776 return l2;
4777
4778 for (last = l1; last->next_change; last = last->next_change)
4779 continue;
4780 last->next_change = l2;
4781
4782 return l1;
4783 }
4784
4785 /* Returns the cost pair by which USE is expressed in IVS. */
4786
4787 static struct cost_pair *
4788 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4789 {
4790 return ivs->cand_for_use[use->id];
4791 }
4792
4793 /* Reverse the list of changes DELTA, forming the inverse to it. */
4794
4795 static struct iv_ca_delta *
4796 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4797 {
4798 struct iv_ca_delta *act, *next, *prev = NULL;
4799 struct cost_pair *tmp;
4800
4801 for (act = delta; act; act = next)
4802 {
4803 next = act->next_change;
4804 act->next_change = prev;
4805 prev = act;
4806
4807 tmp = act->old_cp;
4808 act->old_cp = act->new_cp;
4809 act->new_cp = tmp;
4810 }
4811
4812 return prev;
4813 }
4814
4815 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4816 reverted instead. */
4817
4818 static void
4819 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4820 struct iv_ca_delta *delta, bool forward)
4821 {
4822 struct cost_pair *from, *to;
4823 struct iv_ca_delta *act;
4824
4825 if (!forward)
4826 delta = iv_ca_delta_reverse (delta);
4827
4828 for (act = delta; act; act = act->next_change)
4829 {
4830 from = act->old_cp;
4831 to = act->new_cp;
4832 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4833 iv_ca_set_cp (data, ivs, act->use, to);
4834 }
4835
4836 if (!forward)
4837 iv_ca_delta_reverse (delta);
4838 }
4839
4840 /* Returns true if CAND is used in IVS. */
4841
4842 static bool
4843 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4844 {
4845 return ivs->n_cand_uses[cand->id] > 0;
4846 }
4847
4848 /* Returns number of induction variable candidates in the set IVS. */
4849
4850 static unsigned
4851 iv_ca_n_cands (struct iv_ca *ivs)
4852 {
4853 return ivs->n_cands;
4854 }
4855
4856 /* Free the list of changes DELTA. */
4857
4858 static void
4859 iv_ca_delta_free (struct iv_ca_delta **delta)
4860 {
4861 struct iv_ca_delta *act, *next;
4862
4863 for (act = *delta; act; act = next)
4864 {
4865 next = act->next_change;
4866 free (act);
4867 }
4868
4869 *delta = NULL;
4870 }
4871
4872 /* Allocates a new iv candidate assignment. */
4873
4874 static struct iv_ca *
4875 iv_ca_new (struct ivopts_data *data)
4876 {
4877 struct iv_ca *nw = XNEW (struct iv_ca);
4878
4879 nw->upto = 0;
4880 nw->bad_uses = 0;
4881 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4882 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4883 nw->cands = BITMAP_ALLOC (NULL);
4884 nw->n_cands = 0;
4885 nw->n_regs = 0;
4886 nw->cand_use_cost = zero_cost;
4887 nw->cand_cost = 0;
4888 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4889 nw->cost = zero_cost;
4890
4891 return nw;
4892 }
4893
4894 /* Free memory occupied by the set IVS. */
4895
4896 static void
4897 iv_ca_free (struct iv_ca **ivs)
4898 {
4899 free ((*ivs)->cand_for_use);
4900 free ((*ivs)->n_cand_uses);
4901 BITMAP_FREE ((*ivs)->cands);
4902 free ((*ivs)->n_invariant_uses);
4903 free (*ivs);
4904 *ivs = NULL;
4905 }
4906
4907 /* Dumps IVS to FILE. */
4908
4909 static void
4910 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4911 {
4912 const char *pref = " invariants ";
4913 unsigned i;
4914 comp_cost cost = iv_ca_cost (ivs);
4915
4916 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4917 bitmap_print (file, ivs->cands, " candidates ","\n");
4918
4919 for (i = 1; i <= data->max_inv_id; i++)
4920 if (ivs->n_invariant_uses[i])
4921 {
4922 fprintf (file, "%s%d", pref, i);
4923 pref = ", ";
4924 }
4925 fprintf (file, "\n");
4926 }
4927
4928 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4929 of the new set, and store the differences in DELTA. The number of
4930 induction variables in the new set is stored in N_IVS. */
4931
4932 static comp_cost
4933 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4934 struct iv_cand *cand, struct iv_ca_delta **delta,
4935 unsigned *n_ivs)
4936 {
4937 unsigned i;
4938 comp_cost cost;
4939 struct iv_use *use;
4940 struct cost_pair *old_cp, *new_cp;
4941
4942 *delta = NULL;
4943 for (i = 0; i < ivs->upto; i++)
4944 {
4945 use = iv_use (data, i);
4946 old_cp = iv_ca_cand_for_use (ivs, use);
4947
4948 if (old_cp
4949 && old_cp->cand == cand)
4950 continue;
4951
4952 new_cp = get_use_iv_cost (data, use, cand);
4953 if (!new_cp)
4954 continue;
4955
4956 if (!iv_ca_has_deps (ivs, new_cp))
4957 continue;
4958
4959 if (!cheaper_cost_pair (new_cp, old_cp))
4960 continue;
4961
4962 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4963 }
4964
4965 iv_ca_delta_commit (data, ivs, *delta, true);
4966 cost = iv_ca_cost (ivs);
4967 if (n_ivs)
4968 *n_ivs = iv_ca_n_cands (ivs);
4969 iv_ca_delta_commit (data, ivs, *delta, false);
4970
4971 return cost;
4972 }
4973
4974 /* Try narrowing set IVS by removing CAND. Return the cost of
4975 the new set and store the differences in DELTA. */
4976
4977 static comp_cost
4978 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4979 struct iv_cand *cand, struct iv_ca_delta **delta)
4980 {
4981 unsigned i, ci;
4982 struct iv_use *use;
4983 struct cost_pair *old_cp, *new_cp, *cp;
4984 bitmap_iterator bi;
4985 struct iv_cand *cnd;
4986 comp_cost cost;
4987
4988 *delta = NULL;
4989 for (i = 0; i < n_iv_uses (data); i++)
4990 {
4991 use = iv_use (data, i);
4992
4993 old_cp = iv_ca_cand_for_use (ivs, use);
4994 if (old_cp->cand != cand)
4995 continue;
4996
4997 new_cp = NULL;
4998
4999 if (data->consider_all_candidates)
5000 {
5001 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5002 {
5003 if (ci == cand->id)
5004 continue;
5005
5006 cnd = iv_cand (data, ci);
5007
5008 cp = get_use_iv_cost (data, use, cnd);
5009 if (!cp)
5010 continue;
5011 if (!iv_ca_has_deps (ivs, cp))
5012 continue;
5013
5014 if (!cheaper_cost_pair (cp, new_cp))
5015 continue;
5016
5017 new_cp = cp;
5018 }
5019 }
5020 else
5021 {
5022 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5023 {
5024 if (ci == cand->id)
5025 continue;
5026
5027 cnd = iv_cand (data, ci);
5028
5029 cp = get_use_iv_cost (data, use, cnd);
5030 if (!cp)
5031 continue;
5032 if (!iv_ca_has_deps (ivs, cp))
5033 continue;
5034
5035 if (!cheaper_cost_pair (cp, new_cp))
5036 continue;
5037
5038 new_cp = cp;
5039 }
5040 }
5041
5042 if (!new_cp)
5043 {
5044 iv_ca_delta_free (delta);
5045 return infinite_cost;
5046 }
5047
5048 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5049 }
5050
5051 iv_ca_delta_commit (data, ivs, *delta, true);
5052 cost = iv_ca_cost (ivs);
5053 iv_ca_delta_commit (data, ivs, *delta, false);
5054
5055 return cost;
5056 }
5057
5058 /* Try optimizing the set of candidates IVS by removing candidates other
5059 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5060 differences in DELTA. */
5061
5062 static comp_cost
5063 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5064 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5065 {
5066 bitmap_iterator bi;
5067 struct iv_ca_delta *act_delta, *best_delta;
5068 unsigned i;
5069 comp_cost best_cost, acost;
5070 struct iv_cand *cand;
5071
5072 best_delta = NULL;
5073 best_cost = iv_ca_cost (ivs);
5074
5075 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5076 {
5077 cand = iv_cand (data, i);
5078
5079 if (cand == except_cand)
5080 continue;
5081
5082 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5083
5084 if (compare_costs (acost, best_cost) < 0)
5085 {
5086 best_cost = acost;
5087 iv_ca_delta_free (&best_delta);
5088 best_delta = act_delta;
5089 }
5090 else
5091 iv_ca_delta_free (&act_delta);
5092 }
5093
5094 if (!best_delta)
5095 {
5096 *delta = NULL;
5097 return best_cost;
5098 }
5099
5100 /* Recurse to possibly remove other unnecessary ivs. */
5101 iv_ca_delta_commit (data, ivs, best_delta, true);
5102 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5103 iv_ca_delta_commit (data, ivs, best_delta, false);
5104 *delta = iv_ca_delta_join (best_delta, *delta);
5105 return best_cost;
5106 }
5107
5108 /* Tries to extend the set IVS in the best possible way in order
5109 to express the USE. If ORIGINALP is true, prefer candidates from
5110 the original set of IVs, otherwise favor important candidates not
5111 based on any memory object. */
5112
5113 static bool
5114 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5115 struct iv_use *use, bool originalp)
5116 {
5117 comp_cost best_cost, act_cost;
5118 unsigned i;
5119 bitmap_iterator bi;
5120 struct iv_cand *cand;
5121 struct iv_ca_delta *best_delta = NULL, *act_delta;
5122 struct cost_pair *cp;
5123
5124 iv_ca_add_use (data, ivs, use);
5125 best_cost = iv_ca_cost (ivs);
5126
5127 cp = iv_ca_cand_for_use (ivs, use);
5128 if (cp)
5129 {
5130 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5131 iv_ca_set_no_cp (data, ivs, use);
5132 }
5133
5134 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5135 first try important candidates not based on any memory object. Only if
5136 this fails, try the specific ones. Rationale -- in loops with many
5137 variables the best choice often is to use just one generic biv. If we
5138 added here many ivs specific to the uses, the optimization algorithm later
5139 would be likely to get stuck in a local minimum, thus causing us to create
5140 too many ivs. The approach from few ivs to more seems more likely to be
5141 successful -- starting from few ivs, replacing an expensive use by a
5142 specific iv should always be a win. */
5143 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5144 {
5145 cand = iv_cand (data, i);
5146
5147 if (originalp && cand->pos != IP_ORIGINAL)
5148 continue;
5149
5150 if (!originalp && cand->iv->base_object != NULL_TREE)
5151 continue;
5152
5153 if (iv_ca_cand_used_p (ivs, cand))
5154 continue;
5155
5156 cp = get_use_iv_cost (data, use, cand);
5157 if (!cp)
5158 continue;
5159
5160 iv_ca_set_cp (data, ivs, use, cp);
5161 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5162 iv_ca_set_no_cp (data, ivs, use);
5163 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5164
5165 if (compare_costs (act_cost, best_cost) < 0)
5166 {
5167 best_cost = act_cost;
5168
5169 iv_ca_delta_free (&best_delta);
5170 best_delta = act_delta;
5171 }
5172 else
5173 iv_ca_delta_free (&act_delta);
5174 }
5175
5176 if (infinite_cost_p (best_cost))
5177 {
5178 for (i = 0; i < use->n_map_members; i++)
5179 {
5180 cp = use->cost_map + i;
5181 cand = cp->cand;
5182 if (!cand)
5183 continue;
5184
5185 /* Already tried this. */
5186 if (cand->important)
5187 {
5188 if (originalp && cand->pos == IP_ORIGINAL)
5189 continue;
5190 if (!originalp && cand->iv->base_object == NULL_TREE)
5191 continue;
5192 }
5193
5194 if (iv_ca_cand_used_p (ivs, cand))
5195 continue;
5196
5197 act_delta = NULL;
5198 iv_ca_set_cp (data, ivs, use, cp);
5199 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5200 iv_ca_set_no_cp (data, ivs, use);
5201 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5202 cp, act_delta);
5203
5204 if (compare_costs (act_cost, best_cost) < 0)
5205 {
5206 best_cost = act_cost;
5207
5208 if (best_delta)
5209 iv_ca_delta_free (&best_delta);
5210 best_delta = act_delta;
5211 }
5212 else
5213 iv_ca_delta_free (&act_delta);
5214 }
5215 }
5216
5217 iv_ca_delta_commit (data, ivs, best_delta, true);
5218 iv_ca_delta_free (&best_delta);
5219
5220 return !infinite_cost_p (best_cost);
5221 }
5222
5223 /* Finds an initial assignment of candidates to uses. */
5224
5225 static struct iv_ca *
5226 get_initial_solution (struct ivopts_data *data, bool originalp)
5227 {
5228 struct iv_ca *ivs = iv_ca_new (data);
5229 unsigned i;
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5233 {
5234 iv_ca_free (&ivs);
5235 return NULL;
5236 }
5237
5238 return ivs;
5239 }
5240
5241 /* Tries to improve the set of induction variables IVS. */
5242
5243 static bool
5244 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5245 {
5246 unsigned i, n_ivs;
5247 comp_cost acost, best_cost = iv_ca_cost (ivs);
5248 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5249 struct iv_cand *cand;
5250
5251 /* Try extending the set of induction variables by one. */
5252 for (i = 0; i < n_iv_cands (data); i++)
5253 {
5254 cand = iv_cand (data, i);
5255
5256 if (iv_ca_cand_used_p (ivs, cand))
5257 continue;
5258
5259 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5260 if (!act_delta)
5261 continue;
5262
5263 /* If we successfully added the candidate and the set is small enough,
5264 try optimizing it by removing other candidates. */
5265 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5266 {
5267 iv_ca_delta_commit (data, ivs, act_delta, true);
5268 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5269 iv_ca_delta_commit (data, ivs, act_delta, false);
5270 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5271 }
5272
5273 if (compare_costs (acost, best_cost) < 0)
5274 {
5275 best_cost = acost;
5276 iv_ca_delta_free (&best_delta);
5277 best_delta = act_delta;
5278 }
5279 else
5280 iv_ca_delta_free (&act_delta);
5281 }
5282
5283 if (!best_delta)
5284 {
5285 /* Try removing the candidates from the set instead. */
5286 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5287
5288 /* Nothing more we can do. */
5289 if (!best_delta)
5290 return false;
5291 }
5292
5293 iv_ca_delta_commit (data, ivs, best_delta, true);
5294 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5295 iv_ca_delta_free (&best_delta);
5296 return true;
5297 }
5298
5299 /* Attempts to find the optimal set of induction variables. We use a simple
5300 greedy heuristic -- we try to replace at most one candidate in the selected
5301 solution and remove the unused ivs while this improves the cost. */
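/* Schematically (a sketch of the control flow below):

set = get_initial_solution (data, originalp);
while (try_improve_iv_set (data, set))
;

Each successful round strictly decreases the cost (extending the set
by one candidate and/or pruning it), so the iteration terminates. */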
5302
5303 static struct iv_ca *
5304 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5305 {
5306 struct iv_ca *set;
5307
5308 /* Get the initial solution. */
5309 set = get_initial_solution (data, originalp);
5310 if (!set)
5311 {
5312 if (dump_file && (dump_flags & TDF_DETAILS))
5313 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5314 return NULL;
5315 }
5316
5317 if (dump_file && (dump_flags & TDF_DETAILS))
5318 {
5319 fprintf (dump_file, "Initial set of candidates:\n");
5320 iv_ca_dump (data, dump_file, set);
5321 }
5322
5323 while (try_improve_iv_set (data, set))
5324 {
5325 if (dump_file && (dump_flags & TDF_DETAILS))
5326 {
5327 fprintf (dump_file, "Improved to:\n");
5328 iv_ca_dump (data, dump_file, set);
5329 }
5330 }
5331
5332 return set;
5333 }
5334
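/* Attempts to find the optimal set of induction variables, trying both
the strategy that prefers the original ivs and the one that prefers
generic candidates, and returning whichever result is cheaper. */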
5335 static struct iv_ca *
5336 find_optimal_iv_set (struct ivopts_data *data)
5337 {
5338 unsigned i;
5339 struct iv_ca *set, *origset;
5340 struct iv_use *use;
5341 comp_cost cost, origcost;
5342
5343 /* Determine the cost based on a strategy that starts with original IVs,
5344 and try again using a strategy that prefers candidates not based
5345 on any memory object. */
5346 origset = find_optimal_iv_set_1 (data, true);
5347 set = find_optimal_iv_set_1 (data, false);
5348
5349 if (!origset && !set)
5350 return NULL;
5351
5352 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5353 cost = set ? iv_ca_cost (set) : infinite_cost;
5354
5355 if (dump_file && (dump_flags & TDF_DETAILS))
5356 {
5357 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5358 origcost.cost, origcost.complexity);
5359 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5360 cost.cost, cost.complexity);
5361 }
5362
5363 /* Choose the one with the best cost. */
5364 if (compare_costs (origcost, cost) <= 0)
5365 {
5366 if (set)
5367 iv_ca_free (&set);
5368 set = origset;
5369 }
5370 else if (origset)
5371 iv_ca_free (&origset);
5372
5373 for (i = 0; i < n_iv_uses (data); i++)
5374 {
5375 use = iv_use (data, i);
5376 use->selected = iv_ca_cand_for_use (set, use)->cand;
5377 }
5378
5379 return set;
5380 }
5381
5382 /* Creates a new induction variable corresponding to CAND. */
5383
5384 static void
5385 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5386 {
5387 gimple_stmt_iterator incr_pos;
5388 tree base;
5389 bool after = false;
5390
5391 if (!cand->iv)
5392 return;
5393
5394 switch (cand->pos)
5395 {
5396 case IP_NORMAL:
5397 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5398 break;
5399
5400 case IP_END:
5401 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5402 after = true;
5403 break;
5404
5405 case IP_AFTER_USE:
5406 after = true;
5407 /* fall through */
5408 case IP_BEFORE_USE:
5409 incr_pos = gsi_for_stmt (cand->incremented_at);
5410 break;
5411
5412 case IP_ORIGINAL:
5413 /* Mark that the iv is preserved. */
5414 name_info (data, cand->var_before)->preserve_biv = true;
5415 name_info (data, cand->var_after)->preserve_biv = true;
5416
5417 /* Rewrite the increment so that it uses var_before directly. */
5418 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5419
5420 return;
5421 }
5422
5423 gimple_add_tmp_var (cand->var_before);
5424 add_referenced_var (cand->var_before);
5425
5426 base = unshare_expr (cand->iv->base);
5427
5428 create_iv (base, unshare_expr (cand->iv->step),
5429 cand->var_before, data->current_loop,
5430 &incr_pos, after, &cand->var_before, &cand->var_after);
5431 }
5432
5433 /* Creates new induction variables described in SET. */
5434
5435 static void
5436 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5437 {
5438 unsigned i;
5439 struct iv_cand *cand;
5440 bitmap_iterator bi;
5441
5442 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5443 {
5444 cand = iv_cand (data, i);
5445 create_new_iv (data, cand);
5446 }
5447 }
5448
5449
5450 /* Rewrites USE (definition of iv used in a nonlinear expression)
5451 using candidate CAND. */
5452
5453 static void
5454 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5455 struct iv_use *use, struct iv_cand *cand)
5456 {
5457 tree comp;
5458 tree op, tgt;
5459 gimple ass;
5460 gimple_stmt_iterator bsi;
5461
5462 /* An important special case -- if we are asked to express the value of
5463 the original iv by itself, just exit; there is no need to
5464 introduce a new computation (that might also need casting the
5465 variable to unsigned and back). */
5466 if (cand->pos == IP_ORIGINAL
5467 && cand->incremented_at == use->stmt)
5468 {
5469 tree step, ctype, utype;
5470 enum tree_code incr_code = PLUS_EXPR, old_code;
5471
5472 gcc_assert (is_gimple_assign (use->stmt));
5473 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5474
5475 step = cand->iv->step;
5476 ctype = TREE_TYPE (step);
5477 utype = TREE_TYPE (cand->var_after);
5478 if (TREE_CODE (step) == NEGATE_EXPR)
5479 {
5480 incr_code = MINUS_EXPR;
5481 step = TREE_OPERAND (step, 0);
5482 }
5483
5484 /* Check whether we may leave the computation unchanged.
5485 This is the case only if it does not rely on other
5486 computations in the loop -- otherwise, the computation
5487 we rely upon may be removed in remove_unused_ivs,
5488 thus leading to an ICE. */
5489 old_code = gimple_assign_rhs_code (use->stmt);
5490 if (old_code == PLUS_EXPR
5491 || old_code == MINUS_EXPR
5492 || old_code == POINTER_PLUS_EXPR)
5493 {
5494 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5495 op = gimple_assign_rhs2 (use->stmt);
5496 else if (old_code != MINUS_EXPR
5497 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5498 op = gimple_assign_rhs1 (use->stmt);
5499 else
5500 op = NULL_TREE;
5501 }
5502 else
5503 op = NULL_TREE;
5504
5505 if (op
5506 && (TREE_CODE (op) == INTEGER_CST
5507 || operand_equal_p (op, step, 0)))
5508 return;
5509
5510 /* Otherwise, add the necessary computations to express
5511 the iv. */
5512 op = fold_convert (ctype, cand->var_before);
5513 comp = fold_convert (utype,
5514 build2 (incr_code, ctype, op,
5515 unshare_expr (step)));
5516 }
5517 else
5518 {
5519 comp = get_computation (data->current_loop, use, cand);
5520 gcc_assert (comp != NULL_TREE);
5521 }
5522
5523 switch (gimple_code (use->stmt))
5524 {
5525 case GIMPLE_PHI:
5526 tgt = PHI_RESULT (use->stmt);
5527
5528 /* If we should keep the biv, do not replace it. */
5529 if (name_info (data, tgt)->preserve_biv)
5530 return;
5531
5532 bsi = gsi_after_labels (gimple_bb (use->stmt));
5533 break;
5534
5535 case GIMPLE_ASSIGN:
5536 tgt = gimple_assign_lhs (use->stmt);
5537 bsi = gsi_for_stmt (use->stmt);
5538 break;
5539
5540 default:
5541 gcc_unreachable ();
5542 }
5543
5544 if (!valid_gimple_rhs_p (comp)
5545 || (gimple_code (use->stmt) != GIMPLE_PHI
5546 /* We can't allow re-allocating the stmt as it might still be
5547 pointed to. */
5548 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5549 >= gimple_num_ops (gsi_stmt (bsi)))))
5550 {
5551 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5552 true, GSI_SAME_STMT);
5553 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5554 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5555 }
5556
5557 if (gimple_code (use->stmt) == GIMPLE_PHI)
5558 {
5559 ass = gimple_build_assign (tgt, comp);
5560 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5561
5562 bsi = gsi_for_stmt (use->stmt);
5563 remove_phi_node (&bsi, false);
5564 }
5565 else
5566 {
5567 gimple_assign_set_rhs_from_tree (&bsi, comp);
5568 use->stmt = gsi_stmt (bsi);
5569 }
5570 }
5571
5572 /* Replaces the ssa name in index IDX by its basic variable. Callback for
5573 for_each_index. */
5574
5575 static bool
5576 idx_remove_ssa_names (tree base, tree *idx,
5577 void *data ATTRIBUTE_UNUSED)
5578 {
5579 tree *op;
5580
5581 if (TREE_CODE (*idx) == SSA_NAME)
5582 *idx = SSA_NAME_VAR (*idx);
5583
5584 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5585 {
5586 op = &TREE_OPERAND (base, 2);
5587 if (*op
5588 && TREE_CODE (*op) == SSA_NAME)
5589 *op = SSA_NAME_VAR (*op);
5590 op = &TREE_OPERAND (base, 3);
5591 if (*op
5592 && TREE_CODE (*op) == SSA_NAME)
5593 *op = SSA_NAME_VAR (*op);
5594 }
5595
5596 return true;
5597 }
5598
5599 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5600
5601 static tree
5602 unshare_and_remove_ssa_names (tree ref)
5603 {
5604 ref = unshare_expr (ref);
5605 for_each_index (&ref, idx_remove_ssa_names, NULL);
5606
5607 return ref;
5608 }
5609
5610 /* Copies the reference information from OLD_REF to NEW_REF. */
5611
5612 static void
5613 copy_ref_info (tree new_ref, tree old_ref)
5614 {
5615 tree new_ptr_base = NULL_TREE;
5616
5617 if (TREE_CODE (old_ref) == TARGET_MEM_REF
5618 && TREE_CODE (new_ref) == TARGET_MEM_REF)
5619 TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
5620 else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5621 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5622
5623 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5624 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5625
5626 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5627 new_ptr_base = TMR_BASE (new_ref);
5628 else if (TREE_CODE (new_ref) == MEM_REF)
5629 new_ptr_base = TREE_OPERAND (new_ref, 0);
5630
5631 /* We can transfer points-to information from an old pointer
5632 or decl base to the new one. */
5633 if (new_ptr_base
5634 && TREE_CODE (new_ptr_base) == SSA_NAME
5635 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5636 && !SSA_NAME_PTR_INFO (new_ptr_base))
5637 {
5638 tree base = get_base_address (old_ref);
5639 if (!base)
5640 ;
5641 else if ((INDIRECT_REF_P (base)
5642 || TREE_CODE (base) == MEM_REF)
5643 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5644 duplicate_ssa_name_ptr_info
5645 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5646 else if (TREE_CODE (base) == VAR_DECL
5647 || TREE_CODE (base) == PARM_DECL
5648 || TREE_CODE (base) == RESULT_DECL)
5649 {
5650 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5651 pt_solution_set_var (&pi->pt, base);
5652 }
5653 }
5654 }
5655
5656 /* Rewrites USE (address that is an iv) using candidate CAND. */
5657
5658 static void
5659 rewrite_use_address (struct ivopts_data *data,
5660 struct iv_use *use, struct iv_cand *cand)
5661 {
5662 aff_tree aff;
5663 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5664 tree base_hint = NULL_TREE;
5665 tree ref;
5666 bool ok;
5667
5668 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5669 gcc_assert (ok);
5670 unshare_aff_combination (&aff);
5671
5672 /* To avoid undefined overflow problems, all IV candidates use unsigned
5673 integer types. The drawback is that this makes it impossible for
5674 create_mem_ref to distinguish an IV that is based on a memory object
5675 from one that represents simply an offset.
5676
5677 To work around this problem, we pass a hint to create_mem_ref that
5678 indicates which variable (if any) in aff is an IV based on a memory
5679 object. Note that we only consider the candidate. If this is not
5680 based on an object, the base of the reference is in some subexpression
5681 of the use -- but these will use pointer types, so they are recognized
5682 by the create_mem_ref heuristics anyway. */
5683 if (cand->iv->base_object)
5684 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5685
5686 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p),
5687 reference_alias_ptr_type (*use->op_p),
5688 &aff, base_hint, data->speed);
5689 copy_ref_info (ref, *use->op_p);
5690 *use->op_p = ref;
5691 }
5692
5693 /* Rewrites USE (a condition in which one of the arguments is an iv)
5694 using candidate CAND. */
5695
5696 static void
5697 rewrite_use_compare (struct ivopts_data *data,
5698 struct iv_use *use, struct iv_cand *cand)
5699 {
5700 tree comp, *var_p, op, bound;
5701 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5702 enum tree_code compare;
5703 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5704 bool ok;
5705
5706 bound = cp->value;
5707 if (bound)
5708 {
5709 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5710 tree var_type = TREE_TYPE (var);
5711 gimple_seq stmts;
5712
5713 compare = iv_elimination_compare (data, use);
5714 bound = unshare_expr (fold_convert (var_type, bound));
5715 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5716 if (stmts)
5717 gsi_insert_seq_on_edge_immediate (
5718 loop_preheader_edge (data->current_loop),
5719 stmts);
5720
5721 gimple_cond_set_lhs (use->stmt, var);
5722 gimple_cond_set_code (use->stmt, compare);
5723 gimple_cond_set_rhs (use->stmt, op);
5724 return;
5725 }
5726
5727 /* The induction variable elimination failed; just express the original
5728 giv. */
5729 comp = get_computation (data->current_loop, use, cand);
5730 gcc_assert (comp != NULL_TREE);
5731
5732 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5733 gcc_assert (ok);
5734
5735 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5736 true, GSI_SAME_STMT);
5737 }
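/* E.g. (illustrative): a use "if (i < n)" for which iv elimination
succeeded is rewritten as "if (ivtmp == bound)" (or with NE_EXPR, as
chosen by iv_elimination_compare), with the bound computation inserted
once on the preheader edge. */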
5738
5739 /* Rewrites USE using candidate CAND. */
5740
5741 static void
5742 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5743 {
5744 switch (use->type)
5745 {
5746 case USE_NONLINEAR_EXPR:
5747 rewrite_use_nonlinear_expr (data, use, cand);
5748 break;
5749
5750 case USE_ADDRESS:
5751 rewrite_use_address (data, use, cand);
5752 break;
5753
5754 case USE_COMPARE:
5755 rewrite_use_compare (data, use, cand);
5756 break;
5757
5758 default:
5759 gcc_unreachable ();
5760 }
5761
5762 update_stmt (use->stmt);
5763 }
5764
5765 /* Rewrite the uses using the selected induction variables. */
5766
5767 static void
5768 rewrite_uses (struct ivopts_data *data)
5769 {
5770 unsigned i;
5771 struct iv_cand *cand;
5772 struct iv_use *use;
5773
5774 for (i = 0; i < n_iv_uses (data); i++)
5775 {
5776 use = iv_use (data, i);
5777 cand = use->selected;
5778 gcc_assert (cand);
5779
5780 rewrite_use (data, use, cand);
5781 }
5782 }
5783
5784 /* Removes the ivs that are not used after rewriting. */
5785
5786 static void
5787 remove_unused_ivs (struct ivopts_data *data)
5788 {
5789 unsigned j;
5790 bitmap_iterator bi;
5791 bitmap toremove = BITMAP_ALLOC (NULL);
5792
5793 /* Figure out an order in which to release SSA DEFs so that we don't
5794 release something that we'd have to propagate into a debug stmt
5795 afterwards. */
5796 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5797 {
5798 struct version_info *info;
5799
5800 info = ver_info (data, j);
5801 if (info->iv
5802 && !integer_zerop (info->iv->step)
5803 && !info->inv_id
5804 && !info->iv->have_use_for
5805 && !info->preserve_biv)
5806 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5807 }
5808
5809 release_defs_bitset (toremove);
5810
5811 BITMAP_FREE (toremove);
5812 }
5813
5814 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
5815 for pointer_map_traverse. */
5816
5817 static bool
5818 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
5819 void *data ATTRIBUTE_UNUSED)
5820 {
5821 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
5822
5823 free (niter);
5824 return true;
5825 }
5826
5827 /* Frees data allocated by the optimization of a single loop. */
5828
5829 static void
5830 free_loop_data (struct ivopts_data *data)
5831 {
5832 unsigned i, j;
5833 bitmap_iterator bi;
5834 tree obj;
5835
5836 if (data->niters)
5837 {
5838 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
5839 pointer_map_destroy (data->niters);
5840 data->niters = NULL;
5841 }
5842
5843 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5844 {
5845 struct version_info *info;
5846
5847 info = ver_info (data, i);
5848 if (info->iv)
5849 free (info->iv);
5850 info->iv = NULL;
5851 info->has_nonlin_use = false;
5852 info->preserve_biv = false;
5853 info->inv_id = 0;
5854 }
5855 bitmap_clear (data->relevant);
5856 bitmap_clear (data->important_candidates);
5857
5858 for (i = 0; i < n_iv_uses (data); i++)
5859 {
5860 struct iv_use *use = iv_use (data, i);
5861
5862 free (use->iv);
5863 BITMAP_FREE (use->related_cands);
5864 for (j = 0; j < use->n_map_members; j++)
5865 if (use->cost_map[j].depends_on)
5866 BITMAP_FREE (use->cost_map[j].depends_on);
5867 free (use->cost_map);
5868 free (use);
5869 }
5870 VEC_truncate (iv_use_p, data->iv_uses, 0);
5871
5872 for (i = 0; i < n_iv_cands (data); i++)
5873 {
5874 struct iv_cand *cand = iv_cand (data, i);
5875
5876 if (cand->iv)
5877 free (cand->iv);
5878 if (cand->depends_on)
5879 BITMAP_FREE (cand->depends_on);
5880 free (cand);
5881 }
5882 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5883
5884 if (data->version_info_size < num_ssa_names)
5885 {
5886 data->version_info_size = 2 * num_ssa_names;
5887 free (data->version_info);
5888 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5889 }
5890
5891 data->max_inv_id = 0;
5892
5893 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5894 SET_DECL_RTL (obj, NULL_RTX);
5895
5896 VEC_truncate (tree, decl_rtl_to_reset, 0);
5897 }
5898
5899 /* Finalizes the data structures DATA used by the iv optimization pass. */
5901
5902 static void
5903 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5904 {
5905 free_loop_data (data);
5906 free (data->version_info);
5907 BITMAP_FREE (data->relevant);
5908 BITMAP_FREE (data->important_candidates);
5909
5910 VEC_free (tree, heap, decl_rtl_to_reset);
5911 VEC_free (iv_use_p, heap, data->iv_uses);
5912 VEC_free (iv_cand_p, heap, data->iv_candidates);
5913 }
5914
5915 /* Returns true if the loop body BODY (with NUM_NODES blocks) includes any function calls other than inexpensive builtins. */
5916
5917 static bool
5918 loop_body_includes_call (basic_block *body, unsigned num_nodes)
5919 {
5920 gimple_stmt_iterator gsi;
5921 unsigned i;
5922
5923 for (i = 0; i < num_nodes; i++)
5924 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
5925 {
5926 gimple stmt = gsi_stmt (gsi);
5927 if (is_gimple_call (stmt)
5928 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
5929 return true;
5930 }
5931 return false;
5932 }
5933
5934 /* Optimizes the LOOP. Returns true if anything changed. */
5935
5936 static bool
5937 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5938 {
5939 bool changed = false;
5940 struct iv_ca *iv_ca;
5941 edge exit;
5942 basic_block *body;
5943
5944 gcc_assert (!data->niters);
5945 data->current_loop = loop;
5946 data->speed = optimize_loop_for_speed_p (loop);
5947
5948 if (dump_file && (dump_flags & TDF_DETAILS))
5949 {
5950 fprintf (dump_file, "Processing loop %d\n", loop->num);
5951
5952 exit = single_dom_exit (loop);
5953 if (exit)
5954 {
5955 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5956 exit->src->index, exit->dest->index);
5957 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5958 fprintf (dump_file, "\n");
5959 }
5960
5961 fprintf (dump_file, "\n");
5962 }
5963
5964 body = get_loop_body (loop);
5965 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
5966 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5967 free (body);
5968
5969 /* For each ssa name, determine whether it behaves as an induction
5970 variable in the current loop. */
5971 if (!find_induction_variables (data))
5972 goto finish;
5973
5974 /* Finds interesting uses (item 1). */
5975 find_interesting_uses (data);
5976 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5977 goto finish;
5978
5979 /* Finds candidates for the induction variables (item 2). */
5980 find_iv_candidates (data);
5981
5982 /* Calculates the costs (item 3, part 1). */
5983 determine_iv_costs (data);
5984 determine_use_iv_costs (data);
5985 determine_set_costs (data);
5986
5987 /* Find the optimal set of induction variables (item 3, part 2). */
5988 iv_ca = find_optimal_iv_set (data);
5989 if (!iv_ca)
5990 goto finish;
5991 changed = true;
5992
5993 /* Create the new induction variables (item 4, part 1). */
5994 create_new_ivs (data, iv_ca);
5995 iv_ca_free (&iv_ca);
5996
5997 /* Rewrite the uses (item 4, part 2). */
5998 rewrite_uses (data);
5999
6000 /* Remove the ivs that are unused after rewriting. */
6001 remove_unused_ivs (data);
6002
6003 /* We have changed the structure of induction variables; it might happen
6004 that definitions in the scev database refer to some of them that were
6005 eliminated. */
6006 scev_reset ();
6007
6008 finish:
6009 free_loop_data (data);
6010
6011 return changed;
6012 }
6013
6014 /* Main entry point. Optimizes induction variables in loops. */
6015
6016 void
6017 tree_ssa_iv_optimize (void)
6018 {
6019 struct loop *loop;
6020 struct ivopts_data data;
6021 loop_iterator li;
6022
6023 tree_ssa_iv_optimize_init (&data);
6024
6025 /* Optimize the loops starting with the innermost ones. */
6026 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6027 {
6028 if (dump_file && (dump_flags & TDF_DETAILS))
6029 flow_loop_dump (loop, dump_file, NULL, 1);
6030
6031 tree_ssa_iv_optimize_loop (&data, loop);
6032 }
6033
6034 tree_ssa_iv_optimize_finalize (&data);
6035 }