/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
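
/* As a minimal illustration of the effect (a hand-written sketch, not
   output of this pass), strength reduction turns the indexed access in

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address a + 4 * i would be recomputed on every iteration
   (assuming a 4-byte element type), into a pointer induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so the per-iteration multiplication is replaced by a simple increment.  */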

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of the use, if HAVE_USE_FOR
                           is true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
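
/* For instance (a rough sketch, not pass output), an IP_NORMAL candidate
   is incremented just before the exit test of the loop body,

     i' = i + 1;
     if (i' < n) goto body;

   while an IP_END candidate is incremented in the latch block, after the
   exit test has already consumed the old value of the iv.  */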

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in the current loop.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, we always try to remove
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
	 unconditionally (i.e., without possibility of the number of
	 iterations being zero).  Also, we cannot safely work with ssa names
	 that appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns the tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL_TREE is
   returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
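
/* For instance (a sketch of the intent, not an exhaustive list of cases),
   for EXPR == &a[i] the base object is &a, for a pointer expression
   p + 4 * i it is p, and for an integer-valued EXPR no base object
   exists, so NULL_TREE is returned.  */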

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
   does not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
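
/* A minimal illustration (hand-written gimple, not pass output): for

     for (i = 0; i < n; i++)
       ...

   the loop header contains a phi node i_1 = PHI <0 (preheader), i_2 (latch)>,
   which find_bivs records as a biv with base 0 and step 1.  */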

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
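
/* For instance, for an exit test if (i_1 < n_2), where i_1 is an iv with
   nonzero step and n_2 is loop invariant, *CONTROL_VAR is set to the
   location of i_1, *BOUND to that of n_2, and true is returned; the names
   i_1 and n_2 are made up for illustration.  */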

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
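
/* For example, for TOP == i * 8 and BOT == i this stores 8 to *MUL and
   returns true; for TOP == 12 and BOT == 4 it stores 3; for TOP == 12 and
   BOT == 8 it returns false, since the division leaves a remainder.  */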

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
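
/* For instance (a sketch, assuming a 4-byte element type for the array a),
   stripping EXPR == &a[i + 3] yields &a[i] and stores 12 to *OFFSET.  */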

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

2087 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2088 position to POS. If USE is not NULL, the candidate is set as related to
2089 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2090 replacement of the final value of the iv by a direct computation. */
2091
2092 static struct iv_cand *
2093 add_candidate_1 (struct ivopts_data *data,
2094 tree base, tree step, bool important, enum iv_position pos,
2095 struct iv_use *use, gimple incremented_at)
2096 {
2097 unsigned i;
2098 struct iv_cand *cand = NULL;
2099 tree type, orig_type;
2100
2101 if (base)
2102 {
2103 orig_type = TREE_TYPE (base);
2104 type = generic_type_for (orig_type);
2105 if (type != orig_type)
2106 {
2107 base = fold_convert (type, base);
2108 step = fold_convert (type, step);
2109 }
2110 }
2111
2112 for (i = 0; i < n_iv_cands (data); i++)
2113 {
2114 cand = iv_cand (data, i);
2115
2116 if (cand->pos != pos)
2117 continue;
2118
2119 if (cand->incremented_at != incremented_at
2120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2121 && cand->ainc_use != use))
2122 continue;
2123
2124 if (!cand->iv)
2125 {
2126 if (!base && !step)
2127 break;
2128
2129 continue;
2130 }
2131
2132 if (!base && !step)
2133 continue;
2134
2135 if (operand_equal_p (base, cand->iv->base, 0)
2136 && operand_equal_p (step, cand->iv->step, 0))
2137 break;
2138 }
2139
2140 if (i == n_iv_cands (data))
2141 {
2142 cand = XCNEW (struct iv_cand);
2143 cand->id = i;
2144
2145 if (!base && !step)
2146 cand->iv = NULL;
2147 else
2148 cand->iv = alloc_iv (base, step);
2149
2150 cand->pos = pos;
2151 if (pos != IP_ORIGINAL && cand->iv)
2152 {
2153 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2154 cand->var_after = cand->var_before;
2155 }
2156 cand->important = important;
2157 cand->incremented_at = incremented_at;
2158 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2159
2160 if (step
2161 && TREE_CODE (step) != INTEGER_CST)
2162 {
2163 fd_ivopts_data = data;
2164 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2165 }
2166
2167 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2168 cand->ainc_use = use;
2169 else
2170 cand->ainc_use = NULL;
2171
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 dump_cand (dump_file, cand);
2174 }
2175
2176 if (important && !cand->important)
2177 {
2178 cand->important = true;
2179 if (dump_file && (dump_flags & TDF_DETAILS))
2180 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2181 }
2182
2183 if (use)
2184 {
2185 bitmap_set_bit (use->related_cands, i);
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 fprintf (dump_file, "Candidate %d is related to use %d\n",
2188 cand->id, use->id);
2189 }
2190
2191 return cand;
2192 }
2193
2194 /* Returns true if incrementing the induction variable at the end of LOOP
2195 is allowed.
2196
2197 The purpose is to avoid splitting the latch edge with a biv increment,
2198 thus creating a jump, possibly confusing other optimization passes and
2199 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2200 IP_NORMAL_POS is not available (so we do not have a better alternative),
2201 or if the latch edge is already nonempty. */
2202
2203 static bool
2204 allow_ip_end_pos_p (struct loop *loop)
2205 {
2206 if (!ip_normal_pos (loop))
2207 return true;
2208
2209 if (!empty_block_p (ip_end_pos (loop)))
2210 return true;
2211
2212 return false;
2213 }
2214
2215 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2216 Important field is set to IMPORTANT. */
2217
2218 static void
2219 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2220 bool important, struct iv_use *use)
2221 {
2222 basic_block use_bb = gimple_bb (use->stmt);
2223 enum machine_mode mem_mode;
2224 unsigned HOST_WIDE_INT cstepi;
2225
2226 /* If we insert the increment in any position other than the standard
2227 ones, we must ensure that it is incremented once per iteration.
2228 It must not be in an inner nested loop, or one side of an if
2229 statement. */
2230 if (use_bb->loop_father != data->current_loop
2231 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2232 || stmt_could_throw_p (use->stmt)
2233 || !cst_and_fits_in_hwi (step))
2234 return;
2235
2236 cstepi = int_cst_value (step);
2237
2238 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2239 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2240 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2241 {
2242 enum tree_code code = MINUS_EXPR;
2243 tree new_base;
2244 tree new_step = step;
2245
2246 if (POINTER_TYPE_P (TREE_TYPE (base)))
2247 {
2248 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2249 code = POINTER_PLUS_EXPR;
2250 }
2251 else
2252 new_step = fold_convert (TREE_TYPE (base), new_step);
2253 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2254 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2255 use->stmt);
2256 }
2257 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2258 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2259 {
2260 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2261 use->stmt);
2262 }
2263 }
2264
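/* For example (illustrative, target-dependent): for a use *p where p
   advances by 4 bytes per iteration and the accessed mode is 4 bytes
   wide, a target with HAVE_POST_INCREMENT gets an IP_AFTER_USE
   candidate with base p and step 4, so the increment can be folded
   into the access as "*p++"; with HAVE_PRE_INCREMENT, the
   IP_BEFORE_USE candidate starts at p - 4, so that "*++p" yields the
   same sequence of addresses.  */
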
2265 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2266 position to POS. If USE is not NULL, the candidate is set as related to
2267 it. The candidate computation is scheduled on all available positions. */
2268
2269 static void
2270 add_candidate (struct ivopts_data *data,
2271 tree base, tree step, bool important, struct iv_use *use)
2272 {
2273 if (ip_normal_pos (data->current_loop))
2274 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2275 if (ip_end_pos (data->current_loop)
2276 && allow_ip_end_pos_p (data->current_loop))
2277 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2278
2279 if (use != NULL && use->type == USE_ADDRESS)
2280 add_autoinc_candidates (data, base, step, important, use);
2281 }
2282
2283 /* Add a standard "0 + 1 * iteration" iv candidate for a
2284 type with SIZE bits. */
2285
2286 static void
2287 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2288 unsigned int size)
2289 {
2290 tree type = lang_hooks.types.type_for_size (size, true);
2291 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2292 true, NULL);
2293 }
2294
2295 /* Adds standard iv candidates. */
2296
2297 static void
2298 add_standard_iv_candidates (struct ivopts_data *data)
2299 {
2300 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2301
2302 /* The same for a double-integer type if it is still fast enough. */
2303 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2304 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2305 }
2306
2308 /* Adds candidates based on the old induction variable IV. */
2309
2310 static void
2311 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2312 {
2313 gimple phi;
2314 tree def;
2315 struct iv_cand *cand;
2316
2317 add_candidate (data, iv->base, iv->step, true, NULL);
2318
2319 /* The same, but with initial value zero. */
2320 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2321 add_candidate (data, size_int (0), iv->step, true, NULL);
2322 else
2323 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2324 iv->step, true, NULL);
2325
2326 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2327 if (gimple_code (phi) == GIMPLE_PHI)
2328 {
2329 /* Additionally record the possibility of leaving the original iv
2330 untouched. */
2331 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2332 cand = add_candidate_1 (data,
2333 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2334 SSA_NAME_DEF_STMT (def));
2335 cand->var_before = iv->ssa_name;
2336 cand->var_after = def;
2337 }
2338 }
2339
2340 /* Adds candidates based on the old induction variables. */
2341
2342 static void
2343 add_old_ivs_candidates (struct ivopts_data *data)
2344 {
2345 unsigned i;
2346 struct iv *iv;
2347 bitmap_iterator bi;
2348
2349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2350 {
2351 iv = ver_info (data, i)->iv;
2352 if (iv && iv->biv_p && !integer_zerop (iv->step))
2353 add_old_iv_candidates (data, iv);
2354 }
2355 }
2356
2357 /* Adds candidates based on the value of the induction variable IV and USE. */
2358
2359 static void
2360 add_iv_value_candidates (struct ivopts_data *data,
2361 struct iv *iv, struct iv_use *use)
2362 {
2363 unsigned HOST_WIDE_INT offset;
2364 tree base;
2365 tree basetype;
2366
2367 add_candidate (data, iv->base, iv->step, false, use);
2368
2369 /* The same, but with initial value zero. Make such a variable important,
2370 since it is generic enough that possibly many uses may be based
2371 on it. */
2372 basetype = TREE_TYPE (iv->base);
2373 if (POINTER_TYPE_P (basetype))
2374 basetype = sizetype;
2375 add_candidate (data, build_int_cst (basetype, 0),
2376 iv->step, true, use);
2377
2378 /* Third, try removing the constant offset. Make sure to even
2379 add a candidate for &a[0] vs. (T *)&a. */
2380 base = strip_offset (iv->base, &offset);
2381 if (offset
2382 || base != iv->base)
2383 add_candidate (data, base, iv->step, false, use);
2384 }
2385
2386 /* Adds candidates based on the uses. */
2387
2388 static void
2389 add_derived_ivs_candidates (struct ivopts_data *data)
2390 {
2391 unsigned i;
2392
2393 for (i = 0; i < n_iv_uses (data); i++)
2394 {
2395 struct iv_use *use = iv_use (data, i);
2396
2397 if (!use)
2398 continue;
2399
2400 switch (use->type)
2401 {
2402 case USE_NONLINEAR_EXPR:
2403 case USE_COMPARE:
2404 case USE_ADDRESS:
2405 /* Just add the ivs based on the value of the iv used here. */
2406 add_iv_value_candidates (data, use->iv, use);
2407 break;
2408
2409 default:
2410 gcc_unreachable ();
2411 }
2412 }
2413 }
2414
2415 /* Record important candidates and add them to related_cands bitmaps
2416 if needed. */
2417
2418 static void
2419 record_important_candidates (struct ivopts_data *data)
2420 {
2421 unsigned i;
2422 struct iv_use *use;
2423
2424 for (i = 0; i < n_iv_cands (data); i++)
2425 {
2426 struct iv_cand *cand = iv_cand (data, i);
2427
2428 if (cand->important)
2429 bitmap_set_bit (data->important_candidates, i);
2430 }
2431
2432 data->consider_all_candidates = (n_iv_cands (data)
2433 <= CONSIDER_ALL_CANDIDATES_BOUND);
2434
2435 if (data->consider_all_candidates)
2436 {
2437 /* We will not need "related_cands" bitmaps in this case,
2438 so release them to decrease peak memory consumption. */
2439 for (i = 0; i < n_iv_uses (data); i++)
2440 {
2441 use = iv_use (data, i);
2442 BITMAP_FREE (use->related_cands);
2443 }
2444 }
2445 else
2446 {
2447 /* Add important candidates to the related_cands bitmaps. */
2448 for (i = 0; i < n_iv_uses (data); i++)
2449 bitmap_ior_into (iv_use (data, i)->related_cands,
2450 data->important_candidates);
2451 }
2452 }
2453
2454 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2455 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2456 we allocate a small hash table for every use. */
2457
2458 static void
2459 alloc_use_cost_map (struct ivopts_data *data)
2460 {
2461 unsigned i, size, s, j;
2462
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 {
2465 struct iv_use *use = iv_use (data, i);
2466 bitmap_iterator bi;
2467
2468 if (data->consider_all_candidates)
2469 size = n_iv_cands (data);
2470 else
2471 {
2472 s = 0;
2473 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2474 {
2475 s++;
2476 }
2477
2478 /* Round up to the nearest power of two, so that taking the modulus is fast. */
2479 for (size = 1; size < s; size <<= 1)
2480 continue;
2481 }
2482
2483 use->n_map_members = size;
2484 use->cost_map = XCNEWVEC (struct cost_pair, size);
2485 }
2486 }
2487
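/* A minimal standalone sketch (illustrative values, not pass code) of
   the rounding loop above and the masked modulo it enables:

     unsigned s = 5, size, id = 13, slot;
     for (size = 1; size < s; size <<= 1)
       continue;                 now size == 8
     slot = id & (size - 1);     equals id % size, i.e. 5

   Rounding S up to a power of two lets set_use_iv_cost and
   get_use_iv_cost below replace the modulo by a cheap bitwise AND.  */
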
2488 /* Returns a description of the computation cost of an expression whose
2489 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2490
2491 static comp_cost
2492 new_cost (unsigned runtime, unsigned complexity)
2493 {
2494 comp_cost cost;
2495
2496 cost.cost = runtime;
2497 cost.complexity = complexity;
2498
2499 return cost;
2500 }
2501
2502 /* Adds costs COST1 and COST2. */
2503
2504 static comp_cost
2505 add_costs (comp_cost cost1, comp_cost cost2)
2506 {
2507 cost1.cost += cost2.cost;
2508 cost1.complexity += cost2.complexity;
2509
2510 return cost1;
2511 }

2512 /* Subtracts cost COST2 from COST1. */
2513
2514 static comp_cost
2515 sub_costs (comp_cost cost1, comp_cost cost2)
2516 {
2517 cost1.cost -= cost2.cost;
2518 cost1.complexity -= cost2.complexity;
2519
2520 return cost1;
2521 }
2522
2523 /* Returns a negative number if COST1 < COST2, a positive number if
2524 COST1 > COST2, and 0 if COST1 = COST2. */
2525
2526 static int
2527 compare_costs (comp_cost cost1, comp_cost cost2)
2528 {
2529 if (cost1.cost == cost2.cost)
2530 return cost1.complexity - cost2.complexity;
2531
2532 return cost1.cost - cost2.cost;
2533 }
2534
2535 /* Returns true if COST is infinite. */
2536
2537 static bool
2538 infinite_cost_p (comp_cost cost)
2539 {
2540 return cost.cost == INFTY;
2541 }
2542
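/* Illustrative arithmetic on comp_cost (numbers hypothetical):

     comp_cost a = new_cost (4, 1);    runtime 4, complexity 1
     comp_cost b = new_cost (2, 0);
     comp_cost c = add_costs (a, b);   c == {6, 1}

   compare_costs (a, b) is positive, i.e. A is more expensive: the
   runtime fields are compared first and complexity only breaks ties.
   infinite_cost_p holds for costs whose runtime field is INFTY.  */
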
2543 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2544 on invariants DEPENDS_ON and that the value used in expressing it
2545 is VALUE. */
2546
2547 static void
2548 set_use_iv_cost (struct ivopts_data *data,
2549 struct iv_use *use, struct iv_cand *cand,
2550 comp_cost cost, bitmap depends_on, tree value)
2551 {
2552 unsigned i, s;
2553
2554 if (infinite_cost_p (cost))
2555 {
2556 BITMAP_FREE (depends_on);
2557 return;
2558 }
2559
2560 if (data->consider_all_candidates)
2561 {
2562 use->cost_map[cand->id].cand = cand;
2563 use->cost_map[cand->id].cost = cost;
2564 use->cost_map[cand->id].depends_on = depends_on;
2565 use->cost_map[cand->id].value = value;
2566 return;
2567 }
2568
2569 /* n_map_members is a power of two, so this computes modulo. */
2570 s = cand->id & (use->n_map_members - 1);
2571 for (i = s; i < use->n_map_members; i++)
2572 if (!use->cost_map[i].cand)
2573 goto found;
2574 for (i = 0; i < s; i++)
2575 if (!use->cost_map[i].cand)
2576 goto found;
2577
2578 gcc_unreachable ();
2579
2580 found:
2581 use->cost_map[i].cand = cand;
2582 use->cost_map[i].cost = cost;
2583 use->cost_map[i].depends_on = depends_on;
2584 use->cost_map[i].value = value;
2585 }
2586
2587 /* Gets cost of (USE, CANDIDATE) pair. */
2588
2589 static struct cost_pair *
2590 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2591 struct iv_cand *cand)
2592 {
2593 unsigned i, s;
2594 struct cost_pair *ret;
2595
2596 if (!cand)
2597 return NULL;
2598
2599 if (data->consider_all_candidates)
2600 {
2601 ret = use->cost_map + cand->id;
2602 if (!ret->cand)
2603 return NULL;
2604
2605 return ret;
2606 }
2607
2608 /* n_map_members is a power of two, so this computes modulo. */
2609 s = cand->id & (use->n_map_members - 1);
2610 for (i = s; i < use->n_map_members; i++)
2611 if (use->cost_map[i].cand == cand)
2612 return use->cost_map + i;
2613
2614 for (i = 0; i < s; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
2617
2618 return NULL;
2619 }
2620
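/* Illustrative probe sequence (hypothetical numbers): with
   n_map_members == 8 and cand->id == 13, both set_use_iv_cost and
   get_use_iv_cost start at slot 13 & 7 == 5 and scan slots 5, 6, 7,
   then wrap around to 0 .. 4 -- classic open addressing with linear
   probing.  A lookup that scans all slots without finding CAND
   returns NULL.  */
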
2621 /* Returns an estimate of the cost of computing SEQ. */
2622
2623 static unsigned
2624 seq_cost (rtx seq, bool speed)
2625 {
2626 unsigned cost = 0;
2627 rtx set;
2628
2629 for (; seq; seq = NEXT_INSN (seq))
2630 {
2631 set = single_set (seq);
2632 if (set)
2633 cost += rtx_cost (set, SET, speed);
2634 else
2635 cost++;
2636 }
2637
2638 return cost;
2639 }
2640
2641 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2642 static rtx
2643 produce_memory_decl_rtl (tree obj, int *regno)
2644 {
2645 rtx x;
2646
2647 gcc_assert (obj);
2648 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2649 {
2650 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2651 x = gen_rtx_SYMBOL_REF (Pmode, name);
2652 SET_SYMBOL_REF_DECL (x, obj);
2653 x = gen_rtx_MEM (DECL_MODE (obj), x);
2654 targetm.encode_section_info (obj, x, true);
2655 }
2656 else
2657 {
2658 x = gen_raw_REG (Pmode, (*regno)++);
2659 x = gen_rtx_MEM (DECL_MODE (obj), x);
2660 }
2661
2662 return x;
2663 }
2664
2665 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2666 walk_tree. DATA contains the actual fake register number. */
2667
2668 static tree
2669 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2670 {
2671 tree obj = NULL_TREE;
2672 rtx x = NULL_RTX;
2673 int *regno = (int *) data;
2674
2675 switch (TREE_CODE (*expr_p))
2676 {
2677 case ADDR_EXPR:
2678 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2679 handled_component_p (*expr_p);
2680 expr_p = &TREE_OPERAND (*expr_p, 0))
2681 continue;
2682 obj = *expr_p;
2683 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2684 x = produce_memory_decl_rtl (obj, regno);
2685 break;
2686
2687 case SSA_NAME:
2688 *ws = 0;
2689 obj = SSA_NAME_VAR (*expr_p);
2690 if (!DECL_RTL_SET_P (obj))
2691 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2692 break;
2693
2694 case VAR_DECL:
2695 case PARM_DECL:
2696 case RESULT_DECL:
2697 *ws = 0;
2698 obj = *expr_p;
2699
2700 if (DECL_RTL_SET_P (obj))
2701 break;
2702
2703 if (DECL_MODE (obj) == BLKmode)
2704 x = produce_memory_decl_rtl (obj, regno);
2705 else
2706 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2707
2708 break;
2709
2710 default:
2711 break;
2712 }
2713
2714 if (x)
2715 {
2716 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2717 SET_DECL_RTL (obj, x);
2718 }
2719
2720 return NULL_TREE;
2721 }
2722
2723 /* Determines cost of the computation of EXPR. */
2724
2725 static unsigned
2726 computation_cost (tree expr, bool speed)
2727 {
2728 rtx seq, rslt;
2729 tree type = TREE_TYPE (expr);
2730 unsigned cost;
2731 /* Avoid using hard regs in ways which may be unsupported. */
2732 int regno = LAST_VIRTUAL_REGISTER + 1;
2733 enum function_frequency real_frequency = cfun->function_frequency;
2734
2735 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2736 crtl->maybe_hot_insn_p = speed;
2737 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2738 start_sequence ();
2739 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2740 seq = get_insns ();
2741 end_sequence ();
2742 default_rtl_profile ();
2743 cfun->function_frequency = real_frequency;
2744
2745 cost = seq_cost (seq, speed);
2746 if (MEM_P (rslt))
2747 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2748
2749 return cost;
2750 }
2751
2752 /* Returns variable containing the value of candidate CAND at statement AT. */
2753
2754 static tree
2755 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2756 {
2757 if (stmt_after_increment (loop, cand, stmt))
2758 return cand->var_after;
2759 else
2760 return cand->var_before;
2761 }
2762
2763 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2764 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2765
2766 int
2767 tree_int_cst_sign_bit (const_tree t)
2768 {
2769 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2770 unsigned HOST_WIDE_INT w;
2771
2772 if (bitno < HOST_BITS_PER_WIDE_INT)
2773 w = TREE_INT_CST_LOW (t);
2774 else
2775 {
2776 w = TREE_INT_CST_HIGH (t);
2777 bitno -= HOST_BITS_PER_WIDE_INT;
2778 }
2779
2780 return (w >> bitno) & 1;
2781 }
2782
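/* Worked example: for a constant of 32-bit precision, bitno is 31 and
   fits in HOST_BITS_PER_WIDE_INT, so the bit comes from
   TREE_INT_CST_LOW; e.g. the value 0x80000000 yields
   (0x80000000 >> 31) & 1 == 1.  For precisions above
   HOST_BITS_PER_WIDE_INT the bit is taken from TREE_INT_CST_HIGH
   after rebasing bitno.  */
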
2783 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2784 same precision that is at least as wide as the precision of TYPE, stores
2785 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2786 type of A and B. */
2787
2788 static tree
2789 determine_common_wider_type (tree *a, tree *b)
2790 {
2791 tree wider_type = NULL;
2792 tree suba, subb;
2793 tree atype = TREE_TYPE (*a);
2794
2795 if (CONVERT_EXPR_P (*a))
2796 {
2797 suba = TREE_OPERAND (*a, 0);
2798 wider_type = TREE_TYPE (suba);
2799 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2800 return atype;
2801 }
2802 else
2803 return atype;
2804
2805 if (CONVERT_EXPR_P (*b))
2806 {
2807 subb = TREE_OPERAND (*b, 0);
2808 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2809 return atype;
2810 }
2811 else
2812 return atype;
2813
2814 *a = suba;
2815 *b = subb;
2816 return wider_type;
2817 }
2818
2819 /* Determines the expression by which USE is expressed from induction variable
2820 CAND at statement AT in LOOP. The expression is stored in a decomposed
2821 form into AFF. Returns false if USE cannot be expressed using CAND. */
2822
2823 static bool
2824 get_computation_aff (struct loop *loop,
2825 struct iv_use *use, struct iv_cand *cand, gimple at,
2826 struct affine_tree_combination *aff)
2827 {
2828 tree ubase = use->iv->base;
2829 tree ustep = use->iv->step;
2830 tree cbase = cand->iv->base;
2831 tree cstep = cand->iv->step, cstep_common;
2832 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2833 tree common_type, var;
2834 tree uutype;
2835 aff_tree cbase_aff, var_aff;
2836 double_int rat;
2837
2838 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2839 {
2840 /* We do not have enough precision to express the values of USE. */
2841 return false;
2842 }
2843
2844 var = var_at_stmt (loop, cand, at);
2845 uutype = unsigned_type_for (utype);
2846
2847 /* If the conversion is not noop, perform it. */
2848 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2849 {
2850 cstep = fold_convert (uutype, cstep);
2851 cbase = fold_convert (uutype, cbase);
2852 var = fold_convert (uutype, var);
2853 }
2854
2855 if (!constant_multiple_of (ustep, cstep, &rat))
2856 return false;
2857
2858 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2859 type, we achieve better folding by computing their difference in this
2860 wider type, and casting the result to UUTYPE. We do not need to worry
2861 about overflows, as all the arithmetic will in the end be performed in
2862 UUTYPE anyway. */
2863 common_type = determine_common_wider_type (&ubase, &cbase);
2864
2865 /* use = ubase - ratio * cbase + ratio * var. */
2866 tree_to_aff_combination (ubase, common_type, aff);
2867 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2868 tree_to_aff_combination (var, uutype, &var_aff);
2869
2870 /* We need to shift the value if we are after the increment. */
2871 if (stmt_after_increment (loop, cand, at))
2872 {
2873 aff_tree cstep_aff;
2874
2875 if (common_type != uutype)
2876 cstep_common = fold_convert (common_type, cstep);
2877 else
2878 cstep_common = cstep;
2879
2880 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2881 aff_combination_add (&cbase_aff, &cstep_aff);
2882 }
2883
2884 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2885 aff_combination_add (aff, &cbase_aff);
2886 if (common_type != uutype)
2887 aff_combination_convert (aff, uutype);
2888
2889 aff_combination_scale (&var_aff, rat);
2890 aff_combination_add (aff, &var_aff);
2891
2892 return true;
2893 }
2894
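/* Worked example (hypothetical ivs): for a use with base 8 and step 4
   and a candidate with base 0 and step 2, constant_multiple_of yields
   rat == 2, and the affine combination built above is

     use = ubase - rat * cbase + rat * var = 8 - 2*0 + 2*var

   with an extra rat * cstep shift folded into the cbase part when the
   use statement executes after the candidate's increment.  */
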
2895 /* Determines the expression by which USE is expressed from induction variable
2896 CAND at statement AT in LOOP. The computation is unshared. */
2897
2898 static tree
2899 get_computation_at (struct loop *loop,
2900 struct iv_use *use, struct iv_cand *cand, gimple at)
2901 {
2902 aff_tree aff;
2903 tree type = TREE_TYPE (use->iv->base);
2904
2905 if (!get_computation_aff (loop, use, cand, at, &aff))
2906 return NULL_TREE;
2907 unshare_aff_combination (&aff);
2908 return fold_convert (type, aff_combination_to_tree (&aff));
2909 }
2910
2911 /* Determines the expression by which USE is expressed from induction variable
2912 CAND in LOOP. The computation is unshared. */
2913
2914 static tree
2915 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2916 {
2917 return get_computation_at (loop, use, cand, use->stmt);
2918 }
2919
2920 /* Returns cost of addition in MODE. */
2921
2922 static unsigned
2923 add_cost (enum machine_mode mode, bool speed)
2924 {
2925 static unsigned costs[NUM_MACHINE_MODES];
2926 rtx seq;
2927 unsigned cost;
2928
2929 if (costs[mode])
2930 return costs[mode];
2931
2932 start_sequence ();
2933 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2934 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2935 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2936 NULL_RTX);
2937 seq = get_insns ();
2938 end_sequence ();
2939
2940 cost = seq_cost (seq, speed);
2941 if (!cost)
2942 cost = 1;
2943
2944 costs[mode] = cost;
2945
2946 if (dump_file && (dump_flags & TDF_DETAILS))
2947 fprintf (dump_file, "Addition in %s costs %d\n",
2948 GET_MODE_NAME (mode), cost);
2949 return cost;
2950 }
2951
2952 /* Entry in a hashtable of already known costs for multiplication. */
2953 struct mbc_entry
2954 {
2955 HOST_WIDE_INT cst; /* The constant to multiply by. */
2956 enum machine_mode mode; /* In mode. */
2957 unsigned cost; /* The cost. */
2958 };
2959
2960 /* Computes the hash value for ENTRY. */
2961
2962 static hashval_t
2963 mbc_entry_hash (const void *entry)
2964 {
2965 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2966
2967 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2968 }
2969
2970 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2971
2972 static int
2973 mbc_entry_eq (const void *entry1, const void *entry2)
2974 {
2975 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2976 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2977
2978 return (e1->mode == e2->mode
2979 && e1->cst == e2->cst);
2980 }
2981
2982 /* Returns cost of multiplication by constant CST in MODE. */
2983
2984 unsigned
2985 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2986 {
2987 static htab_t costs;
2988 struct mbc_entry **cached, act;
2989 rtx seq;
2990 unsigned cost;
2991
2992 if (!costs)
2993 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2994
2995 act.mode = mode;
2996 act.cst = cst;
2997 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2998 if (*cached)
2999 return (*cached)->cost;
3000
3001 *cached = XNEW (struct mbc_entry);
3002 (*cached)->mode = mode;
3003 (*cached)->cst = cst;
3004
3005 start_sequence ();
3006 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3007 gen_int_mode (cst, mode), NULL_RTX, 0);
3008 seq = get_insns ();
3009 end_sequence ();
3010
3011 cost = seq_cost (seq, speed);
3012
3013 if (dump_file && (dump_flags & TDF_DETAILS))
3014 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3015 (int) cst, GET_MODE_NAME (mode), cost);
3016
3017 (*cached)->cost = cost;
3018
3019 return cost;
3020 }
3021
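/* Illustrative use (costs are target-dependent, numbers hypothetical):
   the first call of multiply_by_cost (4, SImode, true) expands the
   multiplication to RTL (typically a shift), measures the resulting
   insn sequence with seq_cost and caches the result under the key
   {SImode, 4}; later queries for the same (mode, constant) pair are
   answered from the hash table without expanding anything.  */
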
3022 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3023 validity for a memory reference accessing memory of mode MODE. */
3024
3025 bool
3026 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3027 {
3028 #define MAX_RATIO 128
3029 static sbitmap valid_mult[MAX_MACHINE_MODE];
3030
3031 if (!valid_mult[mode])
3032 {
3033 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3034 rtx addr;
3035 HOST_WIDE_INT i;
3036
3037 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3038 sbitmap_zero (valid_mult[mode]);
3039 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3040 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3041 {
3042 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3043 if (memory_address_p (mode, addr))
3044 SET_BIT (valid_mult[mode], i + MAX_RATIO);
3045 }
3046
3047 if (dump_file && (dump_flags & TDF_DETAILS))
3048 {
3049 fprintf (dump_file, " allowed multipliers:");
3050 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3051 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3052 fprintf (dump_file, " %d", (int) i);
3053 fprintf (dump_file, "\n");
3054 fprintf (dump_file, "\n");
3055 }
3056 }
3057
3058 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3059 return false;
3060
3061 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
3062 }
3063
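/* For instance (illustrative only, the result is target-dependent):
   on a target whose addressing modes accept scale factors 1, 2, 4 and
   8, the probing loop above sets the bits for exactly those ratios,
   so a later query such as multiplier_allowed_in_address_p (4, SImode)
   reduces to a single cached bitmap test.  */
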
3064 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3065 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3066 variable is omitted. Compute the cost for a memory reference that accesses
3067 a memory location of mode MEM_MODE.
3068
3069 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3070 size of MEM_MODE / RATIO) is available. To make this determination, we
3071 look at the size of the increment to be made, which is given in CSTEP.
3072 CSTEP may be zero if the step is unknown.
3073 STMT_AFTER_INC is true iff the statement we're looking at is after the
3074 increment of the original biv.
3075
3076 TODO -- there must be some better way. This all is quite crude. */
3077
3078 static comp_cost
3079 get_address_cost (bool symbol_present, bool var_present,
3080 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3081 HOST_WIDE_INT cstep, enum machine_mode mem_mode, bool speed,
3082 bool stmt_after_inc, bool *may_autoinc)
3083 {
3084 static bool initialized[MAX_MACHINE_MODE];
3085 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3086 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3087 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3088 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3089 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3090 unsigned cost, acost, complexity;
3091 bool offset_p, ratio_p, autoinc;
3092 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3093 unsigned HOST_WIDE_INT mask;
3094 unsigned bits;
3095
3096 if (!initialized[mem_mode])
3097 {
3098 HOST_WIDE_INT i;
3099 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3100 int old_cse_not_expected;
3101 unsigned sym_p, var_p, off_p, rat_p, add_c;
3102 rtx seq, addr, base;
3103 rtx reg0, reg1;
3104
3105 initialized[mem_mode] = true;
3106
3107 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3108
3109 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3110 for (i = start; i <= 1 << 20; i <<= 1)
3111 {
3112 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3113 if (!memory_address_p (mem_mode, addr))
3114 break;
3115 }
3116 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3117 off[mem_mode] = max_offset[mem_mode];
3118
3119 for (i = start; i <= 1 << 20; i <<= 1)
3120 {
3121 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3122 if (!memory_address_p (mem_mode, addr))
3123 break;
3124 }
3125 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3126
3127 if (dump_file && (dump_flags & TDF_DETAILS))
3128 {
3129 fprintf (dump_file, "get_address_cost:\n");
3130 fprintf (dump_file, " min offset %s %d\n",
3131 GET_MODE_NAME (mem_mode),
3132 (int) min_offset[mem_mode]);
3133 fprintf (dump_file, " max offset %s %d\n",
3134 GET_MODE_NAME (mem_mode),
3135 (int) max_offset[mem_mode]);
3136 }
3137
3138 rat[mem_mode] = 1;
3139 for (i = 2; i <= MAX_RATIO; i++)
3140 if (multiplier_allowed_in_address_p (i, mem_mode))
3141 {
3142 rat[mem_mode] = i;
3143 break;
3144 }
3145
3146 /* Compute the cost of various addressing modes. */
3147 acost = 0;
3148 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3149 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3150
3151 if (HAVE_PRE_DECREMENT)
3152 {
3153 addr = gen_rtx_PRE_DEC (Pmode, reg0);
3154 has_predec[mem_mode] = memory_address_p (mem_mode, addr);
3155 }
3156 if (HAVE_POST_DECREMENT)
3157 {
3158 addr = gen_rtx_POST_DEC (Pmode, reg0);
3159 has_postdec[mem_mode] = memory_address_p (mem_mode, addr);
3160 }
3161 if (HAVE_PRE_INCREMENT)
3162 {
3163 addr = gen_rtx_PRE_INC (Pmode, reg0);
3164 has_preinc[mem_mode] = memory_address_p (mem_mode, addr);
3165 }
3166 if (HAVE_POST_INCREMENT)
3167 {
3168 addr = gen_rtx_POST_INC (Pmode, reg0);
3169 has_postinc[mem_mode] = memory_address_p (mem_mode, addr);
3170 }
3171 for (i = 0; i < 16; i++)
3172 {
3173 sym_p = i & 1;
3174 var_p = (i >> 1) & 1;
3175 off_p = (i >> 2) & 1;
3176 rat_p = (i >> 3) & 1;
3177
3178 addr = reg0;
3179 if (rat_p)
3180 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3181 gen_int_mode (rat[mem_mode], Pmode));
3182
3183 if (var_p)
3184 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3185
3186 if (sym_p)
3187 {
3188 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3189 /* ??? We can run into trouble with some backends by presenting
3190 them with symbols which haven't been properly passed through
3191 targetm.encode_section_info. By setting the local bit, we
3192 enhance the probability of things working. */
3193 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3194
3195 if (off_p)
3196 base = gen_rtx_fmt_e (CONST, Pmode,
3197 gen_rtx_fmt_ee (PLUS, Pmode,
3198 base,
3199 gen_int_mode (off[mem_mode],
3200 Pmode)));
3201 }
3202 else if (off_p)
3203 base = gen_int_mode (off[mem_mode], Pmode);
3204 else
3205 base = NULL_RTX;
3206
3207 if (base)
3208 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3209
3210 start_sequence ();
3211 /* To avoid splitting addressing modes, pretend that no cse will
3212 follow. */
3213 old_cse_not_expected = cse_not_expected;
3214 cse_not_expected = true;
3215 addr = memory_address (mem_mode, addr);
3216 cse_not_expected = old_cse_not_expected;
3217 seq = get_insns ();
3218 end_sequence ();
3219
3220 acost = seq_cost (seq, speed);
3221 acost += address_cost (addr, mem_mode, speed);
3222
3223 if (!acost)
3224 acost = 1;
3225 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3226 }
3227
3228 /* On some targets, it is quite expensive to load a symbol into a register,
3229 which makes addresses that contain symbols look much more expensive.
3230 However, the symbol will have to be loaded in any case before the
3231 loop (and quite likely we have it in a register already), so it does not
3232 make much sense to penalize them too heavily. So make some final
3233 tweaks for the SYMBOL_PRESENT modes:
3234
3235 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3236 var is cheaper, use this mode with small penalty.
3237 If VAR_PRESENT is true, try whether the mode with
3238 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3239 if this is the case, use it. */
3240 add_c = add_cost (Pmode, speed);
3241 for (i = 0; i < 8; i++)
3242 {
3243 var_p = i & 1;
3244 off_p = (i >> 1) & 1;
3245 rat_p = (i >> 2) & 1;
3246
3247 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3248 if (var_p)
3249 acost += add_c;
3250
3251 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3252 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3253 }
3254
3255 if (dump_file && (dump_flags & TDF_DETAILS))
3256 {
3257 fprintf (dump_file, "Address costs:\n");
3258
3259 for (i = 0; i < 16; i++)
3260 {
3261 sym_p = i & 1;
3262 var_p = (i >> 1) & 1;
3263 off_p = (i >> 2) & 1;
3264 rat_p = (i >> 3) & 1;
3265
3266 fprintf (dump_file, " ");
3267 if (sym_p)
3268 fprintf (dump_file, "sym + ");
3269 if (var_p)
3270 fprintf (dump_file, "var + ");
3271 if (off_p)
3272 fprintf (dump_file, "cst + ");
3273 if (rat_p)
3274 fprintf (dump_file, "rat * ");
3275
3276 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3277 fprintf (dump_file, "index costs %d\n", acost);
3278 }
3279 if (has_predec[mem_mode] || has_postdec[mem_mode]
3280 || has_preinc[mem_mode] || has_postinc[mem_mode])
3281 fprintf (dump_file, " May include autoinc/dec\n");
3282 fprintf (dump_file, "\n");
3283 }
3284 }
3285
3286 bits = GET_MODE_BITSIZE (Pmode);
3287 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3288 offset &= mask;
3289 if ((offset >> (bits - 1) & 1))
3290 offset |= ~mask;
3291 s_offset = offset;
3292
3293 autoinc = false;
3294 msize = GET_MODE_SIZE (mem_mode);
3295 autoinc_offset = offset;
3296 if (stmt_after_inc)
3297 autoinc_offset += ratio * cstep;
3298 if (symbol_present || var_present || ratio != 1)
3299 autoinc = false;
3300 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3301 && msize == cstep)
3302 || (has_postdec[mem_mode] && autoinc_offset == 0
3303 && msize == -cstep)
3304 || (has_preinc[mem_mode] && autoinc_offset == msize
3305 && msize == cstep)
3306 || (has_predec[mem_mode] && autoinc_offset == -msize
3307 && msize == -cstep))
3308 autoinc = true;
3309
3310 cost = 0;
3311 offset_p = (s_offset != 0
3312 && min_offset[mem_mode] <= s_offset
3313 && s_offset <= max_offset[mem_mode]);
3314 ratio_p = (ratio != 1
3315 && multiplier_allowed_in_address_p (ratio, mem_mode));
3316
3317 if (ratio != 1 && !ratio_p)
3318 cost += multiply_by_cost (ratio, Pmode, speed);
3319
3320 if (s_offset && !offset_p && !symbol_present)
3321 cost += add_cost (Pmode, speed);
3322
3323 if (may_autoinc)
3324 *may_autoinc = autoinc;
3325 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3326 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3327 return new_cost (cost + acost, complexity);
3328 }
3329
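/* The mask computation in get_address_cost above sign-extends OFFSET
   to the width of Pmode.  E.g. (illustrative, assuming 32-bit Pmode
   and 64-bit HOST_WIDE_INT): bits == 32, mask == 0xffffffff, and an
   incoming offset of 0xfffffffc becomes s_offset == -4, so the
   min_offset/max_offset range check treats it as a small negative
   displacement rather than a huge positive one.  */
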
3330 /* Estimates cost of forcing expression EXPR into a variable. */
3331
3332 static comp_cost
3333 force_expr_to_var_cost (tree expr, bool speed)
3334 {
3335 static bool costs_initialized = false;
3336 static unsigned integer_cost [2];
3337 static unsigned symbol_cost [2];
3338 static unsigned address_cost [2];
3339 tree op0, op1;
3340 comp_cost cost0, cost1, cost;
3341 enum machine_mode mode;
3342
3343 if (!costs_initialized)
3344 {
3345 tree type = build_pointer_type (integer_type_node);
3346 tree var, addr;
3347 rtx x;
3348 int i;
3349
3350 var = create_tmp_var_raw (integer_type_node, "test_var");
3351 TREE_STATIC (var) = 1;
3352 x = produce_memory_decl_rtl (var, NULL);
3353 SET_DECL_RTL (var, x);
3354
3355 addr = build1 (ADDR_EXPR, type, var);
3356
3358 for (i = 0; i < 2; i++)
3359 {
3360 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3361 2000), i);
3362
3363 symbol_cost[i] = computation_cost (addr, i) + 1;
3364
3365 address_cost[i]
3366 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3367 addr,
3368 build_int_cst (sizetype, 2000)), i) + 1;
3369 if (dump_file && (dump_flags & TDF_DETAILS))
3370 {
3371 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3372 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3373 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3374 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3375 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3376 fprintf (dump_file, "\n");
3377 }
3378 }
3379
3380 costs_initialized = true;
3381 }
3382
3383 STRIP_NOPS (expr);
3384
3385 if (SSA_VAR_P (expr))
3386 return zero_cost;
3387
3388 if (is_gimple_min_invariant (expr))
3389 {
3390 if (TREE_CODE (expr) == INTEGER_CST)
3391 return new_cost (integer_cost [speed], 0);
3392
3393 if (TREE_CODE (expr) == ADDR_EXPR)
3394 {
3395 tree obj = TREE_OPERAND (expr, 0);
3396
3397 if (TREE_CODE (obj) == VAR_DECL
3398 || TREE_CODE (obj) == PARM_DECL
3399 || TREE_CODE (obj) == RESULT_DECL)
3400 return new_cost (symbol_cost [speed], 0);
3401 }
3402
3403 return new_cost (address_cost [speed], 0);
3404 }
3405
3406 switch (TREE_CODE (expr))
3407 {
3408 case POINTER_PLUS_EXPR:
3409 case PLUS_EXPR:
3410 case MINUS_EXPR:
3411 case MULT_EXPR:
3412 op0 = TREE_OPERAND (expr, 0);
3413 op1 = TREE_OPERAND (expr, 1);
3414 STRIP_NOPS (op0);
3415 STRIP_NOPS (op1);
3416
3417 if (is_gimple_val (op0))
3418 cost0 = zero_cost;
3419 else
3420 cost0 = force_expr_to_var_cost (op0, speed);
3421
3422 if (is_gimple_val (op1))
3423 cost1 = zero_cost;
3424 else
3425 cost1 = force_expr_to_var_cost (op1, speed);
3426
3427 break;
3428
3429 case NEGATE_EXPR:
3430 op0 = TREE_OPERAND (expr, 0);
3431 STRIP_NOPS (op0);
3432 op1 = NULL_TREE;
3433
3434 if (is_gimple_val (op0))
3435 cost0 = zero_cost;
3436 else
3437 cost0 = force_expr_to_var_cost (op0, speed);
3438
3439 cost1 = zero_cost;
3440 break;
3441
3442 default:
3443 /* Just an arbitrary value, FIXME. */
3444 return new_cost (target_spill_cost[speed], 0);
3445 }
3446
3447 mode = TYPE_MODE (TREE_TYPE (expr));
3448 switch (TREE_CODE (expr))
3449 {
3450 case POINTER_PLUS_EXPR:
3451 case PLUS_EXPR:
3452 case MINUS_EXPR:
3453 case NEGATE_EXPR:
3454 cost = new_cost (add_cost (mode, speed), 0);
3455 break;
3456
3457 case MULT_EXPR:
3458 if (cst_and_fits_in_hwi (op0))
3459 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3460 else if (cst_and_fits_in_hwi (op1))
3461 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3462 else
3463 return new_cost (target_spill_cost [speed], 0);
3464 break;
3465
3466 default:
3467 gcc_unreachable ();
3468 }
3469
3470 cost = add_costs (cost, cost0);
3471 cost = add_costs (cost, cost1);
3472
3473 /* Bound the cost by target_spill_cost. The parts of complicated
3474 computations often are either loop invariant or at least can
3475 be shared between several iv uses, so letting this grow without
3476 limits would not give reasonable results. */
3477 if (cost.cost > (int) target_spill_cost [speed])
3478 cost.cost = target_spill_cost [speed];
3479
3480 return cost;
3481 }
3482
3483 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3484 invariants the computation depends on. */
3485
3486 static comp_cost
3487 force_var_cost (struct ivopts_data *data,
3488 tree expr, bitmap *depends_on)
3489 {
3490 if (depends_on)
3491 {
3492 fd_ivopts_data = data;
3493 walk_tree (&expr, find_depends, depends_on, NULL);
3494 }
3495
3496 return force_expr_to_var_cost (expr, data->speed);
3497 }
3498
3499 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3500 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3501 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3502 invariants the computation depends on. */
3503
3504 static comp_cost
3505 split_address_cost (struct ivopts_data *data,
3506 tree addr, bool *symbol_present, bool *var_present,
3507 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3508 {
3509 tree core;
3510 HOST_WIDE_INT bitsize;
3511 HOST_WIDE_INT bitpos;
3512 tree toffset;
3513 enum machine_mode mode;
3514 int unsignedp, volatilep;
3515
3516 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3517 &unsignedp, &volatilep, false);
3518
3519 if (toffset != 0
3520 || bitpos % BITS_PER_UNIT != 0
3521 || TREE_CODE (core) != VAR_DECL)
3522 {
3523 *symbol_present = false;
3524 *var_present = true;
3525 fd_ivopts_data = data;
3526 walk_tree (&addr, find_depends, depends_on, NULL);
3527 return new_cost (target_spill_cost[data->speed], 0);
3528 }
3529
3530 *offset += bitpos / BITS_PER_UNIT;
3531 if (TREE_STATIC (core)
3532 || DECL_EXTERNAL (core))
3533 {
3534 *symbol_present = true;
3535 *var_present = false;
3536 return zero_cost;
3537 }
3538
3539 *symbol_present = false;
3540 *var_present = true;
3541 return zero_cost;
3542 }
3543
3544 /* Estimates cost of expressing difference of addresses E1 - E2 as
3545 var + symbol + offset. The value of offset is added to OFFSET,
3546 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3547 part is missing. DEPENDS_ON is a set of the invariants the computation
3548 depends on. */
3549
3550 static comp_cost
3551 ptr_difference_cost (struct ivopts_data *data,
3552 tree e1, tree e2, bool *symbol_present, bool *var_present,
3553 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3554 {
3555 HOST_WIDE_INT diff = 0;
3556 aff_tree aff_e1, aff_e2;
3557 tree type;
3558
3559 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3560
3561 if (ptr_difference_const (e1, e2, &diff))
3562 {
3563 *offset += diff;
3564 *symbol_present = false;
3565 *var_present = false;
3566 return zero_cost;
3567 }
3568
3569 if (integer_zerop (e2))
3570 return split_address_cost (data, TREE_OPERAND (e1, 0),
3571 symbol_present, var_present, offset, depends_on);
3572
3573 *symbol_present = false;
3574 *var_present = true;
3575
3576 type = signed_type_for (TREE_TYPE (e1));
3577 tree_to_aff_combination (e1, type, &aff_e1);
3578 tree_to_aff_combination (e2, type, &aff_e2);
3579 aff_combination_scale (&aff_e2, double_int_minus_one);
3580 aff_combination_add (&aff_e1, &aff_e2);
3581
3582 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3583 }
3584
3585 /* Estimates cost of expressing difference E1 - E2 as
3586 var + symbol + offset. The value of offset is added to OFFSET,
3587 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3588 part is missing. DEPENDS_ON is a set of the invariants the computation
3589 depends on. */
3590
3591 static comp_cost
3592 difference_cost (struct ivopts_data *data,
3593 tree e1, tree e2, bool *symbol_present, bool *var_present,
3594 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3595 {
3596 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3597 unsigned HOST_WIDE_INT off1, off2;
3598 aff_tree aff_e1, aff_e2;
3599 tree type;
3600
3601 e1 = strip_offset (e1, &off1);
3602 e2 = strip_offset (e2, &off2);
3603 *offset += off1 - off2;
3604
3605 STRIP_NOPS (e1);
3606 STRIP_NOPS (e2);
3607
3608 if (TREE_CODE (e1) == ADDR_EXPR)
3609 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3610 offset, depends_on);
3611 *symbol_present = false;
3612
3613 if (operand_equal_p (e1, e2, 0))
3614 {
3615 *var_present = false;
3616 return zero_cost;
3617 }
3618
3619 *var_present = true;
3620
3621 if (integer_zerop (e2))
3622 return force_var_cost (data, e1, depends_on);
3623
3624 if (integer_zerop (e1))
3625 {
3626 comp_cost cost = force_var_cost (data, e2, depends_on);
3627 cost.cost += multiply_by_cost (-1, mode, data->speed);
3628 return cost;
3629 }
3630
3631 type = signed_type_for (TREE_TYPE (e1));
3632 tree_to_aff_combination (e1, type, &aff_e1);
3633 tree_to_aff_combination (e2, type, &aff_e2);
3634 aff_combination_scale (&aff_e2, double_int_minus_one);
3635 aff_combination_add (&aff_e1, &aff_e2);
3636
3637 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3638 }
3639
3640 /* Determines the cost of the computation by which USE is expressed
3641 from induction variable CAND. If ADDRESS_P is true, we just need
3642 to create an address from it, otherwise we want to get it into a
3643 register. A set of invariants we depend on is stored in
3644 DEPENDS_ON. AT is the statement at which the value is computed.
3645 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3646 addressing is likely. */
3647
3648 static comp_cost
3649 get_computation_cost_at (struct ivopts_data *data,
3650 struct iv_use *use, struct iv_cand *cand,
3651 bool address_p, bitmap *depends_on, gimple at,
3652 bool *can_autoinc)
3653 {
3654 tree ubase = use->iv->base, ustep = use->iv->step;
3655 tree cbase, cstep;
3656 tree utype = TREE_TYPE (ubase), ctype;
3657 unsigned HOST_WIDE_INT cstepi, offset = 0;
3658 HOST_WIDE_INT ratio, aratio;
3659 bool var_present, symbol_present, stmt_is_after_inc;
3660 comp_cost cost;
3661 double_int rat;
3662 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3663
3664 *depends_on = NULL;
3665
3666 /* Only consider real candidates. */
3667 if (!cand->iv)
3668 return infinite_cost;
3669
3670 cbase = cand->iv->base;
3671 cstep = cand->iv->step;
3672 ctype = TREE_TYPE (cbase);
3673
3674 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3675 {
3676 /* We do not have enough precision to express the values of USE. */
3677 return infinite_cost;
3678 }
3679
3680 if (address_p)
3681 {
3682 /* Do not try to express address of an object with computation based
3683 on address of a different object. This may cause problems in rtl
3684 level alias analysis (that does not expect this to be happening,
3685 as this is illegal in C), and would be unlikely to be useful
3686 anyway. */
3687 if (use->iv->base_object
3688 && cand->iv->base_object
3689 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3690 return infinite_cost;
3691 }
3692
3693 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3694 {
3695 /* TODO -- add direct handling of this case. */
3696 goto fallback;
3697 }
3698
3699 /* CSTEPI is removed from the offset in case the statement is after the
3700 increment. If the step is not constant, we use zero instead.
3701 This is a bit imprecise (there is the extra addition), but
3702 redundancy elimination is likely to transform the code so that
3703 it uses the value of the variable before the increment anyway,
3704 so this is not too unrealistic. */
3705 if (cst_and_fits_in_hwi (cstep))
3706 cstepi = int_cst_value (cstep);
3707 else
3708 cstepi = 0;
3709
3710 if (!constant_multiple_of (ustep, cstep, &rat))
3711 return infinite_cost;
3712
3713 if (double_int_fits_in_shwi_p (rat))
3714 ratio = double_int_to_shwi (rat);
3715 else
3716 return infinite_cost;
3717
3718 STRIP_NOPS (cbase);
3719 ctype = TREE_TYPE (cbase);
3720
3721 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3722 or ratio == 1, it is better to handle this like
3723
3724 ubase - ratio * cbase + ratio * var
3725
3726 (this also holds in the case ratio == -1, TODO). */
3727
3728 if (cst_and_fits_in_hwi (cbase))
3729 {
3730 offset = - ratio * int_cst_value (cbase);
3731 cost = difference_cost (data,
3732 ubase, build_int_cst (utype, 0),
3733 &symbol_present, &var_present, &offset,
3734 depends_on);
3735 }
3736 else if (ratio == 1)
3737 {
3738 cost = difference_cost (data,
3739 ubase, cbase,
3740 &symbol_present, &var_present, &offset,
3741 depends_on);
3742 }
3743 else if (address_p
3744 && !POINTER_TYPE_P (ctype)
3745 && multiplier_allowed_in_address_p (ratio,
3746 TYPE_MODE (TREE_TYPE (utype))))
3747 {
3748 cbase
3749 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3750 cost = difference_cost (data,
3751 ubase, cbase,
3752 &symbol_present, &var_present, &offset,
3753 depends_on);
3754 }
3755 else
3756 {
3757 cost = force_var_cost (data, cbase, depends_on);
3758 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3759 cost = add_costs (cost,
3760 difference_cost (data,
3761 ubase, build_int_cst (utype, 0),
3762 &symbol_present, &var_present,
3763 &offset, depends_on));
3764 }
3765
3766 /* If we are after the increment, the value of the candidate is higher by
3767 one iteration. */
3768 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3769 if (stmt_is_after_inc)
3770 offset -= ratio * cstepi;
3771
3772 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3773 (symbol/var1/const parts may be omitted). If we are looking for an
3774 address, find the cost of addressing this. */
3775 if (address_p)
3776 return add_costs (cost,
3777 get_address_cost (symbol_present, var_present,
3778 offset, ratio, cstepi,
3779 TYPE_MODE (TREE_TYPE (utype)),
3780 speed, stmt_is_after_inc,
3781 can_autoinc));
3782
3783 /* Otherwise estimate the costs for computing the expression. */
3784 if (!symbol_present && !var_present && !offset)
3785 {
3786 if (ratio != 1)
3787 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3788 return cost;
3789 }
3790
3791 /* Symbol + offset should be compile-time computable, so consider them
3792 added once to the variable, if present. */
3793 if (var_present && (symbol_present || offset))
3794 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3795 / AVG_LOOP_NITER (data->current_loop);
3796
3797 /* Having offset does not affect runtime cost in case it is added to
3798 symbol, but it increases complexity. */
3799 if (offset)
3800 cost.complexity++;
3801
3802 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3803
3804 aratio = ratio > 0 ? ratio : -ratio;
3805 if (aratio != 1)
3806 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3807
3808 fallback:
3809 if (can_autoinc)
3810 *can_autoinc = false;
3811
3812 {
3813 /* Just get the expression, expand it and measure the cost. */
3814 tree comp = get_computation_at (data->current_loop, use, cand, at);
3815
3816 if (!comp)
3817 return infinite_cost;
3818
3819 if (address_p)
3820 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3821
3822 return new_cost (computation_cost (comp, speed), 0);
3823 }
3824 }
3825
3826 /* Determines the cost of the computation by which USE is expressed
3827 from induction variable CAND. If ADDRESS_P is true, we just need
3828 to create an address from it, otherwise we want to get it into a
3829 register. A set of invariants we depend on is stored in
3830 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3831 autoinc addressing is likely. */
3832
3833 static comp_cost
3834 get_computation_cost (struct ivopts_data *data,
3835 struct iv_use *use, struct iv_cand *cand,
3836 bool address_p, bitmap *depends_on, bool *can_autoinc)
3837 {
3838 return get_computation_cost_at (data,
3839 use, cand, address_p, depends_on, use->stmt,
3840 can_autoinc);
3841 }
3842
3843 /* Determines cost of basing replacement of USE on CAND in a generic
3844 expression. */
3845
3846 static bool
3847 determine_use_iv_cost_generic (struct ivopts_data *data,
3848 struct iv_use *use, struct iv_cand *cand)
3849 {
3850 bitmap depends_on;
3851 comp_cost cost;
3852
3853 /* The simple case first -- if we need to express value of the preserved
3854 original biv, the cost is 0. This also prevents us from counting the
3855 cost of increment twice -- once at this use and once in the cost of
3856 the candidate. */
3857 if (cand->pos == IP_ORIGINAL
3858 && cand->incremented_at == use->stmt)
3859 {
3860 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3861 return true;
3862 }
3863
3864 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3865 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3866
3867 return !infinite_cost_p (cost);
3868 }
3869
3870 /* Determines cost of basing replacement of USE on CAND in an address. */
3871
3872 static bool
3873 determine_use_iv_cost_address (struct ivopts_data *data,
3874 struct iv_use *use, struct iv_cand *cand)
3875 {
3876 bitmap depends_on;
3877 bool can_autoinc;
3878 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3879 &can_autoinc);
3880
3881 if (cand->ainc_use == use)
3882 {
3883 if (can_autoinc)
3884 cost.cost -= cand->cost_step;
3885 /* If we generated the candidate solely for exploiting autoincrement
3886 opportunities, and it turns out it can't be used, set the cost to
3887 infinity to make sure we ignore it. */
3888 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3889 cost = infinite_cost;
3890 }
3891 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3892
3893 return !infinite_cost_p (cost);
3894 }
3895
3896 /* Computes the value of candidate CAND at position AT in iteration NITER, and
3897 stores it in VAL. */
3898
3899 static void
3900 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3901 aff_tree *val)
3902 {
3903 aff_tree step, delta, nit;
3904 struct iv *iv = cand->iv;
3905 tree type = TREE_TYPE (iv->base);
3906 tree steptype = type;
3907 if (POINTER_TYPE_P (type))
3908 steptype = sizetype;
3909
3910 tree_to_aff_combination (iv->step, steptype, &step);
3911 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3912 aff_combination_convert (&nit, steptype);
3913 aff_combination_mult (&nit, &step, &delta);
3914 if (stmt_after_increment (loop, cand, at))
3915 aff_combination_add (&delta, &step);
3916
3917 tree_to_aff_combination (iv->base, type, val);
3918 aff_combination_add (val, &delta);
3919 }
3920
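/* Worked example (hypothetical iv): for a candidate with base 16 and
   step 4, the value after NITER iterations is

     val = 16 + 4 * niter

   with one extra step added when AT executes after the increment,
   i.e. val = 16 + 4 * (niter + 1).  */
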
3921 /* Returns the period of induction variable IV. */
3922
3923 static tree
3924 iv_period (struct iv *iv)
3925 {
3926 tree step = iv->step, period, type;
3927 tree pow2div;
3928
3929 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3930
3931 /* The period of the iv is gcd (step, type range). Since the type range is
3932 a power of two, it suffices to determine the maximum power of two that
3933 divides the step. */
3934 pow2div = num_ending_zeros (step);
3935 type = unsigned_type_for (TREE_TYPE (step));
3936
3937 period = build_low_bits_mask (type,
3938 (TYPE_PRECISION (type)
3939 - tree_low_cst (pow2div, 1)));
3940
3941 return period;
3942 }
3943
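/* Worked example: for step == 4 in a 32-bit unsigned type,
   num_ending_zeros yields 2, so the period is the low-bits mask of
   32 - 2 == 30 bits, i.e. 0x3fffffff: the iv takes 2^30 distinct
   values before wrapping, so an exit test based on it stays reliable
   for at most that many iterations.  */
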
3944 /* Returns the comparison operator used when eliminating the iv USE. */
3945
3946 static enum tree_code
3947 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3948 {
3949 struct loop *loop = data->current_loop;
3950 basic_block ex_bb;
3951 edge exit;
3952
3953 ex_bb = gimple_bb (use->stmt);
3954 exit = EDGE_SUCC (ex_bb, 0);
3955 if (flow_bb_inside_loop_p (loop, exit->dest))
3956 exit = EDGE_SUCC (ex_bb, 1);
3957
3958 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3959 }
3960
3961 /* Check whether it is possible to express the condition in USE by a comparison
3962 of candidate CAND. If so, store the value to compare against to BOUND. */
3963
3964 static bool
3965 may_eliminate_iv (struct ivopts_data *data,
3966 struct iv_use *use, struct iv_cand *cand, tree *bound)
3967 {
3968 basic_block ex_bb;
3969 edge exit;
3970 tree nit, period;
3971 struct loop *loop = data->current_loop;
3972 aff_tree bnd;
3973
3974 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3975 return false;
3976
3977 /* For now this works only for exits that dominate the loop latch.
3978 TODO: extend to other conditions inside the loop body. */
3979 ex_bb = gimple_bb (use->stmt);
3980 if (use->stmt != last_stmt (ex_bb)
3981 || gimple_code (use->stmt) != GIMPLE_COND
3982 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3983 return false;
3984
3985 exit = EDGE_SUCC (ex_bb, 0);
3986 if (flow_bb_inside_loop_p (loop, exit->dest))
3987 exit = EDGE_SUCC (ex_bb, 1);
3988 if (flow_bb_inside_loop_p (loop, exit->dest))
3989 return false;
3990
3991 nit = niter_for_exit (data, exit);
3992 if (!nit)
3993 return false;
3994
3995 /* Determine whether we can use the variable to test the exit condition.
3996 This is the case iff the period of the induction variable is greater
3997 than the number of iterations for which the exit condition is true. */
3998 period = iv_period (cand->iv);
3999
4000 /* If the number of iterations is constant, compare against it directly. */
4001 if (TREE_CODE (nit) == INTEGER_CST)
4002 {
4003 if (!tree_int_cst_lt (nit, period))
4004 return false;
4005 }
4006
4007 /* If not, and if this is the only possible exit of the loop, see whether
4008 we can get a conservative estimate on the number of iterations of the
4009 entire loop and compare against that instead. */
4010 else if (loop_only_exit_p (loop, exit))
4011 {
4012 double_int period_value, max_niter;
4013 if (!estimated_loop_iterations (loop, true, &max_niter))
4014 return false;
4015 period_value = tree_to_double_int (period);
4016 if (double_int_ucmp (max_niter, period_value) >= 0)
4017 return false;
4018 }
4019
4020 /* Otherwise, punt. */
4021 else
4022 return false;
4023
4024 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4025
4026 *bound = aff_combination_to_tree (&bnd);
4027 /* If computing the bound requires an expensive operation such as a division,
4028 eliminating the iv is unlikely to be more profitable than keeping it. */
4029 if (expression_expensive_p (*bound))
4030 return false;
4031 return true;
4032 }
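
/* As an illustration of iv elimination (with made-up names): in

     for (i = 0; i < 100; i++)
       a[i] = 0;

   where the store is also expressed through a pointer candidate P walking
   over A, the value B of P after 100 iterations is computed by
   cand_value_at, and the exit test on I can be replaced by a comparison
   of P against B, allowing I to be removed entirely.  */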
4033
4034 /* Determines cost of basing replacement of USE on CAND in a condition. */
4035
4036 static bool
4037 determine_use_iv_cost_condition (struct ivopts_data *data,
4038 struct iv_use *use, struct iv_cand *cand)
4039 {
4040 tree bound = NULL_TREE;
4041 struct iv *cmp_iv;
4042 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4043 comp_cost elim_cost, express_cost, cost;
4044 bool ok;
4045
4046 /* Only consider real candidates. */
4047 if (!cand->iv)
4048 {
4049 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4050 return false;
4051 }
4052
4053 /* Try iv elimination. */
4054 if (may_eliminate_iv (data, use, cand, &bound))
4055 {
4056 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4057 /* The bound is a loop invariant, so it will be computed
4058 only once. */
4059 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4060 }
4061 else
4062 elim_cost = infinite_cost;
4063
4064 /* Try expressing the original giv. If it is compared with an invariant,
4065 note that we cannot get rid of it. */
4066 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
4067 gcc_assert (ok);
4068
4069 express_cost = get_computation_cost (data, use, cand, false,
4070 &depends_on_express, NULL);
4071 fd_ivopts_data = data;
4072 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4073
4074 /* Choose the better approach, preferring the eliminated IV. */
4075 if (compare_costs (elim_cost, express_cost) <= 0)
4076 {
4077 cost = elim_cost;
4078 depends_on = depends_on_elim;
4079 depends_on_elim = NULL;
4080 }
4081 else
4082 {
4083 cost = express_cost;
4084 depends_on = depends_on_express;
4085 depends_on_express = NULL;
4086 bound = NULL_TREE;
4087 }
4088
4089 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4090
4091 if (depends_on_elim)
4092 BITMAP_FREE (depends_on_elim);
4093 if (depends_on_express)
4094 BITMAP_FREE (depends_on_express);
4095
4096 return !infinite_cost_p (cost);
4097 }
4098
4099 /* Determines cost of basing replacement of USE on CAND. Returns false
4100 if USE cannot be based on CAND. */
4101
4102 static bool
4103 determine_use_iv_cost (struct ivopts_data *data,
4104 struct iv_use *use, struct iv_cand *cand)
4105 {
4106 switch (use->type)
4107 {
4108 case USE_NONLINEAR_EXPR:
4109 return determine_use_iv_cost_generic (data, use, cand);
4110
4111 case USE_ADDRESS:
4112 return determine_use_iv_cost_address (data, use, cand);
4113
4114 case USE_COMPARE:
4115 return determine_use_iv_cost_condition (data, use, cand);
4116
4117 default:
4118 gcc_unreachable ();
4119 }
4120 }
4121
4122 /* Return true if get_computation_cost indicates that autoincrement is
4123 a possibility for the pair of USE and CAND, false otherwise. */
4124
4125 static bool
4126 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4127 struct iv_cand *cand)
4128 {
4129 bitmap depends_on;
4130 bool can_autoinc;
4131 comp_cost cost;
4132
4133 if (use->type != USE_ADDRESS)
4134 return false;
4135
4136 cost = get_computation_cost (data, use, cand, true, &depends_on,
4137 &can_autoinc);
4138
4139 BITMAP_FREE (depends_on);
4140
4141 return !infinite_cost_p (cost) && can_autoinc;
4142 }
4143
4144 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4145 use that allows autoincrement, and set their AINC_USE if possible. */
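/* For example, on a target with post-increment addressing, a use "*p"
   placed just before the original biv increment "p = p + 1" in the same
   basic block can be combined with it into a single post-increment
   access, so such a use is recorded as the candidate's AINC_USE.  */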
4146
4147 static void
4148 set_autoinc_for_original_candidates (struct ivopts_data *data)
4149 {
4150 unsigned i, j;
4151
4152 for (i = 0; i < n_iv_cands (data); i++)
4153 {
4154 struct iv_cand *cand = iv_cand (data, i);
4155 struct iv_use *closest = NULL;
4156 if (cand->pos != IP_ORIGINAL)
4157 continue;
4158 for (j = 0; j < n_iv_uses (data); j++)
4159 {
4160 struct iv_use *use = iv_use (data, j);
4161 unsigned uid = gimple_uid (use->stmt);
4162 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4163 || uid > gimple_uid (cand->incremented_at))
4164 continue;
4165 if (closest == NULL || uid > gimple_uid (closest->stmt))
4166 closest = use;
4167 }
4168 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4169 continue;
4170 cand->ainc_use = closest;
4171 }
4172 }
4173
4174 /* Finds the candidates for the induction variables. */
4175
4176 static void
4177 find_iv_candidates (struct ivopts_data *data)
4178 {
4179 /* Add commonly used ivs. */
4180 add_standard_iv_candidates (data);
4181
4182 /* Add old induction variables. */
4183 add_old_ivs_candidates (data);
4184
4185 /* Add induction variables derived from uses. */
4186 add_derived_ivs_candidates (data);
4187
4188 set_autoinc_for_original_candidates (data);
4189
4190 /* Record the important candidates. */
4191 record_important_candidates (data);
4192 }
4193
4194 /* Determines the cost of basing each use on each of the iv candidates. */
4195
4196 static void
4197 determine_use_iv_costs (struct ivopts_data *data)
4198 {
4199 unsigned i, j;
4200 struct iv_use *use;
4201 struct iv_cand *cand;
4202 bitmap to_clear = BITMAP_ALLOC (NULL);
4203
4204 alloc_use_cost_map (data);
4205
4206 for (i = 0; i < n_iv_uses (data); i++)
4207 {
4208 use = iv_use (data, i);
4209
4210 if (data->consider_all_candidates)
4211 {
4212 for (j = 0; j < n_iv_cands (data); j++)
4213 {
4214 cand = iv_cand (data, j);
4215 determine_use_iv_cost (data, use, cand);
4216 }
4217 }
4218 else
4219 {
4220 bitmap_iterator bi;
4221
4222 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4223 {
4224 cand = iv_cand (data, j);
4225 if (!determine_use_iv_cost (data, use, cand))
4226 bitmap_set_bit (to_clear, j);
4227 }
4228
4229 /* Remove the candidates for which the cost is infinite from
4230 the list of related candidates. */
4231 bitmap_and_compl_into (use->related_cands, to_clear);
4232 bitmap_clear (to_clear);
4233 }
4234 }
4235
4236 BITMAP_FREE (to_clear);
4237
4238 if (dump_file && (dump_flags & TDF_DETAILS))
4239 {
4240 fprintf (dump_file, "Use-candidate costs:\n");
4241
4242 for (i = 0; i < n_iv_uses (data); i++)
4243 {
4244 use = iv_use (data, i);
4245
4246 fprintf (dump_file, "Use %d:\n", i);
4247 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4248 for (j = 0; j < use->n_map_members; j++)
4249 {
4250 if (!use->cost_map[j].cand
4251 || infinite_cost_p (use->cost_map[j].cost))
4252 continue;
4253
4254 fprintf (dump_file, " %d\t%d\t%d\t",
4255 use->cost_map[j].cand->id,
4256 use->cost_map[j].cost.cost,
4257 use->cost_map[j].cost.complexity);
4258 if (use->cost_map[j].depends_on)
4259 bitmap_print (dump_file,
4260 use->cost_map[j].depends_on, "","");
4261 fprintf (dump_file, "\n");
4262 }
4263
4264 fprintf (dump_file, "\n");
4265 }
4266 fprintf (dump_file, "\n");
4267 }
4268 }
4269
4270 /* Determines cost of the candidate CAND. */
4271
4272 static void
4273 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4274 {
4275 comp_cost cost_base;
4276 unsigned cost, cost_step;
4277 tree base;
4278
4279 if (!cand->iv)
4280 {
4281 cand->cost = 0;
4282 return;
4283 }
4284
4285 /* There are two costs associated with the candidate -- its increment
4286 and its initialization. The latter is almost negligible for any loop
4287 that rolls enough, so we take it into account only marginally. */
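/* Since the initialization is executed once per loop rather than once per
   iteration, only a 1 / AVG_LOOP_NITER fraction of the cost of computing
   the base value is charged below.  */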
4288
4289 base = cand->iv->base;
4290 cost_base = force_var_cost (data, base, NULL);
4291 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4292
4293 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (data->current_loop);
4294
4295 /* Prefer the original ivs unless we may gain something by replacing them;
4296 the reason is to make debugging simpler. This preference is thus not
4297 relevant for artificial ivs created by other optimization passes. */
4298 if (cand->pos != IP_ORIGINAL
4299 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4300 cost++;
4301
4302 /* Prefer not to insert statements into the latch unless there are some
4303 already (so that we do not create unnecessary jumps). */
4304 if (cand->pos == IP_END
4305 && empty_block_p (ip_end_pos (data->current_loop)))
4306 cost++;
4307
4308 cand->cost = cost;
4309 cand->cost_step = cost_step;
4310 }
4311
4312 /* Determines the computation cost of each of the candidates. */
4313
4314 static void
4315 determine_iv_costs (struct ivopts_data *data)
4316 {
4317 unsigned i;
4318
4319 if (dump_file && (dump_flags & TDF_DETAILS))
4320 {
4321 fprintf (dump_file, "Candidate costs:\n");
4322 fprintf (dump_file, " cand\tcost\n");
4323 }
4324
4325 for (i = 0; i < n_iv_cands (data); i++)
4326 {
4327 struct iv_cand *cand = iv_cand (data, i);
4328
4329 determine_iv_cost (data, cand);
4330
4331 if (dump_file && (dump_flags & TDF_DETAILS))
4332 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4333 }
4334
4335 if (dump_file && (dump_flags & TDF_DETAILS))
4336 fprintf (dump_file, "\n");
4337 }
4338
4339 /* Calculates cost for having SIZE induction variables. */
4340
4341 static unsigned
4342 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4343 {
4344 /* We add size to the cost, so that we prefer eliminating ivs
4345 if possible. */
4346 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4347 }
4348
4349 /* For each size of the induction variable set, determine the penalty. */
4350
4351 static void
4352 determine_set_costs (struct ivopts_data *data)
4353 {
4354 unsigned j, n;
4355 gimple phi;
4356 gimple_stmt_iterator psi;
4357 tree op;
4358 struct loop *loop = data->current_loop;
4359 bitmap_iterator bi;
4360
4361 /* We use the following model (definitely improvable, especially the
4362 cost function -- TODO):
4363
4364 We estimate the number of registers available (using MD data), name it A.
4365
4366 We estimate the number of registers used by the loop, name it U. This
4367 number is obtained as the number of loop phi nodes (not counting virtual
4368 registers and bivs) + the number of variables from outside of the loop.
4369
4370 We set a reserve R (free regs that are used for temporary computations,
4371 etc.). For now the reserve is a constant 3.
4372
4373 Let I be the number of induction variables.
4374
4375 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
4376 encouraged to create a lot of ivs for no reason).
4377 -- if A - R < U + I <= A, the cost is I * PRES_COST
4378 -- if U + I > A, the cost is I * PRES_COST and
4379 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
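/* A small illustration, with made-up numbers: for A = 16 and R = 3, a loop
   already using U = 10 registers can accommodate I = 3 ivs at the small
   cost (10 + 3 + 3 <= 16); with I = 5 we get A - R < U + I <= A and each
   iv is charged PRES_COST; with I = 8, U + I exceeds A and the spill term
   is added as well.  */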
4380
4381 if (dump_file && (dump_flags & TDF_DETAILS))
4382 {
4383 fprintf (dump_file, "Global costs:\n");
4384 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4385 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4386 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4387 }
4388
4389 n = 0;
4390 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4391 {
4392 phi = gsi_stmt (psi);
4393 op = PHI_RESULT (phi);
4394
4395 if (!is_gimple_reg (op))
4396 continue;
4397
4398 if (get_iv (data, op))
4399 continue;
4400
4401 n++;
4402 }
4403
4404 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4405 {
4406 struct version_info *info = ver_info (data, j);
4407
4408 if (info->inv_id && info->has_nonlin_use)
4409 n++;
4410 }
4411
4412 data->regs_used = n;
4413 if (dump_file && (dump_flags & TDF_DETAILS))
4414 fprintf (dump_file, " regs_used %d\n", n);
4415
4416 if (dump_file && (dump_flags & TDF_DETAILS))
4417 {
4418 fprintf (dump_file, " cost for size:\n");
4419 fprintf (dump_file, " ivs\tcost\n");
4420 for (j = 0; j <= 2 * target_avail_regs; j++)
4421 fprintf (dump_file, " %d\t%d\n", j,
4422 ivopts_global_cost_for_size (data, j));
4423 fprintf (dump_file, "\n");
4424 }
4425 }
4426
4427 /* Returns true if A is a cheaper cost pair than B. */
4428
4429 static bool
4430 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4431 {
4432 int cmp;
4433
4434 if (!a)
4435 return false;
4436
4437 if (!b)
4438 return true;
4439
4440 cmp = compare_costs (a->cost, b->cost);
4441 if (cmp < 0)
4442 return true;
4443
4444 if (cmp > 0)
4445 return false;
4446
4447 /* In case the costs are the same, prefer the cheaper candidate. */
4448 if (a->cand->cost < b->cand->cost)
4449 return true;
4450
4451 return false;
4452 }
4453
4454 /* Computes the cost field of the IVS structure. */
4455
4456 static void
4457 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4458 {
4459 comp_cost cost = ivs->cand_use_cost;
4460 cost.cost += ivs->cand_cost;
4461 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4462
4463 ivs->cost = cost;
4464 }
4465
4466 /* Remove the invariants in set INVS from set IVS. */
4467
4468 static void
4469 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4470 {
4471 bitmap_iterator bi;
4472 unsigned iid;
4473
4474 if (!invs)
4475 return;
4476
4477 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4478 {
4479 ivs->n_invariant_uses[iid]--;
4480 if (ivs->n_invariant_uses[iid] == 0)
4481 ivs->n_regs--;
4482 }
4483 }
4484
4485 /* Set USE not to be expressed by any candidate in IVS. */
4486
4487 static void
4488 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4489 struct iv_use *use)
4490 {
4491 unsigned uid = use->id, cid;
4492 struct cost_pair *cp;
4493
4494 cp = ivs->cand_for_use[uid];
4495 if (!cp)
4496 return;
4497 cid = cp->cand->id;
4498
4499 ivs->bad_uses++;
4500 ivs->cand_for_use[uid] = NULL;
4501 ivs->n_cand_uses[cid]--;
4502
4503 if (ivs->n_cand_uses[cid] == 0)
4504 {
4505 bitmap_clear_bit (ivs->cands, cid);
4506 /* Do not count the pseudocandidates. */
4507 if (cp->cand->iv)
4508 ivs->n_regs--;
4509 ivs->n_cands--;
4510 ivs->cand_cost -= cp->cand->cost;
4511
4512 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4513 }
4514
4515 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4516
4517 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4518 iv_ca_recount_cost (data, ivs);
4519 }
4520
4521 /* Add invariants in set INVS to set IVS. */
4522
4523 static void
4524 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4525 {
4526 bitmap_iterator bi;
4527 unsigned iid;
4528
4529 if (!invs)
4530 return;
4531
4532 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533 {
4534 ivs->n_invariant_uses[iid]++;
4535 if (ivs->n_invariant_uses[iid] == 1)
4536 ivs->n_regs++;
4537 }
4538 }
4539
4540 /* Set cost pair for USE in set IVS to CP. */
4541
4542 static void
4543 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4544 struct iv_use *use, struct cost_pair *cp)
4545 {
4546 unsigned uid = use->id, cid;
4547
4548 if (ivs->cand_for_use[uid] == cp)
4549 return;
4550
4551 if (ivs->cand_for_use[uid])
4552 iv_ca_set_no_cp (data, ivs, use);
4553
4554 if (cp)
4555 {
4556 cid = cp->cand->id;
4557
4558 ivs->bad_uses--;
4559 ivs->cand_for_use[uid] = cp;
4560 ivs->n_cand_uses[cid]++;
4561 if (ivs->n_cand_uses[cid] == 1)
4562 {
4563 bitmap_set_bit (ivs->cands, cid);
4564 /* Do not count the pseudocandidates. */
4565 if (cp->cand->iv)
4566 ivs->n_regs++;
4567 ivs->n_cands++;
4568 ivs->cand_cost += cp->cand->cost;
4569
4570 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4571 }
4572
4573 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4574 iv_ca_set_add_invariants (ivs, cp->depends_on);
4575 iv_ca_recount_cost (data, ivs);
4576 }
4577 }
4578
4579 /* Extend set IVS by expressing USE by some of the candidates in it
4580 if possible. */
4581
4582 static void
4583 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4584 struct iv_use *use)
4585 {
4586 struct cost_pair *best_cp = NULL, *cp;
4587 bitmap_iterator bi;
4588 unsigned i;
4589
4590 gcc_assert (ivs->upto >= use->id);
4591
4592 if (ivs->upto == use->id)
4593 {
4594 ivs->upto++;
4595 ivs->bad_uses++;
4596 }
4597
4598 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4599 {
4600 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4601
4602 if (cheaper_cost_pair (cp, best_cp))
4603 best_cp = cp;
4604 }
4605
4606 iv_ca_set_cp (data, ivs, use, best_cp);
4607 }
4608
4609 /* Get cost for assignment IVS. */
4610
4611 static comp_cost
4612 iv_ca_cost (struct iv_ca *ivs)
4613 {
4614 /* This was a conditional expression but it triggered a bug in
4615 Sun C 5.5. */
4616 if (ivs->bad_uses)
4617 return infinite_cost;
4618 else
4619 return ivs->cost;
4620 }
4621
4622 /* Returns true if all dependences of CP are among invariants in IVS. */
4623
4624 static bool
4625 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4626 {
4627 unsigned i;
4628 bitmap_iterator bi;
4629
4630 if (!cp->depends_on)
4631 return true;
4632
4633 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4634 {
4635 if (ivs->n_invariant_uses[i] == 0)
4636 return false;
4637 }
4638
4639 return true;
4640 }
4641
4642 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4643 it before NEXT_CHANGE. */
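/* A typical pattern for using these deltas: build a list of changes,
   commit it with iv_ca_delta_commit (..., true), measure iv_ca_cost of
   the result, revert it with iv_ca_delta_commit (..., false), and keep
   the delta only if the cost improved.  */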
4644
4645 static struct iv_ca_delta *
4646 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4647 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4648 {
4649 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4650
4651 change->use = use;
4652 change->old_cp = old_cp;
4653 change->new_cp = new_cp;
4654 change->next_change = next_change;
4655
4656 return change;
4657 }
4658
4659 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4660 are rewritten. */
4661
4662 static struct iv_ca_delta *
4663 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4664 {
4665 struct iv_ca_delta *last;
4666
4667 if (!l2)
4668 return l1;
4669
4670 if (!l1)
4671 return l2;
4672
4673 for (last = l1; last->next_change; last = last->next_change)
4674 continue;
4675 last->next_change = l2;
4676
4677 return l1;
4678 }
4679
4680 /* Returns the cost pair through which USE is expressed in IVS. */
4681
4682 static struct cost_pair *
4683 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4684 {
4685 return ivs->cand_for_use[use->id];
4686 }
4687
4688 /* Reverses the list of changes DELTA in place, forming its inverse (the old and new cost pairs are swapped). */
4689
4690 static struct iv_ca_delta *
4691 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4692 {
4693 struct iv_ca_delta *act, *next, *prev = NULL;
4694 struct cost_pair *tmp;
4695
4696 for (act = delta; act; act = next)
4697 {
4698 next = act->next_change;
4699 act->next_change = prev;
4700 prev = act;
4701
4702 tmp = act->old_cp;
4703 act->old_cp = act->new_cp;
4704 act->new_cp = tmp;
4705 }
4706
4707 return prev;
4708 }
4709
4710 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4711 reverted instead. */
4712
4713 static void
4714 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4715 struct iv_ca_delta *delta, bool forward)
4716 {
4717 struct cost_pair *from, *to;
4718 struct iv_ca_delta *act;
4719
4720 if (!forward)
4721 delta = iv_ca_delta_reverse (delta);
4722
4723 for (act = delta; act; act = act->next_change)
4724 {
4725 from = act->old_cp;
4726 to = act->new_cp;
4727 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4728 iv_ca_set_cp (data, ivs, act->use, to);
4729 }
4730
4731 if (!forward)
4732 iv_ca_delta_reverse (delta);
4733 }
4734
4735 /* Returns true if CAND is used in IVS. */
4736
4737 static bool
4738 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4739 {
4740 return ivs->n_cand_uses[cand->id] > 0;
4741 }
4742
4743 /* Returns the number of induction variable candidates in the set IVS. */
4744
4745 static unsigned
4746 iv_ca_n_cands (struct iv_ca *ivs)
4747 {
4748 return ivs->n_cands;
4749 }
4750
4751 /* Free the list of changes DELTA. */
4752
4753 static void
4754 iv_ca_delta_free (struct iv_ca_delta **delta)
4755 {
4756 struct iv_ca_delta *act, *next;
4757
4758 for (act = *delta; act; act = next)
4759 {
4760 next = act->next_change;
4761 free (act);
4762 }
4763
4764 *delta = NULL;
4765 }
4766
4767 /* Allocates a new iv candidate assignment. */
4768
4769 static struct iv_ca *
4770 iv_ca_new (struct ivopts_data *data)
4771 {
4772 struct iv_ca *nw = XNEW (struct iv_ca);
4773
4774 nw->upto = 0;
4775 nw->bad_uses = 0;
4776 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4777 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4778 nw->cands = BITMAP_ALLOC (NULL);
4779 nw->n_cands = 0;
4780 nw->n_regs = 0;
4781 nw->cand_use_cost = zero_cost;
4782 nw->cand_cost = 0;
4783 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4784 nw->cost = zero_cost;
4785
4786 return nw;
4787 }
4788
4789 /* Free memory occupied by the set IVS. */
4790
4791 static void
4792 iv_ca_free (struct iv_ca **ivs)
4793 {
4794 free ((*ivs)->cand_for_use);
4795 free ((*ivs)->n_cand_uses);
4796 BITMAP_FREE ((*ivs)->cands);
4797 free ((*ivs)->n_invariant_uses);
4798 free (*ivs);
4799 *ivs = NULL;
4800 }
4801
4802 /* Dumps IVS to FILE. */
4803
4804 static void
4805 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4806 {
4807 const char *pref = " invariants ";
4808 unsigned i;
4809 comp_cost cost = iv_ca_cost (ivs);
4810
4811 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4812 bitmap_print (file, ivs->cands, " candidates ","\n");
4813
4814 for (i = 1; i <= data->max_inv_id; i++)
4815 if (ivs->n_invariant_uses[i])
4816 {
4817 fprintf (file, "%s%d", pref, i);
4818 pref = ", ";
4819 }
4820 fprintf (file, "\n");
4821 }
4822
4823 /* Try expressing each use in IVS by CAND where that is cheaper. Return the
4824 cost of the new set, and store the differences in DELTA. The number of
4825 induction variables in the new set is stored to N_IVS. */
4826
4827 static comp_cost
4828 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4829 struct iv_cand *cand, struct iv_ca_delta **delta,
4830 unsigned *n_ivs)
4831 {
4832 unsigned i;
4833 comp_cost cost;
4834 struct iv_use *use;
4835 struct cost_pair *old_cp, *new_cp;
4836
4837 *delta = NULL;
4838 for (i = 0; i < ivs->upto; i++)
4839 {
4840 use = iv_use (data, i);
4841 old_cp = iv_ca_cand_for_use (ivs, use);
4842
4843 if (old_cp
4844 && old_cp->cand == cand)
4845 continue;
4846
4847 new_cp = get_use_iv_cost (data, use, cand);
4848 if (!new_cp)
4849 continue;
4850
4851 if (!iv_ca_has_deps (ivs, new_cp))
4852 continue;
4853
4854 if (!cheaper_cost_pair (new_cp, old_cp))
4855 continue;
4856
4857 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4858 }
4859
4860 iv_ca_delta_commit (data, ivs, *delta, true);
4861 cost = iv_ca_cost (ivs);
4862 if (n_ivs)
4863 *n_ivs = iv_ca_n_cands (ivs);
4864 iv_ca_delta_commit (data, ivs, *delta, false);
4865
4866 return cost;
4867 }
4868
4869 /* Try narrowing set IVS by removing CAND. Return the cost of
4870 the new set and store the differences in DELTA. */
4871
4872 static comp_cost
4873 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4874 struct iv_cand *cand, struct iv_ca_delta **delta)
4875 {
4876 unsigned i, ci;
4877 struct iv_use *use;
4878 struct cost_pair *old_cp, *new_cp, *cp;
4879 bitmap_iterator bi;
4880 struct iv_cand *cnd;
4881 comp_cost cost;
4882
4883 *delta = NULL;
4884 for (i = 0; i < n_iv_uses (data); i++)
4885 {
4886 use = iv_use (data, i);
4887
4888 old_cp = iv_ca_cand_for_use (ivs, use);
4889 if (old_cp->cand != cand)
4890 continue;
4891
4892 new_cp = NULL;
4893
4894 if (data->consider_all_candidates)
4895 {
4896 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4897 {
4898 if (ci == cand->id)
4899 continue;
4900
4901 cnd = iv_cand (data, ci);
4902
4903 cp = get_use_iv_cost (data, use, cnd);
4904 if (!cp)
4905 continue;
4906 if (!iv_ca_has_deps (ivs, cp))
4907 continue;
4908
4909 if (!cheaper_cost_pair (cp, new_cp))
4910 continue;
4911
4912 new_cp = cp;
4913 }
4914 }
4915 else
4916 {
4917 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4918 {
4919 if (ci == cand->id)
4920 continue;
4921
4922 cnd = iv_cand (data, ci);
4923
4924 cp = get_use_iv_cost (data, use, cnd);
4925 if (!cp)
4926 continue;
4927 if (!iv_ca_has_deps (ivs, cp))
4928 continue;
4929
4930 if (!cheaper_cost_pair (cp, new_cp))
4931 continue;
4932
4933 new_cp = cp;
4934 }
4935 }
4936
4937 if (!new_cp)
4938 {
4939 iv_ca_delta_free (delta);
4940 return infinite_cost;
4941 }
4942
4943 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4944 }
4945
4946 iv_ca_delta_commit (data, ivs, *delta, true);
4947 cost = iv_ca_cost (ivs);
4948 iv_ca_delta_commit (data, ivs, *delta, false);
4949
4950 return cost;
4951 }
4952
4953 /* Try optimizing the set of candidates IVS by removing candidates other
4954 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4955 differences in DELTA. */
4956
4957 static comp_cost
4958 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4959 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4960 {
4961 bitmap_iterator bi;
4962 struct iv_ca_delta *act_delta, *best_delta;
4963 unsigned i;
4964 comp_cost best_cost, acost;
4965 struct iv_cand *cand;
4966
4967 best_delta = NULL;
4968 best_cost = iv_ca_cost (ivs);
4969
4970 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4971 {
4972 cand = iv_cand (data, i);
4973
4974 if (cand == except_cand)
4975 continue;
4976
4977 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4978
4979 if (compare_costs (acost, best_cost) < 0)
4980 {
4981 best_cost = acost;
4982 iv_ca_delta_free (&best_delta);
4983 best_delta = act_delta;
4984 }
4985 else
4986 iv_ca_delta_free (&act_delta);
4987 }
4988
4989 if (!best_delta)
4990 {
4991 *delta = NULL;
4992 return best_cost;
4993 }
4994
4995 /* Recurse to possibly remove other unnecessary ivs. */
4996 iv_ca_delta_commit (data, ivs, best_delta, true);
4997 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4998 iv_ca_delta_commit (data, ivs, best_delta, false);
4999 *delta = iv_ca_delta_join (best_delta, *delta);
5000 return best_cost;
5001 }
5002
5003 /* Tries to extend the set IVS in the best possible way in order
5004 to express USE. */
5005
5006 static bool
5007 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5008 struct iv_use *use)
5009 {
5010 comp_cost best_cost, act_cost;
5011 unsigned i;
5012 bitmap_iterator bi;
5013 struct iv_cand *cand;
5014 struct iv_ca_delta *best_delta = NULL, *act_delta;
5015 struct cost_pair *cp;
5016
5017 iv_ca_add_use (data, ivs, use);
5018 best_cost = iv_ca_cost (ivs);
5019
5020 cp = iv_ca_cand_for_use (ivs, use);
5021 if (cp)
5022 {
5023 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5024 iv_ca_set_no_cp (data, ivs, use);
5025 }
5026
5027 /* First try important candidates not based on any memory object. Only if
5028 this fails do we try the specific ones. Rationale: in loops with many
5029 variables, the best choice often is to use just one generic biv. If we
5030 added many ivs specific to the uses here, the optimization algorithm would
5031 later be likely to get stuck in a local minimum, causing us to create
5032 too many ivs. Going from few ivs to more seems more likely to be
5033 successful -- starting from few ivs, replacing an expensive use by a
5034 specific iv should always be a win. */
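/* For instance, in a loop walking several arrays, one generic counter biv
   serving all the address uses is often cheaper overall than a separate
   pointer iv per array; starting the search from the generic candidates
   makes such solutions reachable.  */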
5035 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5036 {
5037 cand = iv_cand (data, i);
5038
5039 if (cand->iv->base_object != NULL_TREE)
5040 continue;
5041
5042 if (iv_ca_cand_used_p (ivs, cand))
5043 continue;
5044
5045 cp = get_use_iv_cost (data, use, cand);
5046 if (!cp)
5047 continue;
5048
5049 iv_ca_set_cp (data, ivs, use, cp);
5050 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5051 iv_ca_set_no_cp (data, ivs, use);
5052 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5053
5054 if (compare_costs (act_cost, best_cost) < 0)
5055 {
5056 best_cost = act_cost;
5057
5058 iv_ca_delta_free (&best_delta);
5059 best_delta = act_delta;
5060 }
5061 else
5062 iv_ca_delta_free (&act_delta);
5063 }
5064
5065 if (infinite_cost_p (best_cost))
5066 {
5067 for (i = 0; i < use->n_map_members; i++)
5068 {
5069 cp = use->cost_map + i;
5070 cand = cp->cand;
5071 if (!cand)
5072 continue;
5073
5074 /* Already tried this. */
5075 if (cand->important && cand->iv->base_object == NULL_TREE)
5076 continue;
5077
5078 if (iv_ca_cand_used_p (ivs, cand))
5079 continue;
5080
5081 act_delta = NULL;
5082 iv_ca_set_cp (data, ivs, use, cp);
5083 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5084 iv_ca_set_no_cp (data, ivs, use);
5085 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5086 cp, act_delta);
5087
5088 if (compare_costs (act_cost, best_cost) < 0)
5089 {
5090 best_cost = act_cost;
5091
5092 if (best_delta)
5093 iv_ca_delta_free (&best_delta);
5094 best_delta = act_delta;
5095 }
5096 else
5097 iv_ca_delta_free (&act_delta);
5098 }
5099 }
5100
5101 iv_ca_delta_commit (data, ivs, best_delta, true);
5102 iv_ca_delta_free (&best_delta);
5103
5104 return !infinite_cost_p (best_cost);
5105 }
5106
5107 /* Finds an initial assignment of candidates to uses. */
5108
5109 static struct iv_ca *
5110 get_initial_solution (struct ivopts_data *data)
5111 {
5112 struct iv_ca *ivs = iv_ca_new (data);
5113 unsigned i;
5114
5115 for (i = 0; i < n_iv_uses (data); i++)
5116 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5117 {
5118 iv_ca_free (&ivs);
5119 return NULL;
5120 }
5121
5122 return ivs;
5123 }
5124
5125 /* Tries to improve the set of induction variables IVS. */
5126
5127 static bool
5128 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5129 {
5130 unsigned i, n_ivs;
5131 comp_cost acost, best_cost = iv_ca_cost (ivs);
5132 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5133 struct iv_cand *cand;
5134
5135 /* Try extending the set of induction variables by one. */
5136 for (i = 0; i < n_iv_cands (data); i++)
5137 {
5138 cand = iv_cand (data, i);
5139
5140 if (iv_ca_cand_used_p (ivs, cand))
5141 continue;
5142
5143 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5144 if (!act_delta)
5145 continue;
5146
5147 /* If we successfully added the candidate and the set is small enough,
5148 try optimizing it by removing other candidates. */
5149 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5150 {
5151 iv_ca_delta_commit (data, ivs, act_delta, true);
5152 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5153 iv_ca_delta_commit (data, ivs, act_delta, false);
5154 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5155 }
5156
5157 if (compare_costs (acost, best_cost) < 0)
5158 {
5159 best_cost = acost;
5160 iv_ca_delta_free (&best_delta);
5161 best_delta = act_delta;
5162 }
5163 else
5164 iv_ca_delta_free (&act_delta);
5165 }
5166
5167 if (!best_delta)
5168 {
5169 /* Try removing the candidates from the set instead. */
5170 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5171
5172 /* Nothing more we can do. */
5173 if (!best_delta)
5174 return false;
5175 }
5176
5177 iv_ca_delta_commit (data, ivs, best_delta, true);
5178 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5179 iv_ca_delta_free (&best_delta);
5180 return true;
5181 }
5182
5183 /* Attempts to find the optimal set of induction variables. We use a simple
5184 greedy heuristic -- we try to replace at most one candidate in the selected
5185 solution and remove the unused ivs while this improves the cost. */
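/* Concretely, the search below proceeds as follows: get_initial_solution
   assigns some candidate to every use; then try_improve_iv_set repeatedly
   attempts to add a single candidate (iv_ca_extend), possibly followed by
   pruning (iv_ca_prune), committing a change only when it decreases the
   total cost; the search stops in a local minimum.  */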
5186
5187 static struct iv_ca *
5188 find_optimal_iv_set (struct ivopts_data *data)
5189 {
5190 unsigned i;
5191 struct iv_ca *set;
5192 struct iv_use *use;
5193
5194 /* Get the initial solution. */
5195 set = get_initial_solution (data);
5196 if (!set)
5197 {
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5200 return NULL;
5201 }
5202
5203 if (dump_file && (dump_flags & TDF_DETAILS))
5204 {
5205 fprintf (dump_file, "Initial set of candidates:\n");
5206 iv_ca_dump (data, dump_file, set);
5207 }
5208
5209 while (try_improve_iv_set (data, set))
5210 {
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5212 {
5213 fprintf (dump_file, "Improved to:\n");
5214 iv_ca_dump (data, dump_file, set);
5215 }
5216 }
5217
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5219 {
5220 comp_cost cost = iv_ca_cost (set);
5221 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5222 }
5223
5224 for (i = 0; i < n_iv_uses (data); i++)
5225 {
5226 use = iv_use (data, i);
5227 use->selected = iv_ca_cand_for_use (set, use)->cand;
5228 }
5229
5230 return set;
5231 }
5232
5233 /* Creates a new induction variable corresponding to CAND. */
5234
5235 static void
5236 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5237 {
5238 gimple_stmt_iterator incr_pos;
5239 tree base;
5240 bool after = false;
5241
5242 if (!cand->iv)
5243 return;
5244
5245 switch (cand->pos)
5246 {
5247 case IP_NORMAL:
5248 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5249 break;
5250
5251 case IP_END:
5252 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5253 after = true;
5254 break;
5255
5256 case IP_AFTER_USE:
5257 after = true;
5258 /* fall through */
5259 case IP_BEFORE_USE:
5260 incr_pos = gsi_for_stmt (cand->incremented_at);
5261 break;
5262
5263 case IP_ORIGINAL:
5264 /* Mark that the iv is preserved. */
5265 name_info (data, cand->var_before)->preserve_biv = true;
5266 name_info (data, cand->var_after)->preserve_biv = true;
5267
5268 /* Rewrite the increment so that it uses var_before directly. */
5269 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5270
5271 return;
5272 }
5273
5274 gimple_add_tmp_var (cand->var_before);
5275 add_referenced_var (cand->var_before);
5276
5277 base = unshare_expr (cand->iv->base);
5278
5279 create_iv (base, unshare_expr (cand->iv->step),
5280 cand->var_before, data->current_loop,
5281 &incr_pos, after, &cand->var_before, &cand->var_after);
5282 }
5283
5284 /* Creates new induction variables described in SET. */
5285
5286 static void
5287 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5288 {
5289 unsigned i;
5290 struct iv_cand *cand;
5291 bitmap_iterator bi;
5292
5293 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5294 {
5295 cand = iv_cand (data, i);
5296 create_new_iv (data, cand);
5297 }
5298 }
5299
5300
5301 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5302 using candidate CAND. */
5303
5304 static void
5305 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5306 struct iv_use *use, struct iv_cand *cand)
5307 {
5308 tree comp;
5309 tree op, tgt;
5310 gimple ass;
5311 gimple_stmt_iterator bsi;
5312
5313 /* An important special case -- if we are asked to express the value of
5314 the original iv by itself, just exit; there is no need to
5315 introduce a new computation (that might also need casting the
5316 variable to unsigned and back). */
5317 if (cand->pos == IP_ORIGINAL
5318 && cand->incremented_at == use->stmt)
5319 {
5320 tree step, ctype, utype;
5321 enum tree_code incr_code = PLUS_EXPR, old_code;
5322
5323 gcc_assert (is_gimple_assign (use->stmt));
5324 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5325
5326 step = cand->iv->step;
5327 ctype = TREE_TYPE (step);
5328 utype = TREE_TYPE (cand->var_after);
5329 if (TREE_CODE (step) == NEGATE_EXPR)
5330 {
5331 incr_code = MINUS_EXPR;
5332 step = TREE_OPERAND (step, 0);
5333 }
5334
5335 /* Check whether we may leave the computation unchanged.
5336 This is the case only if it does not rely on other
5337 computations in the loop -- otherwise, the computation
5338 we rely upon may be removed in remove_unused_ivs,
5339 thus leading to ICE. */
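/* E.g. an increment "i = i + 4" with a constant operand may stay
   untouched, whereas an increment whose operand is another loop
   computation must be rewritten below in terms of VAR_BEFORE and an
   unshared copy of the candidate's step.  */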
5340 old_code = gimple_assign_rhs_code (use->stmt);
5341 if (old_code == PLUS_EXPR
5342 || old_code == MINUS_EXPR
5343 || old_code == POINTER_PLUS_EXPR)
5344 {
5345 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5346 op = gimple_assign_rhs2 (use->stmt);
5347 else if (old_code != MINUS_EXPR
5348 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5349 op = gimple_assign_rhs1 (use->stmt);
5350 else
5351 op = NULL_TREE;
5352 }
5353 else
5354 op = NULL_TREE;
5355
5356 if (op
5357 && (TREE_CODE (op) == INTEGER_CST
5358 || operand_equal_p (op, step, 0)))
5359 return;
5360
5361 /* Otherwise, add the necessary computations to express
5362 the iv. */
5363 op = fold_convert (ctype, cand->var_before);
5364 comp = fold_convert (utype,
5365 build2 (incr_code, ctype, op,
5366 unshare_expr (step)));
5367 }
5368 else
5369 {
5370 comp = get_computation (data->current_loop, use, cand);
5371 gcc_assert (comp != NULL_TREE);
5372 }
5373
5374 switch (gimple_code (use->stmt))
5375 {
5376 case GIMPLE_PHI:
5377 tgt = PHI_RESULT (use->stmt);
5378
5379 /* If we should keep the biv, do not replace it. */
5380 if (name_info (data, tgt)->preserve_biv)
5381 return;
5382
5383 bsi = gsi_after_labels (gimple_bb (use->stmt));
5384 break;
5385
5386 case GIMPLE_ASSIGN:
5387 tgt = gimple_assign_lhs (use->stmt);
5388 bsi = gsi_for_stmt (use->stmt);
5389 break;
5390
5391 default:
5392 gcc_unreachable ();
5393 }
5394
5395 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5396 true, GSI_SAME_STMT);
5397
5398 if (gimple_code (use->stmt) == GIMPLE_PHI)
5399 {
5400 ass = gimple_build_assign (tgt, op);
5401 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5402
5403 bsi = gsi_for_stmt (use->stmt);
5404 remove_phi_node (&bsi, false);
5405 }
5406 else
5407 {
5408 gimple_assign_set_rhs_from_tree (&bsi, op);
5409 use->stmt = gsi_stmt (bsi);
5410 }
5411 }
5412
5413 /* Replaces ssa name in index IDX by its basic variable. Callback for
5414 for_each_index. */
5415
5416 static bool
5417 idx_remove_ssa_names (tree base, tree *idx,
5418 void *data ATTRIBUTE_UNUSED)
5419 {
5420 tree *op;
5421
5422 if (TREE_CODE (*idx) == SSA_NAME)
5423 *idx = SSA_NAME_VAR (*idx);
5424
5425 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5426 {
5427 op = &TREE_OPERAND (base, 2);
5428 if (*op
5429 && TREE_CODE (*op) == SSA_NAME)
5430 *op = SSA_NAME_VAR (*op);
5431 op = &TREE_OPERAND (base, 3);
5432 if (*op
5433 && TREE_CODE (*op) == SSA_NAME)
5434 *op = SSA_NAME_VAR (*op);
5435 }
5436
5437 return true;
5438 }
5439
5440 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5441
5442 static tree
5443 unshare_and_remove_ssa_names (tree ref)
5444 {
5445 ref = unshare_expr (ref);
5446 for_each_index (&ref, idx_remove_ssa_names, NULL);
5447
5448 return ref;
5449 }
5450
5451 /* Copies the reference information from OLD_REF to NEW_REF. */
5452
5453 static void
5454 copy_ref_info (tree new_ref, tree old_ref)
5455 {
5456 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5457 copy_mem_ref_info (new_ref, old_ref);
5458 else
5459 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5460 }
5461
5462 /* Rewrites USE (an address that is an iv) using candidate CAND. */
5463
5464 static void
5465 rewrite_use_address (struct ivopts_data *data,
5466 struct iv_use *use, struct iv_cand *cand)
5467 {
5468 aff_tree aff;
5469 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5470 tree ref;
5471 bool ok;
5472
5473 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5474 gcc_assert (ok);
5475 unshare_aff_combination (&aff);
5476
5477 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5478 copy_ref_info (ref, *use->op_p);
5479 *use->op_p = ref;
5480 }
5481
5482 /* Rewrites USE (a condition with an iv as one of its operands)
5483 using candidate CAND. */
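/* Continuing the iv elimination example: when a bound B was computed by
   may_eliminate_iv, an exit test such as "if (i < 100)" is rewritten
   into "if (p != B)" (or "p == B", depending on which edge leaves the
   loop; see iv_elimination_compare).  */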
5484
5485 static void
5486 rewrite_use_compare (struct ivopts_data *data,
5487 struct iv_use *use, struct iv_cand *cand)
5488 {
5489 tree comp, *var_p, op, bound;
5490 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5491 enum tree_code compare;
5492 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5493 bool ok;
5494
5495 bound = cp->value;
5496 if (bound)
5497 {
5498 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5499 tree var_type = TREE_TYPE (var);
5500 gimple_seq stmts;
5501
5502 compare = iv_elimination_compare (data, use);
5503 bound = unshare_expr (fold_convert (var_type, bound));
5504 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5505 if (stmts)
5506 gsi_insert_seq_on_edge_immediate (
5507 loop_preheader_edge (data->current_loop),
5508 stmts);
5509
5510 gimple_cond_set_lhs (use->stmt, var);
5511 gimple_cond_set_code (use->stmt, compare);
5512 gimple_cond_set_rhs (use->stmt, op);
5513 return;
5514 }
5515
5516 /* The induction variable elimination failed; just express the original
5517 giv. */
5518 comp = get_computation (data->current_loop, use, cand);
5519 gcc_assert (comp != NULL_TREE);
5520
5521 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5522 gcc_assert (ok);
5523
5524 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5525 true, GSI_SAME_STMT);
5526 }
5527
5528 /* Rewrites USE using candidate CAND. */
5529
5530 static void
5531 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5532 {
5533 switch (use->type)
5534 {
5535 case USE_NONLINEAR_EXPR:
5536 rewrite_use_nonlinear_expr (data, use, cand);
5537 break;
5538
5539 case USE_ADDRESS:
5540 rewrite_use_address (data, use, cand);
5541 break;
5542
5543 case USE_COMPARE:
5544 rewrite_use_compare (data, use, cand);
5545 break;
5546
5547 default:
5548 gcc_unreachable ();
5549 }
5550
5551 update_stmt (use->stmt);
5552 }
5553
5554 /* Rewrite the uses using the selected induction variables. */
5555
5556 static void
5557 rewrite_uses (struct ivopts_data *data)
5558 {
5559 unsigned i;
5560 struct iv_cand *cand;
5561 struct iv_use *use;
5562
5563 for (i = 0; i < n_iv_uses (data); i++)
5564 {
5565 use = iv_use (data, i);
5566 cand = use->selected;
5567 gcc_assert (cand);
5568
5569 rewrite_use (data, use, cand);
5570 }
5571 }
5572
5573 /* Removes the ivs that are not used after rewriting. */
5574
5575 static void
5576 remove_unused_ivs (struct ivopts_data *data)
5577 {
5578 unsigned j;
5579 bitmap_iterator bi;
5580 bitmap toremove = BITMAP_ALLOC (NULL);
5581
5582 /* Figure out an order in which to release SSA DEFs so that we don't
5583 release something that we'd have to propagate into a debug stmt
5584 afterwards. */
5585 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5586 {
5587 struct version_info *info;
5588
5589 info = ver_info (data, j);
5590 if (info->iv
5591 && !integer_zerop (info->iv->step)
5592 && !info->inv_id
5593 && !info->iv->have_use_for
5594 && !info->preserve_biv)
5595 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5596 }
5597
5598 release_defs_bitset (toremove);
5599
5600 BITMAP_FREE (toremove);
5601 }
5602
5603 /* Frees data allocated by the optimization of a single loop. */
5604
5605 static void
5606 free_loop_data (struct ivopts_data *data)
5607 {
5608 unsigned i, j;
5609 bitmap_iterator bi;
5610 tree obj;
5611
5612 if (data->niters)
5613 {
5614 pointer_map_destroy (data->niters);
5615 data->niters = NULL;
5616 }
5617
5618 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5619 {
5620 struct version_info *info;
5621
5622 info = ver_info (data, i);
5623 if (info->iv)
5624 free (info->iv);
5625 info->iv = NULL;
5626 info->has_nonlin_use = false;
5627 info->preserve_biv = false;
5628 info->inv_id = 0;
5629 }
5630 bitmap_clear (data->relevant);
5631 bitmap_clear (data->important_candidates);
5632
5633 for (i = 0; i < n_iv_uses (data); i++)
5634 {
5635 struct iv_use *use = iv_use (data, i);
5636
5637 free (use->iv);
5638 BITMAP_FREE (use->related_cands);
5639 for (j = 0; j < use->n_map_members; j++)
5640 if (use->cost_map[j].depends_on)
5641 BITMAP_FREE (use->cost_map[j].depends_on);
5642 free (use->cost_map);
5643 free (use);
5644 }
5645 VEC_truncate (iv_use_p, data->iv_uses, 0);
5646
5647 for (i = 0; i < n_iv_cands (data); i++)
5648 {
5649 struct iv_cand *cand = iv_cand (data, i);
5650
5651 if (cand->iv)
5652 free (cand->iv);
5653 if (cand->depends_on)
5654 BITMAP_FREE (cand->depends_on);
5655 free (cand);
5656 }
5657 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5658
5659 if (data->version_info_size < num_ssa_names)
5660 {
5661 data->version_info_size = 2 * num_ssa_names;
5662 free (data->version_info);
5663 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5664 }
5665
5666 data->max_inv_id = 0;
5667
5668 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5669 SET_DECL_RTL (obj, NULL_RTX);
5670
5671 VEC_truncate (tree, decl_rtl_to_reset, 0);
5672 }
5673
5674 /* Finalizes the data structures used by
5675 the iv optimization pass. */
5676
5677 static void
5678 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5679 {
5680 free_loop_data (data);
5681 free (data->version_info);
5682 BITMAP_FREE (data->relevant);
5683 BITMAP_FREE (data->important_candidates);
5684
5685 VEC_free (tree, heap, decl_rtl_to_reset);
5686 VEC_free (iv_use_p, heap, data->iv_uses);
5687 VEC_free (iv_cand_p, heap, data->iv_candidates);
5688 }
5689
5690 /* Optimizes the LOOP. Returns true if anything changed. */
5691
5692 static bool
5693 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5694 {
5695 bool changed = false;
5696 struct iv_ca *iv_ca;
5697 edge exit;
5698 basic_block *body;
5699
5700 gcc_assert (!data->niters);
5701 data->current_loop = loop;
5702 data->speed = optimize_loop_for_speed_p (loop);
5703
5704 if (dump_file && (dump_flags & TDF_DETAILS))
5705 {
5706 fprintf (dump_file, "Processing loop %d\n", loop->num);
5707
5708 exit = single_dom_exit (loop);
5709 if (exit)
5710 {
5711 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5712 exit->src->index, exit->dest->index);
5713 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5714 fprintf (dump_file, "\n");
5715 }
5716
5717 fprintf (dump_file, "\n");
5718 }
5719
5720 body = get_loop_body (loop);
5721 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5722 free (body);
5723
5724 /* Determine, for each ssa name, whether it behaves as an induction variable
5725 in some loop. */
5726 if (!find_induction_variables (data))
5727 goto finish;
5728
5729 /* Finds interesting uses (item 1). */
5730 find_interesting_uses (data);
5731 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5732 goto finish;
5733
5734 /* Finds candidates for the induction variables (item 2). */
5735 find_iv_candidates (data);
5736
5737 /* Calculates the costs (item 3, part 1). */
5738 determine_iv_costs (data);
5739 determine_use_iv_costs (data);
5740 determine_set_costs (data);
5741
5742 /* Find the optimal set of induction variables (item 3, part 2). */
5743 iv_ca = find_optimal_iv_set (data);
5744 if (!iv_ca)
5745 goto finish;
5746 changed = true;
5747
5748 /* Create the new induction variables (item 4, part 1). */
5749 create_new_ivs (data, iv_ca);
5750 iv_ca_free (&iv_ca);
5751
5752 /* Rewrite the uses (item 4, part 2). */
5753 rewrite_uses (data);
5754
5755 /* Remove the ivs that are unused after rewriting. */
5756 remove_unused_ivs (data);
5757
5758 /* We have changed the structure of induction variables; it might happen
5759 that definitions in the scev database refer to some of them that were
5760 eliminated. */
5761 scev_reset ();
5762
5763 finish:
5764 free_loop_data (data);
5765
5766 return changed;
5767 }
5768
5769 /* Main entry point. Optimizes induction variables in loops. */
5770
5771 void
5772 tree_ssa_iv_optimize (void)
5773 {
5774 struct loop *loop;
5775 struct ivopts_data data;
5776 loop_iterator li;
5777
5778 tree_ssa_iv_optimize_init (&data);
5779
5780 /* Optimize the loops starting with the innermost ones. */
5781 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5782 {
5783 if (dump_file && (dump_flags & TDF_DETAILS))
5784 flow_loop_dump (loop, dump_file, NULL, 1);
5785
5786 tree_ssa_iv_optimize_loop (&data, loop);
5787 }
5788
5789 tree_ssa_iv_optimize_finalize (&data);
5790 }