1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base
; /* Initial value of the iv. */
108 tree base_object
; /* A memory object to that the induction variable points. */
109 tree step
; /* Step of the iv (constant only). */
110 tree ssa_name
; /* The ssa name with the value. */
111 bool biv_p
; /* Is it a biv? */
112 bool have_use_for
; /* Do we already have a use for it? */
113 unsigned use_id
; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name
; /* The ssa name. */
120 struct iv
*iv
; /* Induction variable description. */
121 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id
; /* Id of an invariant. */
124 bool preserve_biv
; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* Cost of a computation. */
138 int cost
; /* The runtime cost. */
139 unsigned complexity
; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost
= {0, 0};
146 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
148 /* The candidate - cost pair. */
151 struct iv_cand
*cand
; /* The candidate. */
152 comp_cost cost
; /* The cost. */
153 bitmap depends_on
; /* The list of invariants that have to be
155 tree value
; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id
; /* The id of the use. */
164 enum use_type type
; /* Type of the use. */
165 struct iv
*iv
; /* The induction variable it is based on. */
166 gimple stmt
; /* Statement in that it occurs. */
167 tree
*op_p
; /* The place where it occurs. */
168 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
172 struct cost_pair
*cost_map
;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand
*selected
;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
189 /* The induction variable candidate. */
192 unsigned id
; /* The number of the candidate. */
193 bool important
; /* Whether this is an "important" candidate, i.e. such
194 that it should be considered by all uses. */
195 enum iv_position pos
; /* Where it is computed. */
196 gimple incremented_at
;/* For original biv, the statement where it is
198 tree var_before
; /* The variable used for it before increment. */
199 tree var_after
; /* The variable used for it after increment. */
200 struct iv
*iv
; /* The value of the candidate. NULL for
201 "pseudocandidate" used to indicate the possibility
202 to replace the final value of an iv by direct
203 computation of the value. */
204 unsigned cost
; /* Cost of the candidate. */
205 unsigned cost_step
; /* Cost of the candidate's increment operation. */
206 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 where it is incremented. */
208 bitmap depends_on
; /* The list of invariants that are used in step of the
212 /* The data used by the induction variable optimizations. */
214 typedef struct iv_use
*iv_use_p
;
216 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
218 typedef struct iv_cand
*iv_cand_p
;
219 DEF_VEC_P(iv_cand_p
);
220 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
224 /* The currently optimized loop. */
225 struct loop
*current_loop
;
227 /* Numbers of iterations for all exits of the current loop. */
228 struct pointer_map_t
*niters
;
230 /* Number of registers used in it. */
233 /* The size of version_info array allocated. */
234 unsigned version_info_size
;
236 /* The array of information for the ssa names. */
237 struct version_info
*version_info
;
239 /* The bitmap of indices in version_info whose value was changed. */
242 /* The uses of induction variables. */
243 VEC(iv_use_p
,heap
) *iv_uses
;
245 /* The candidates. */
246 VEC(iv_cand_p
,heap
) *iv_candidates
;
248 /* A bitmap of important candidates. */
249 bitmap important_candidates
;
251 /* The maximum invariant id. */
254 /* Whether to consider just related and important candidates when replacing a
256 bool consider_all_candidates
;
258 /* Are we optimizing for speed? */
262 /* An assignment of iv candidates to uses. */
266 /* The number of uses covered by the assignment. */
269 /* Number of uses that cannot be expressed by the candidates in the set. */
272 /* Candidate assigned to a use, together with the related costs. */
273 struct cost_pair
**cand_for_use
;
275 /* Number of times each candidate is used. */
276 unsigned *n_cand_uses
;
278 /* The candidates used. */
281 /* The number of candidates in the set. */
284 /* Total number of registers needed. */
287 /* Total cost of expressing uses. */
288 comp_cost cand_use_cost
;
290 /* Total cost of candidates. */
293 /* Number of times each invariant is used. */
294 unsigned *n_invariant_uses
;
296 /* Total cost of the assignment. */
300 /* Difference of two iv candidate assignments. */
307 /* An old assignment (for rollback purposes). */
308 struct cost_pair
*old_cp
;
310 /* A new assignment. */
311 struct cost_pair
*new_cp
;
313 /* Next change in the list. */
314 struct iv_ca_delta
*next_change
;
317 /* Bound on number of candidates below that all candidates are considered. */
319 #define CONSIDER_ALL_CANDIDATES_BOUND \
320 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
322 /* If there are more iv occurrences, we just give up (it is quite unlikely that
323 optimizing such a loop would help, and it would take ages). */
325 #define MAX_CONSIDERED_USES \
326 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
328 /* If there are at most this number of ivs in the set, try removing unnecessary
329 ivs from the set always. */
331 #define ALWAYS_PRUNE_CAND_SET_BOUND \
332 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
334 /* The list of trees for that the decl_rtl field must be reset is stored
337 static VEC(tree
,heap
) *decl_rtl_to_reset
;
339 /* Number of uses recorded in DATA. */
341 static inline unsigned
342 n_iv_uses (struct ivopts_data
*data
)
344 return VEC_length (iv_use_p
, data
->iv_uses
);
347 /* Ith use recorded in DATA. */
349 static inline struct iv_use
*
350 iv_use (struct ivopts_data
*data
, unsigned i
)
352 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
355 /* Number of candidates recorded in DATA. */
357 static inline unsigned
358 n_iv_cands (struct ivopts_data
*data
)
360 return VEC_length (iv_cand_p
, data
->iv_candidates
);
363 /* Ith candidate recorded in DATA. */
365 static inline struct iv_cand
*
366 iv_cand (struct ivopts_data
*data
, unsigned i
)
368 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
371 /* The single loop exit if it dominates the latch, NULL otherwise. */
374 single_dom_exit (struct loop
*loop
)
376 edge exit
= single_exit (loop
);
381 if (!just_once_each_iteration_p (loop
, exit
->src
))
387 /* Dumps information about the induction variable IV to FILE. */
389 extern void dump_iv (FILE *, struct iv
*);
391 dump_iv (FILE *file
, struct iv
*iv
)
395 fprintf (file
, "ssa name ");
396 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
397 fprintf (file
, "\n");
400 fprintf (file
, " type ");
401 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
402 fprintf (file
, "\n");
406 fprintf (file
, " base ");
407 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
408 fprintf (file
, "\n");
410 fprintf (file
, " step ");
411 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
412 fprintf (file
, "\n");
416 fprintf (file
, " invariant ");
417 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
418 fprintf (file
, "\n");
423 fprintf (file
, " base object ");
424 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
425 fprintf (file
, "\n");
429 fprintf (file
, " is a biv\n");
432 /* Dumps information about the USE to FILE. */
434 extern void dump_use (FILE *, struct iv_use
*);
436 dump_use (FILE *file
, struct iv_use
*use
)
438 fprintf (file
, "use %d\n", use
->id
);
442 case USE_NONLINEAR_EXPR
:
443 fprintf (file
, " generic\n");
447 fprintf (file
, " address\n");
451 fprintf (file
, " compare\n");
458 fprintf (file
, " in statement ");
459 print_gimple_stmt (file
, use
->stmt
, 0, 0);
460 fprintf (file
, "\n");
462 fprintf (file
, " at position ");
464 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
465 fprintf (file
, "\n");
467 dump_iv (file
, use
->iv
);
469 if (use
->related_cands
)
471 fprintf (file
, " related candidates ");
472 dump_bitmap (file
, use
->related_cands
);
476 /* Dumps information about the uses to FILE. */
478 extern void dump_uses (FILE *, struct ivopts_data
*);
480 dump_uses (FILE *file
, struct ivopts_data
*data
)
485 for (i
= 0; i
< n_iv_uses (data
); i
++)
487 use
= iv_use (data
, i
);
489 dump_use (file
, use
);
490 fprintf (file
, "\n");
494 /* Dumps information about induction variable candidate CAND to FILE. */
496 extern void dump_cand (FILE *, struct iv_cand
*);
498 dump_cand (FILE *file
, struct iv_cand
*cand
)
500 struct iv
*iv
= cand
->iv
;
502 fprintf (file
, "candidate %d%s\n",
503 cand
->id
, cand
->important
? " (important)" : "");
505 if (cand
->depends_on
)
507 fprintf (file
, " depends on ");
508 dump_bitmap (file
, cand
->depends_on
);
513 fprintf (file
, " final value replacement\n");
520 fprintf (file
, " incremented before exit test\n");
524 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
528 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
532 fprintf (file
, " incremented at end\n");
536 fprintf (file
, " original biv\n");
543 /* Returns the info for ssa version VER. */
545 static inline struct version_info
*
546 ver_info (struct ivopts_data
*data
, unsigned ver
)
548 return data
->version_info
+ ver
;
551 /* Returns the info for ssa name NAME. */
553 static inline struct version_info
*
554 name_info (struct ivopts_data
*data
, tree name
)
556 return ver_info (data
, SSA_NAME_VERSION (name
));
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
563 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
565 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
569 if (sbb
== loop
->latch
)
575 return stmt
== last_stmt (bb
);
578 /* Returns true if STMT if after the place where the original induction
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
583 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
585 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
586 basic_block stmt_bb
= gimple_bb (stmt
);
588 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
591 if (stmt_bb
!= cand_bb
)
595 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
597 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
600 /* Returns true if STMT if after the place where the induction variable
601 CAND is incremented in LOOP. */
604 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
612 return stmt_after_ip_normal_pos (loop
, stmt
);
616 return stmt_after_inc_pos (cand
, stmt
, false);
619 return stmt_after_inc_pos (cand
, stmt
, true);
626 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
629 abnormal_ssa_name_p (tree exp
)
634 if (TREE_CODE (exp
) != SSA_NAME
)
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
640 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
644 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
645 void *data ATTRIBUTE_UNUSED
)
647 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
649 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
651 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
655 return !abnormal_ssa_name_p (*index
);
658 /* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
662 contains_abnormal_ssa_name_p (tree expr
)
665 enum tree_code_class codeclass
;
670 code
= TREE_CODE (expr
);
671 codeclass
= TREE_CODE_CLASS (code
);
673 if (code
== SSA_NAME
)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
676 if (code
== INTEGER_CST
677 || is_gimple_min_invariant (expr
))
680 if (code
== ADDR_EXPR
)
681 return !for_each_index (&TREE_OPERAND (expr
, 0),
682 idx_contains_abnormal_ssa_name_p
,
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
706 /* Returns tree describing number of iterations determined from
707 EXIT of DATA->current_loop, or NULL if something goes wrong. */
710 niter_for_exit (struct ivopts_data
*data
, edge exit
)
712 struct tree_niter_desc desc
;
718 data
->niters
= pointer_map_create ();
722 slot
= pointer_map_contains (data
->niters
, exit
);
726 /* Try to determine number of iterations. We must know it
727 unconditionally (i.e., without possibility of # of iterations
728 being zero). Also, we cannot safely work with ssa names that
729 appear in phi nodes on abnormal edges, so that we do not create
730 overlapping life ranges for them (PR 27283). */
731 if (number_of_iterations_exit (data
->current_loop
,
733 && integer_zerop (desc
.may_be_zero
)
734 && !contains_abnormal_ssa_name_p (desc
.niter
))
739 *pointer_map_insert (data
->niters
, exit
) = niter
;
742 niter
= (tree
) *slot
;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
752 niter_for_single_dom_exit (struct ivopts_data
*data
)
754 edge exit
= single_dom_exit (data
->current_loop
);
759 return niter_for_exit (data
, exit
);
762 /* Initializes data structures used by the iv optimization pass, stored
766 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
768 data
->version_info_size
= 2 * num_ssa_names
;
769 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
770 data
->relevant
= BITMAP_ALLOC (NULL
);
771 data
->important_candidates
= BITMAP_ALLOC (NULL
);
772 data
->max_inv_id
= 0;
774 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
775 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
776 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
783 determine_base_object (tree expr
)
785 enum tree_code code
= TREE_CODE (expr
);
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (CONVERT_EXPR_P (expr
))
792 return determine_base_object (TREE_OPERAND (expr
, 0));
794 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
803 obj
= TREE_OPERAND (expr
, 0);
804 base
= get_base_address (obj
);
809 if (TREE_CODE (base
) == INDIRECT_REF
)
810 return determine_base_object (TREE_OPERAND (base
, 0));
812 return fold_convert (ptr_type_node
,
813 build_fold_addr_expr (base
));
815 case POINTER_PLUS_EXPR
:
816 return determine_base_object (TREE_OPERAND (expr
, 0));
820 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
824 return fold_convert (ptr_type_node
, expr
);
828 /* Allocates an induction variable with given initial value BASE and step STEP
832 alloc_iv (tree base
, tree step
)
834 struct iv
*iv
= XCNEW (struct iv
);
835 gcc_assert (step
!= NULL_TREE
);
838 iv
->base_object
= determine_base_object (base
);
841 iv
->have_use_for
= false;
843 iv
->ssa_name
= NULL_TREE
;
848 /* Sets STEP and BASE for induction variable IV. */
851 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
853 struct version_info
*info
= name_info (data
, iv
);
855 gcc_assert (!info
->iv
);
857 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
858 info
->iv
= alloc_iv (base
, step
);
859 info
->iv
->ssa_name
= iv
;
862 /* Finds induction variable declaration for VAR. */
865 get_iv (struct ivopts_data
*data
, tree var
)
868 tree type
= TREE_TYPE (var
);
870 if (!POINTER_TYPE_P (type
)
871 && !INTEGRAL_TYPE_P (type
))
874 if (!name_info (data
, var
)->iv
)
876 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
879 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
880 set_iv (data
, var
, var
, build_int_cst (type
, 0));
883 return name_info (data
, var
)->iv
;
886 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
890 determine_biv_step (gimple phi
)
892 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
893 tree name
= PHI_RESULT (phi
);
896 if (!is_gimple_reg (name
))
899 if (!simple_iv (loop
, loop
, name
, &iv
, true))
902 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
905 /* Finds basic ivs. */
908 find_bivs (struct ivopts_data
*data
)
911 tree step
, type
, base
;
913 struct loop
*loop
= data
->current_loop
;
914 gimple_stmt_iterator psi
;
916 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
918 phi
= gsi_stmt (psi
);
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
923 step
= determine_biv_step (phi
);
927 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
928 base
= expand_simple_operations (base
);
929 if (contains_abnormal_ssa_name_p (base
)
930 || contains_abnormal_ssa_name_p (step
))
933 type
= TREE_TYPE (PHI_RESULT (phi
));
934 base
= fold_convert (type
, base
);
937 if (POINTER_TYPE_P (type
))
938 step
= fold_convert (sizetype
, step
);
940 step
= fold_convert (type
, step
);
943 set_iv (data
, PHI_RESULT (phi
), base
, step
);
950 /* Marks basic ivs. */
953 mark_bivs (struct ivopts_data
*data
)
957 struct iv
*iv
, *incr_iv
;
958 struct loop
*loop
= data
->current_loop
;
960 gimple_stmt_iterator psi
;
962 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
964 phi
= gsi_stmt (psi
);
966 iv
= get_iv (data
, PHI_RESULT (phi
));
970 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
971 incr_iv
= get_iv (data
, var
);
975 /* If the increment is in the subloop, ignore it. */
976 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
977 if (incr_bb
->loop_father
!= data
->current_loop
978 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
982 incr_iv
->biv_p
= true;
986 /* Checks whether STMT defines a linear induction variable and stores its
990 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
993 struct loop
*loop
= data
->current_loop
;
995 iv
->base
= NULL_TREE
;
996 iv
->step
= NULL_TREE
;
998 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1001 lhs
= gimple_assign_lhs (stmt
);
1002 if (TREE_CODE (lhs
) != SSA_NAME
)
1005 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1007 iv
->base
= expand_simple_operations (iv
->base
);
1009 if (contains_abnormal_ssa_name_p (iv
->base
)
1010 || contains_abnormal_ssa_name_p (iv
->step
))
1016 /* Finds general ivs in statement STMT. */
1019 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1023 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1026 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1029 /* Finds general ivs in basic block BB. */
1032 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1034 gimple_stmt_iterator bsi
;
1036 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1037 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1040 /* Finds general ivs. */
1043 find_givs (struct ivopts_data
*data
)
1045 struct loop
*loop
= data
->current_loop
;
1046 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1049 for (i
= 0; i
< loop
->num_nodes
; i
++)
1050 find_givs_in_bb (data
, body
[i
]);
1054 /* For each ssa name defined in LOOP determines whether it is an induction
1055 variable and if so, its initial value and step. */
1058 find_induction_variables (struct ivopts_data
*data
)
1063 if (!find_bivs (data
))
1069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1071 tree niter
= niter_for_single_dom_exit (data
);
1075 fprintf (dump_file
, " number of iterations ");
1076 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1077 fprintf (dump_file
, "\n\n");
1080 fprintf (dump_file
, "Induction variables:\n\n");
1082 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1084 if (ver_info (data
, i
)->iv
)
1085 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1092 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1094 static struct iv_use
*
1095 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1096 gimple stmt
, enum use_type use_type
)
1098 struct iv_use
*use
= XCNEW (struct iv_use
);
1100 use
->id
= n_iv_uses (data
);
1101 use
->type
= use_type
;
1105 use
->related_cands
= BITMAP_ALLOC (NULL
);
1107 /* To avoid showing ssa name in the dumps, if it was not reset by the
1109 iv
->ssa_name
= NULL_TREE
;
1111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1112 dump_use (dump_file
, use
);
1114 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1119 /* Checks whether OP is a loop-level invariant and if so, records it.
1120 NONLINEAR_USE is true if the invariant is used in a way we do not
1121 handle specially. */
1124 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1127 struct version_info
*info
;
1129 if (TREE_CODE (op
) != SSA_NAME
1130 || !is_gimple_reg (op
))
1133 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1135 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1138 info
= name_info (data
, op
);
1140 info
->has_nonlin_use
|= nonlinear_use
;
1142 info
->inv_id
= ++data
->max_inv_id
;
1143 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1146 /* Checks whether the use OP is interesting and if so, records it. */
1148 static struct iv_use
*
1149 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1156 if (TREE_CODE (op
) != SSA_NAME
)
1159 iv
= get_iv (data
, op
);
1163 if (iv
->have_use_for
)
1165 use
= iv_use (data
, iv
->use_id
);
1167 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1171 if (integer_zerop (iv
->step
))
1173 record_invariant (data
, op
, true);
1176 iv
->have_use_for
= true;
1178 civ
= XNEW (struct iv
);
1181 stmt
= SSA_NAME_DEF_STMT (op
);
1182 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1183 || is_gimple_assign (stmt
));
1185 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1186 iv
->use_id
= use
->id
;
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
1200 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1201 tree
**control_var
, tree
**bound
,
1202 struct iv
**iv_var
, struct iv
**iv_bound
)
1204 /* The objects returned when COND has constant operands. */
1205 static struct iv const_iv
;
1207 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1208 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1211 if (gimple_code (stmt
) == GIMPLE_COND
)
1213 op0
= gimple_cond_lhs_ptr (stmt
);
1214 op1
= gimple_cond_rhs_ptr (stmt
);
1218 op0
= gimple_assign_rhs1_ptr (stmt
);
1219 op1
= gimple_assign_rhs2_ptr (stmt
);
1222 zero
= integer_zero_node
;
1223 const_iv
.step
= integer_zero_node
;
1225 if (TREE_CODE (*op0
) == SSA_NAME
)
1226 iv0
= get_iv (data
, *op0
);
1227 if (TREE_CODE (*op1
) == SSA_NAME
)
1228 iv1
= get_iv (data
, *op1
);
1230 /* Exactly one of the compared values must be an iv, and the other one must
1235 if (integer_zerop (iv0
->step
))
1237 /* Control variable may be on the other side. */
1238 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1239 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1241 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1245 *control_var
= op0
;;
1256 /* Checks whether the condition in STMT is interesting and if so,
1260 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1262 tree
*var_p
, *bound_p
;
1263 struct iv
*var_iv
, *civ
;
1265 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1267 find_interesting_uses_op (data
, *var_p
);
1268 find_interesting_uses_op (data
, *bound_p
);
1272 civ
= XNEW (struct iv
);
1274 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
1282 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1287 gcc_assert (loop_depth (loop
) > 0);
1289 if (is_gimple_min_invariant (expr
))
1292 if (TREE_CODE (expr
) == SSA_NAME
)
1294 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1296 && flow_bb_inside_loop_p (loop
, def_bb
))
1305 len
= TREE_OPERAND_LENGTH (expr
);
1306 for (i
= 0; i
< len
; i
++)
1307 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1318 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1323 gcc_assert (loop_depth (loop
) > 0);
1325 lhs
= gimple_get_lhs (stmt
);
1326 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1328 tree op
= gimple_op (stmt
, i
);
1329 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1336 /* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1340 struct ifs_ivopts_data
1342 struct ivopts_data
*ivopts_data
;
1348 idx_find_step (tree base
, tree
*idx
, void *data
)
1350 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1352 tree step
, iv_base
, iv_step
, lbound
, off
;
1353 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1355 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1359 /* If base is a component ref, require that the offset of the reference
1361 if (TREE_CODE (base
) == COMPONENT_REF
)
1363 off
= component_ref_field_offset (base
);
1364 return expr_invariant_in_loop_p (loop
, off
);
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
1371 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1378 step
= array_ref_element_size (base
);
1379 lbound
= array_ref_low_bound (base
);
1381 if (!expr_invariant_in_loop_p (loop
, step
)
1382 || !expr_invariant_in_loop_p (loop
, lbound
))
1386 if (TREE_CODE (*idx
) != SSA_NAME
)
1389 iv
= get_iv (dta
->ivopts_data
, *idx
);
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
1398 if (integer_zerop (iv
->step
))
1401 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1403 step
= array_ref_element_size (base
);
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step
) != INTEGER_CST
)
1410 /* The step for pointer arithmetics already is 1 byte. */
1411 step
= build_int_cst (sizetype
, 1);
1415 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1416 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1419 /* The index might wrap. */
1423 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1424 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1429 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1433 idx_record_use (tree base
, tree
*idx
,
1436 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1437 find_interesting_uses_op (data
, *idx
);
1438 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1440 find_interesting_uses_op (data
, array_ref_element_size (base
));
1441 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1452 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1455 enum tree_code code
;
1456 double_int res
, p0
, p1
;
1457 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1462 if (operand_equal_p (top
, bot
, 0))
1464 *mul
= double_int_one
;
1468 code
= TREE_CODE (top
);
1472 mby
= TREE_OPERAND (top
, 1);
1473 if (TREE_CODE (mby
) != INTEGER_CST
)
1476 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1479 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1485 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1486 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1489 if (code
== MINUS_EXPR
)
1490 p1
= double_int_neg (p1
);
1491 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1495 if (TREE_CODE (bot
) != INTEGER_CST
)
1498 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1499 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1500 if (double_int_zero_p (p1
))
1502 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1504 return double_int_zero_p (res
);
1511 /* Returns true if memory reference REF with step STEP may be unaligned. */
1514 may_be_unaligned_p (tree ref
, tree step
)
1518 HOST_WIDE_INT bitsize
;
1519 HOST_WIDE_INT bitpos
;
1521 enum machine_mode mode
;
1522 int unsignedp
, volatilep
;
1523 unsigned base_align
;
1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526 thus they are not misaligned. */
1527 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1534 &unsignedp
, &volatilep
, true);
1535 base_type
= TREE_TYPE (base
);
1536 base_align
= TYPE_ALIGN (base_type
);
1538 if (mode
!= BLKmode
)
1541 tree al
= build_int_cst (TREE_TYPE (step
),
1542 GET_MODE_ALIGNMENT (mode
) / BITS_PER_UNIT
);
1544 if (base_align
< GET_MODE_ALIGNMENT (mode
)
1545 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1546 || bitpos
% BITS_PER_UNIT
!= 0)
1549 if (!constant_multiple_of (step
, al
, &mul
))
1556 /* Return true if EXPR may be non-addressable. */
1559 may_be_nonaddressable_p (tree expr
)
1561 switch (TREE_CODE (expr
))
1563 case TARGET_MEM_REF
:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1572 case VIEW_CONVERT_EXPR
:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
1577 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1581 /* ... fall through ... */
1584 case ARRAY_RANGE_REF
:
1585 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1597 /* Finds addresses in *OP_P inside STMT. */
1600 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1602 tree base
= *op_p
, step
= build_int_cst (sizetype
, 0);
1604 struct ifs_ivopts_data ifs_ivopts_data
;
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
1608 if (gimple_has_volatile_ops (stmt
))
1611 /* Ignore bitfields for now. Not really something terribly complicated
1613 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1616 base
= unshare_expr (base
);
1618 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1620 tree type
= build_pointer_type (TREE_TYPE (base
));
1624 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1626 civ
= get_iv (data
, TMR_BASE (base
));
1630 TMR_BASE (base
) = civ
->base
;
1633 if (TMR_INDEX (base
)
1634 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1636 civ
= get_iv (data
, TMR_INDEX (base
));
1640 TMR_INDEX (base
) = civ
->base
;
1645 if (TMR_STEP (base
))
1646 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1648 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1652 if (integer_zerop (step
))
1654 base
= tree_mem_ref_addr (type
, base
);
1658 ifs_ivopts_data
.ivopts_data
= data
;
1659 ifs_ivopts_data
.stmt
= stmt
;
1660 ifs_ivopts_data
.step
= build_int_cst (sizetype
, 0);
1661 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1662 || integer_zerop (ifs_ivopts_data
.step
))
1664 step
= ifs_ivopts_data
.step
;
1666 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1667 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base
))
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
1676 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1679 base
= build_fold_addr_expr (base
);
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base
) == ADDR_EXPR
)
1685 tree
*ref
= &TREE_OPERAND (base
, 0);
1686 while (handled_component_p (*ref
))
1687 ref
= &TREE_OPERAND (*ref
, 0);
1688 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1689 *ref
= fold_indirect_ref (*ref
);
1693 civ
= alloc_iv (base
, step
);
1694 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1698 for_each_index (op_p
, idx_record_use
, data
);
1701 /* Finds and records invariants used in STMT. */
1704 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1707 use_operand_p use_p
;
1710 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1712 op
= USE_FROM_PTR (use_p
);
1713 record_invariant (data
, op
, false);
1717 /* Finds interesting uses of induction variables in the statement STMT. */
1720 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1723 tree op
, *lhs
, *rhs
;
1725 use_operand_p use_p
;
1726 enum tree_code code
;
1728 find_invariants_stmt (data
, stmt
);
1730 if (gimple_code (stmt
) == GIMPLE_COND
)
1732 find_interesting_uses_cond (data
, stmt
);
1736 if (is_gimple_assign (stmt
))
1738 lhs
= gimple_assign_lhs_ptr (stmt
);
1739 rhs
= gimple_assign_rhs1_ptr (stmt
);
1741 if (TREE_CODE (*lhs
) == SSA_NAME
)
1743 /* If the statement defines an induction variable, the uses are not
1744 interesting by themselves. */
1746 iv
= get_iv (data
, *lhs
);
1748 if (iv
&& !integer_zerop (iv
->step
))
1752 code
= gimple_assign_rhs_code (stmt
);
1753 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1754 && (REFERENCE_CLASS_P (*rhs
)
1755 || is_gimple_val (*rhs
)))
1757 if (REFERENCE_CLASS_P (*rhs
))
1758 find_interesting_uses_address (data
, stmt
, rhs
);
1760 find_interesting_uses_op (data
, *rhs
);
1762 if (REFERENCE_CLASS_P (*lhs
))
1763 find_interesting_uses_address (data
, stmt
, lhs
);
1766 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1768 find_interesting_uses_cond (data
, stmt
);
1772 /* TODO -- we should also handle address uses of type
1774 memory = call (whatever);
1781 if (gimple_code (stmt
) == GIMPLE_PHI
1782 && gimple_bb (stmt
) == data
->current_loop
->header
)
1784 iv
= get_iv (data
, PHI_RESULT (stmt
));
1786 if (iv
&& !integer_zerop (iv
->step
))
1790 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1792 op
= USE_FROM_PTR (use_p
);
1794 if (TREE_CODE (op
) != SSA_NAME
)
1797 iv
= get_iv (data
, op
);
1801 find_interesting_uses_op (data
, op
);
1805 /* Finds interesting uses of induction variables outside of loops
1806 on loop exit edge EXIT. */
1809 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1812 gimple_stmt_iterator psi
;
1815 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1817 phi
= gsi_stmt (psi
);
1818 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1819 if (is_gimple_reg (def
))
1820 find_interesting_uses_op (data
, def
);
1824 /* Finds uses of the induction variables that are interesting. */
1827 find_interesting_uses (struct ivopts_data
*data
)
1830 gimple_stmt_iterator bsi
;
1831 basic_block
*body
= get_loop_body (data
->current_loop
);
1833 struct version_info
*info
;
1836 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1837 fprintf (dump_file
, "Uses:\n\n");
1839 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1844 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1845 if (e
->dest
!= EXIT_BLOCK_PTR
1846 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1847 find_interesting_uses_outside (data
, e
);
1849 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1850 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1851 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1852 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1855 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1859 fprintf (dump_file
, "\n");
1861 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1863 info
= ver_info (data
, i
);
1866 fprintf (dump_file
, " ");
1867 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1868 fprintf (dump_file
, " is invariant (%d)%s\n",
1869 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1873 fprintf (dump_file
, "\n");
1879 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1880 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1881 we are at the top-level of the processed address. */
1884 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1885 unsigned HOST_WIDE_INT
*offset
)
1887 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1888 enum tree_code code
;
1889 tree type
, orig_type
= TREE_TYPE (expr
);
1890 unsigned HOST_WIDE_INT off0
, off1
, st
;
1891 tree orig_expr
= expr
;
1895 type
= TREE_TYPE (expr
);
1896 code
= TREE_CODE (expr
);
1902 if (!cst_and_fits_in_hwi (expr
)
1903 || integer_zerop (expr
))
1906 *offset
= int_cst_value (expr
);
1907 return build_int_cst (orig_type
, 0);
1909 case POINTER_PLUS_EXPR
:
1912 op0
= TREE_OPERAND (expr
, 0);
1913 op1
= TREE_OPERAND (expr
, 1);
1915 op0
= strip_offset_1 (op0
, false, false, &off0
);
1916 op1
= strip_offset_1 (op1
, false, false, &off1
);
1918 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
1919 if (op0
== TREE_OPERAND (expr
, 0)
1920 && op1
== TREE_OPERAND (expr
, 1))
1923 if (integer_zerop (op1
))
1925 else if (integer_zerop (op0
))
1927 if (code
== MINUS_EXPR
)
1928 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1933 expr
= fold_build2 (code
, type
, op0
, op1
);
1935 return fold_convert (orig_type
, expr
);
1938 op1
= TREE_OPERAND (expr
, 1);
1939 if (!cst_and_fits_in_hwi (op1
))
1942 op0
= TREE_OPERAND (expr
, 0);
1943 op0
= strip_offset_1 (op0
, false, false, &off0
);
1944 if (op0
== TREE_OPERAND (expr
, 0))
1947 *offset
= off0
* int_cst_value (op1
);
1948 if (integer_zerop (op0
))
1951 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
1953 return fold_convert (orig_type
, expr
);
1956 case ARRAY_RANGE_REF
:
1960 step
= array_ref_element_size (expr
);
1961 if (!cst_and_fits_in_hwi (step
))
1964 st
= int_cst_value (step
);
1965 op1
= TREE_OPERAND (expr
, 1);
1966 op1
= strip_offset_1 (op1
, false, false, &off1
);
1967 *offset
= off1
* st
;
1970 && integer_zerop (op1
))
1972 /* Strip the component reference completely. */
1973 op0
= TREE_OPERAND (expr
, 0);
1974 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1984 tmp
= component_ref_field_offset (expr
);
1986 && cst_and_fits_in_hwi (tmp
))
1988 /* Strip the component reference completely. */
1989 op0
= TREE_OPERAND (expr
, 0);
1990 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1991 *offset
= off0
+ int_cst_value (tmp
);
1997 op0
= TREE_OPERAND (expr
, 0);
1998 op0
= strip_offset_1 (op0
, true, true, &off0
);
2001 if (op0
== TREE_OPERAND (expr
, 0))
2004 expr
= build_fold_addr_expr (op0
);
2005 return fold_convert (orig_type
, expr
);
2008 inside_addr
= false;
2015 /* Default handling of expressions for that we want to recurse into
2016 the first operand. */
2017 op0
= TREE_OPERAND (expr
, 0);
2018 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2021 if (op0
== TREE_OPERAND (expr
, 0)
2022 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2025 expr
= copy_node (expr
);
2026 TREE_OPERAND (expr
, 0) = op0
;
2028 TREE_OPERAND (expr
, 1) = op1
;
2030 /* Inside address, we might strip the top level component references,
2031 thus changing type of the expression. Handling of ADDR_EXPR
2033 expr
= fold_convert (orig_type
, expr
);
2038 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2041 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2043 return strip_offset_1 (expr
, false, false, offset
);
2046 /* Returns variant of TYPE that can be used as base for different uses.
2047 We return unsigned type with the same precision, which avoids problems
2051 generic_type_for (tree type
)
2053 if (POINTER_TYPE_P (type
))
2054 return unsigned_type_for (type
);
2056 if (TYPE_UNSIGNED (type
))
2059 return unsigned_type_for (type
);
2062 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2063 the bitmap to that we should store it. */
2065 static struct ivopts_data
*fd_ivopts_data
;
2067 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2069 bitmap
*depends_on
= (bitmap
*) data
;
2070 struct version_info
*info
;
2072 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2074 info
= name_info (fd_ivopts_data
, *expr_p
);
2076 if (!info
->inv_id
|| info
->has_nonlin_use
)
2080 *depends_on
= BITMAP_ALLOC (NULL
);
2081 bitmap_set_bit (*depends_on
, info
->inv_id
);
2086 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2087 position to POS. If USE is not NULL, the candidate is set as related to
2088 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2089 replacement of the final value of the iv by a direct computation. */
2091 static struct iv_cand
*
2092 add_candidate_1 (struct ivopts_data
*data
,
2093 tree base
, tree step
, bool important
, enum iv_position pos
,
2094 struct iv_use
*use
, gimple incremented_at
)
2097 struct iv_cand
*cand
= NULL
;
2098 tree type
, orig_type
;
2102 orig_type
= TREE_TYPE (base
);
2103 type
= generic_type_for (orig_type
);
2104 if (type
!= orig_type
)
2106 base
= fold_convert (type
, base
);
2107 step
= fold_convert (type
, step
);
2111 for (i
= 0; i
< n_iv_cands (data
); i
++)
2113 cand
= iv_cand (data
, i
);
2115 if (cand
->pos
!= pos
)
2118 if (cand
->incremented_at
!= incremented_at
2119 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2120 && cand
->ainc_use
!= use
))
2134 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2135 && operand_equal_p (step
, cand
->iv
->step
, 0))
2139 if (i
== n_iv_cands (data
))
2141 cand
= XCNEW (struct iv_cand
);
2147 cand
->iv
= alloc_iv (base
, step
);
2150 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2152 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2153 cand
->var_after
= cand
->var_before
;
2155 cand
->important
= important
;
2156 cand
->incremented_at
= incremented_at
;
2157 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2160 && TREE_CODE (step
) != INTEGER_CST
)
2162 fd_ivopts_data
= data
;
2163 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2166 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2167 cand
->ainc_use
= use
;
2169 cand
->ainc_use
= NULL
;
2171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2172 dump_cand (dump_file
, cand
);
2175 if (important
&& !cand
->important
)
2177 cand
->important
= true;
2178 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2179 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2184 bitmap_set_bit (use
->related_cands
, i
);
2185 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2186 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2193 /* Returns true if incrementing the induction variable at the end of the LOOP
2196 The purpose is to avoid splitting latch edge with a biv increment, thus
2197 creating a jump, possibly confusing other optimization passes and leaving
2198 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2199 is not available (so we do not have a better alternative), or if the latch
2200 edge is already nonempty. */
2203 allow_ip_end_pos_p (struct loop
*loop
)
2205 if (!ip_normal_pos (loop
))
2208 if (!empty_block_p (ip_end_pos (loop
)))
2214 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2215 Important field is set to IMPORTANT. */
2218 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2219 bool important
, struct iv_use
*use
)
2221 basic_block use_bb
= gimple_bb (use
->stmt
);
2222 enum machine_mode mem_mode
;
2223 unsigned HOST_WIDE_INT cstepi
;
2225 /* If we insert the increment in any position other than the standard
2226 ones, we must ensure that it is incremented once per iteration.
2227 It must not be in an inner nested loop, or one side of an if
2229 if (use_bb
->loop_father
!= data
->current_loop
2230 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2231 || stmt_could_throw_p (use
->stmt
)
2232 || !cst_and_fits_in_hwi (step
))
2235 cstepi
= int_cst_value (step
);
2237 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2238 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2239 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2241 enum tree_code code
= MINUS_EXPR
;
2243 tree new_step
= step
;
2245 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2247 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2248 code
= POINTER_PLUS_EXPR
;
2251 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2252 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2253 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2256 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2257 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2259 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2264 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2265 position to POS. If USE is not NULL, the candidate is set as related to
2266 it. The candidate computation is scheduled on all available positions. */
2269 add_candidate (struct ivopts_data
*data
,
2270 tree base
, tree step
, bool important
, struct iv_use
*use
)
2272 if (ip_normal_pos (data
->current_loop
))
2273 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2274 if (ip_end_pos (data
->current_loop
)
2275 && allow_ip_end_pos_p (data
->current_loop
))
2276 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2278 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2279 add_autoinc_candidates (data
, base
, step
, important
, use
);
2282 /* Add a standard "0 + 1 * iteration" iv candidate for a
2283 type with SIZE bits. */
2286 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2289 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2290 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2294 /* Adds standard iv candidates. */
2297 add_standard_iv_candidates (struct ivopts_data
*data
)
2299 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2301 /* The same for a double-integer type if it is still fast enough. */
2302 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2303 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2307 /* Adds candidates bases on the old induction variable IV. */
2310 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2314 struct iv_cand
*cand
;
2316 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2318 /* The same, but with initial value zero. */
2319 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2320 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2322 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2323 iv
->step
, true, NULL
);
2325 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2326 if (gimple_code (phi
) == GIMPLE_PHI
)
2328 /* Additionally record the possibility of leaving the original iv
2330 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2331 cand
= add_candidate_1 (data
,
2332 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2333 SSA_NAME_DEF_STMT (def
));
2334 cand
->var_before
= iv
->ssa_name
;
2335 cand
->var_after
= def
;
2339 /* Adds candidates based on the old induction variables. */
2342 add_old_ivs_candidates (struct ivopts_data
*data
)
2348 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2350 iv
= ver_info (data
, i
)->iv
;
2351 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2352 add_old_iv_candidates (data
, iv
);
2356 /* Adds candidates based on the value of the induction variable IV and USE. */
2359 add_iv_value_candidates (struct ivopts_data
*data
,
2360 struct iv
*iv
, struct iv_use
*use
)
2362 unsigned HOST_WIDE_INT offset
;
2366 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2368 /* The same, but with initial value zero. Make such variable important,
2369 since it is generic enough so that possibly many uses may be based
2371 basetype
= TREE_TYPE (iv
->base
);
2372 if (POINTER_TYPE_P (basetype
))
2373 basetype
= sizetype
;
2374 add_candidate (data
, build_int_cst (basetype
, 0),
2375 iv
->step
, true, use
);
2377 /* Third, try removing the constant offset. Make sure to even
2378 add a candidate for &a[0] vs. (T *)&a. */
2379 base
= strip_offset (iv
->base
, &offset
);
2381 || base
!= iv
->base
)
2382 add_candidate (data
, base
, iv
->step
, false, use
);
2385 /* Adds candidates based on the uses. */
2388 add_derived_ivs_candidates (struct ivopts_data
*data
)
2392 for (i
= 0; i
< n_iv_uses (data
); i
++)
2394 struct iv_use
*use
= iv_use (data
, i
);
2401 case USE_NONLINEAR_EXPR
:
2404 /* Just add the ivs based on the value of the iv used here. */
2405 add_iv_value_candidates (data
, use
->iv
, use
);
2414 /* Record important candidates and add them to related_cands bitmaps
2418 record_important_candidates (struct ivopts_data
*data
)
2423 for (i
= 0; i
< n_iv_cands (data
); i
++)
2425 struct iv_cand
*cand
= iv_cand (data
, i
);
2427 if (cand
->important
)
2428 bitmap_set_bit (data
->important_candidates
, i
);
2431 data
->consider_all_candidates
= (n_iv_cands (data
)
2432 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2434 if (data
->consider_all_candidates
)
2436 /* We will not need "related_cands" bitmaps in this case,
2437 so release them to decrease peak memory consumption. */
2438 for (i
= 0; i
< n_iv_uses (data
); i
++)
2440 use
= iv_use (data
, i
);
2441 BITMAP_FREE (use
->related_cands
);
2446 /* Add important candidates to the related_cands bitmaps. */
2447 for (i
= 0; i
< n_iv_uses (data
); i
++)
2448 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2449 data
->important_candidates
);
2453 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2454 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2455 we allocate a simple list to every use. */
2458 alloc_use_cost_map (struct ivopts_data
*data
)
2460 unsigned i
, size
, s
, j
;
2462 for (i
= 0; i
< n_iv_uses (data
); i
++)
2464 struct iv_use
*use
= iv_use (data
, i
);
2467 if (data
->consider_all_candidates
)
2468 size
= n_iv_cands (data
);
2472 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2477 /* Round up to the power of two, so that moduling by it is fast. */
2478 for (size
= 1; size
< s
; size
<<= 1)
2482 use
->n_map_members
= size
;
2483 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2487 /* Returns description of computation cost of expression whose runtime
2488 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2491 new_cost (unsigned runtime
, unsigned complexity
)
2495 cost
.cost
= runtime
;
2496 cost
.complexity
= complexity
;
2501 /* Adds costs COST1 and COST2. */
2504 add_costs (comp_cost cost1
, comp_cost cost2
)
2506 cost1
.cost
+= cost2
.cost
;
2507 cost1
.complexity
+= cost2
.complexity
;
2511 /* Subtracts costs COST1 and COST2. */
2514 sub_costs (comp_cost cost1
, comp_cost cost2
)
2516 cost1
.cost
-= cost2
.cost
;
2517 cost1
.complexity
-= cost2
.complexity
;
2522 /* Returns a negative number if COST1 < COST2, a positive number if
2523 COST1 > COST2, and 0 if COST1 = COST2. */
2526 compare_costs (comp_cost cost1
, comp_cost cost2
)
2528 if (cost1
.cost
== cost2
.cost
)
2529 return cost1
.complexity
- cost2
.complexity
;
2531 return cost1
.cost
- cost2
.cost
;
2534 /* Returns true if COST is infinite. */
2537 infinite_cost_p (comp_cost cost
)
2539 return cost
.cost
== INFTY
;
2542 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2543 on invariants DEPENDS_ON and that the value used in expressing it
2547 set_use_iv_cost (struct ivopts_data
*data
,
2548 struct iv_use
*use
, struct iv_cand
*cand
,
2549 comp_cost cost
, bitmap depends_on
, tree value
)
2553 if (infinite_cost_p (cost
))
2555 BITMAP_FREE (depends_on
);
2559 if (data
->consider_all_candidates
)
2561 use
->cost_map
[cand
->id
].cand
= cand
;
2562 use
->cost_map
[cand
->id
].cost
= cost
;
2563 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2564 use
->cost_map
[cand
->id
].value
= value
;
2568 /* n_map_members is a power of two, so this computes modulo. */
2569 s
= cand
->id
& (use
->n_map_members
- 1);
2570 for (i
= s
; i
< use
->n_map_members
; i
++)
2571 if (!use
->cost_map
[i
].cand
)
2573 for (i
= 0; i
< s
; i
++)
2574 if (!use
->cost_map
[i
].cand
)
2580 use
->cost_map
[i
].cand
= cand
;
2581 use
->cost_map
[i
].cost
= cost
;
2582 use
->cost_map
[i
].depends_on
= depends_on
;
2583 use
->cost_map
[i
].value
= value
;
2586 /* Gets cost of (USE, CANDIDATE) pair. */
2588 static struct cost_pair
*
2589 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2590 struct iv_cand
*cand
)
2593 struct cost_pair
*ret
;
2598 if (data
->consider_all_candidates
)
2600 ret
= use
->cost_map
+ cand
->id
;
2607 /* n_map_members is a power of two, so this computes modulo. */
2608 s
= cand
->id
& (use
->n_map_members
- 1);
2609 for (i
= s
; i
< use
->n_map_members
; i
++)
2610 if (use
->cost_map
[i
].cand
== cand
)
2611 return use
->cost_map
+ i
;
2613 for (i
= 0; i
< s
; i
++)
2614 if (use
->cost_map
[i
].cand
== cand
)
2615 return use
->cost_map
+ i
;
2620 /* Returns estimate on cost of computing SEQ. */
2623 seq_cost (rtx seq
, bool speed
)
2628 for (; seq
; seq
= NEXT_INSN (seq
))
2630 set
= single_set (seq
);
2632 cost
+= rtx_cost (set
, SET
,speed
);
2640 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2642 produce_memory_decl_rtl (tree obj
, int *regno
)
2647 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2649 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2650 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2651 SET_SYMBOL_REF_DECL (x
, obj
);
2652 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2653 targetm
.encode_section_info (obj
, x
, true);
2657 x
= gen_raw_REG (Pmode
, (*regno
)++);
2658 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2664 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2665 walk_tree. DATA contains the actual fake register number. */
2668 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2670 tree obj
= NULL_TREE
;
2672 int *regno
= (int *) data
;
2674 switch (TREE_CODE (*expr_p
))
2677 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2678 handled_component_p (*expr_p
);
2679 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2682 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2683 x
= produce_memory_decl_rtl (obj
, regno
);
2688 obj
= SSA_NAME_VAR (*expr_p
);
2689 if (!DECL_RTL_SET_P (obj
))
2690 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2699 if (DECL_RTL_SET_P (obj
))
2702 if (DECL_MODE (obj
) == BLKmode
)
2703 x
= produce_memory_decl_rtl (obj
, regno
);
2705 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2715 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2716 SET_DECL_RTL (obj
, x
);
2722 /* Determines cost of the computation of EXPR. */
2725 computation_cost (tree expr
, bool speed
)
2728 tree type
= TREE_TYPE (expr
);
2730 /* Avoid using hard regs in ways which may be unsupported. */
2731 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2732 enum function_frequency real_frequency
= cfun
->function_frequency
;
2734 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
2735 crtl
->maybe_hot_insn_p
= speed
;
2736 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2738 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2741 default_rtl_profile ();
2742 cfun
->function_frequency
= real_frequency
;
2744 cost
= seq_cost (seq
, speed
);
2746 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
), speed
);
2751 /* Returns variable containing the value of candidate CAND at statement AT. */
2754 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2756 if (stmt_after_increment (loop
, cand
, stmt
))
2757 return cand
->var_after
;
2759 return cand
->var_before
;
2762 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2763 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2766 tree_int_cst_sign_bit (const_tree t
)
2768 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2769 unsigned HOST_WIDE_INT w
;
2771 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2772 w
= TREE_INT_CST_LOW (t
);
2775 w
= TREE_INT_CST_HIGH (t
);
2776 bitno
-= HOST_BITS_PER_WIDE_INT
;
2779 return (w
>> bitno
) & 1;
2782 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2783 same precision that is at least as wide as the precision of TYPE, stores
2784 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2788 determine_common_wider_type (tree
*a
, tree
*b
)
2790 tree wider_type
= NULL
;
2792 tree atype
= TREE_TYPE (*a
);
2794 if (CONVERT_EXPR_P (*a
))
2796 suba
= TREE_OPERAND (*a
, 0);
2797 wider_type
= TREE_TYPE (suba
);
2798 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2804 if (CONVERT_EXPR_P (*b
))
2806 subb
= TREE_OPERAND (*b
, 0);
2807 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2818 /* Determines the expression by that USE is expressed from induction variable
2819 CAND at statement AT in LOOP. The expression is stored in a decomposed
2820 form into AFF. Returns false if USE cannot be expressed using CAND. */
/* NOTE(review): damaged extraction -- return type, braces, several local
   declarations (uutype, rat, cstep_aff) and some statement bodies are
   missing; text kept byte-for-byte.  */
2823 get_computation_aff (struct loop
*loop
,
2824 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2825 struct affine_tree_combination
*aff
)
2827 tree ubase
= use
->iv
->base
;
2828 tree ustep
= use
->iv
->step
;
2829 tree cbase
= cand
->iv
->base
;
2830 tree cstep
= cand
->iv
->step
, cstep_common
;
2831 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2832 tree common_type
, var
;
2834 aff_tree cbase_aff
, var_aff
;
/* USE cannot be expressed when its type is wider than the candidate's.  */
2837 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2839 /* We do not have a precision to express the values of use. */
2843 var
= var_at_stmt (loop
, cand
, at
);
2844 uutype
= unsigned_type_for (utype
);
2846 /* If the conversion is not noop, perform it. */
2847 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2849 cstep
= fold_convert (uutype
, cstep
);
2850 cbase
= fold_convert (uutype
, cbase
);
2851 var
= fold_convert (uutype
, var
);
/* RAT becomes the ratio ustep / cstep; failure means USE is not
   expressible from CAND.  */
2854 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2857 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2858 type, we achieve better folding by computing their difference in this
2859 wider type, and cast the result to UUTYPE. We do not need to worry about
2860 overflows, as all the arithmetics will in the end be performed in UUTYPE
2862 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2864 /* use = ubase - ratio * cbase + ratio * var. */
2865 tree_to_aff_combination (ubase
, common_type
, aff
);
2866 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2867 tree_to_aff_combination (var
, uutype
, &var_aff
);
2869 /* We need to shift the value if we are after the increment. */
2870 if (stmt_after_increment (loop
, cand
, at
))
2874 if (common_type
!= uutype
)
2875 cstep_common
= fold_convert (common_type
, cstep
);
2877 cstep_common
= cstep
;
2879 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2880 aff_combination_add (&cbase_aff
, &cstep_aff
);
/* Accumulate -ratio * cbase into AFF, then convert back to UUTYPE.  */
2883 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2884 aff_combination_add (aff
, &cbase_aff
);
2885 if (common_type
!= uutype
)
2886 aff_combination_convert (aff
, uutype
);
/* Finally add ratio * var.  */
2888 aff_combination_scale (&var_aff
, rat
);
2889 aff_combination_add (aff
, &var_aff
);
2894 /* Determines the expression by that USE is expressed from induction variable
2895 CAND at statement AT in LOOP. The computation is unshared. */
/* NOTE(review): damaged extraction -- return type, braces, the `aff`
   declaration and the failure return are missing; text kept byte-for-byte.
   Visible flow: decompose via get_computation_aff, unshare, fold back to
   a tree of the use's type.  */
2898 get_computation_at (struct loop
*loop
,
2899 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
2902 tree type
= TREE_TYPE (use
->iv
->base
);
2904 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
2906 unshare_aff_combination (&aff
);
2907 return fold_convert (type
, aff_combination_to_tree (&aff
));
2910 /* Determines the expression by that USE is expressed from induction variable
2911 CAND in LOOP. The computation is unshared. */
/* NOTE(review): thin wrapper -- delegates to get_computation_at at the
   use's own statement.  Return type and braces lost in extraction.  */
2914 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2916 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2919 /* Returns cost of addition in MODE. */
/* NOTE(review): damaged extraction -- return type, braces, the memoization
   lookup/store into `costs`, the start/end-sequence calls and the final
   return are missing; text kept byte-for-byte.  Visible logic: emit an RTL
   PLUS of two fresh pseudo registers and price the resulting sequence.  */
2922 add_cost (enum machine_mode mode
, bool speed
)
/* Per-mode memo table; presumably indexed by MODE -- TODO confirm against
   the undamaged original.  */
2924 static unsigned costs
[NUM_MACHINE_MODES
];
2932 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2933 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2934 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
2939 cost
= seq_cost (seq
, speed
);
2945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2946 fprintf (dump_file
, "Addition in %s costs %d\n",
2947 GET_MODE_NAME (mode
), cost
);
2951 /* Entry in a hashtable of already known costs for multiplication. */
/* NOTE(review): the `struct mbc_entry {` line and closing brace were lost
   in extraction; only the three members survive.  */
2954 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2955 enum machine_mode mode
; /* In mode. */
2956 unsigned cost
; /* The cost. */
2959 /* Counts hash value for the ENTRY. */
/* NOTE(review): htab_hash callback for the multiply-by-cost table; mixes
   the machine mode and the multiplier constant.  Return type and braces
   lost in extraction.  */
2962 mbc_entry_hash (const void *entry
)
2964 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
2966 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2969 /* Compares the hash table entries ENTRY1 and ENTRY2. */
/* NOTE(review): htab_eq callback paired with mbc_entry_hash -- entries are
   equal iff both mode and multiplier constant match.  Return type and
   braces lost in extraction.  */
2972 mbc_entry_eq (const void *entry1
, const void *entry2
)
2974 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
2975 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
2977 return (e1
->mode
== e2
->mode
2978 && e1
->cst
== e2
->cst
);
2981 /* Returns cost of multiplication by constant CST in MODE. */
/* NOTE(review): damaged extraction -- return type, braces, lazy-init guard,
   the `act` field assignments before lookup, start/end-sequence calls and
   the final return are missing; text kept byte-for-byte.  Visible logic:
   memoize via a lazily created hash table, otherwise expand the multiply
   to RTL and price the sequence.  */
2984 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
2986 static htab_t costs
;
2987 struct mbc_entry
**cached
, act
;
/* Lazily create the memo table; `free` releases entries on deletion.  */
2992 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2996 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
/* Cache hit path.  */
2998 return (*cached
)->cost
;
/* Cache miss: allocate and fill a fresh entry.  */
3000 *cached
= XNEW (struct mbc_entry
);
3001 (*cached
)->mode
= mode
;
3002 (*cached
)->cst
= cst
;
3005 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3006 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3010 cost
= seq_cost (seq
, speed
);
3012 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3013 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3014 (int) cst
, GET_MODE_NAME (mode
), cost
);
3016 (*cached
)->cost
= cost
;
3021 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3022 validity for a memory reference accessing memory of mode MODE. */
/* NOTE(review): damaged extraction -- return type, braces, the `addr`/`i`
   declarations and the early `return false` for out-of-range ratios are
   missing; text kept byte-for-byte.  Visible logic: on first query for a
   mode, probe every multiplier in [-MAX_RATIO, MAX_RATIO] with
   memory_address_p and cache the answers in a per-mode sbitmap.  */
3025 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
)
3027 #define MAX_RATIO 128
3028 static sbitmap valid_mult
[MAX_MACHINE_MODE
];
3030 if (!valid_mult
[mode
])
3032 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
/* Bitmap index is biased by MAX_RATIO so negative ratios fit.  */
3036 valid_mult
[mode
] = sbitmap_alloc (2 * MAX_RATIO
+ 1);
3037 sbitmap_zero (valid_mult
[mode
]);
3038 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3039 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3041 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3042 if (memory_address_p (mode
, addr
))
3043 SET_BIT (valid_mult
[mode
], i
+ MAX_RATIO
);
3046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3048 fprintf (dump_file
, " allowed multipliers:");
3049 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3050 if (TEST_BIT (valid_mult
[mode
], i
+ MAX_RATIO
))
3051 fprintf (dump_file
, " %d", (int) i
);
3052 fprintf (dump_file
, "\n");
3053 fprintf (dump_file
, "\n");
/* Ratios outside the probed window are never considered valid.  */
3057 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3060 return TEST_BIT (valid_mult
[mode
], ratio
+ MAX_RATIO
);
3063 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3064 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3065 variable is omitted. Compute the cost for a memory reference that accesses
3066 a memory location of mode MEM_MODE.
3068 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3069 size of MEM_MODE / RATIO) is available. To make this determination, we
3070 look at the size of the increment to be made, which is given in CSTEP.
3071 CSTEP may be zero if the step is unknown.
3072 STMT_AFTER_INC is true iff the statement we're looking at is after the
3073 increment of the original biv.
3075 TODO -- there must be some better way. This all is quite crude. */
/* NOTE(review): damaged extraction -- return type, braces, loop variable
   declarations, several condition bodies and the start/end-sequence calls
   around the probed address are missing; text kept byte-for-byte.  Visible
   logic: on first call for MEM_MODE, probe the target's addressing
   capabilities (offset range, pre/post inc/dec, all 16 combinations of
   sym/var/offset/ratio) and cache per-mode costs; then classify the
   requested address shape and combine the cached cost with extra
   multiply/add costs for parts that do not fit the addressing mode.  */
3078 get_address_cost (bool symbol_present
, bool var_present
,
3079 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3080 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
, bool speed
,
3081 bool stmt_after_inc
, bool *may_autoinc
)
3083 static bool initialized
[MAX_MACHINE_MODE
];
3084 static HOST_WIDE_INT rat
[MAX_MACHINE_MODE
], off
[MAX_MACHINE_MODE
];
3085 static HOST_WIDE_INT min_offset
[MAX_MACHINE_MODE
], max_offset
[MAX_MACHINE_MODE
];
3086 static unsigned costs
[MAX_MACHINE_MODE
][2][2][2][2];
3087 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3088 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3089 unsigned cost
, acost
, complexity
;
3090 bool offset_p
, ratio_p
, autoinc
;
3091 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3092 unsigned HOST_WIDE_INT mask
;
/* One-time per-mode probing of the target's addressing modes.  */
3095 if (!initialized
[mem_mode
])
3098 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
3099 int old_cse_not_expected
;
3100 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3101 rtx seq
, addr
, base
;
3104 initialized
[mem_mode
] = true;
3106 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
/* Find the largest valid positive offset by doubling until the address
   is rejected.  */
3108 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3109 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3111 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3112 if (!memory_address_p (mem_mode
, addr
))
3115 max_offset
[mem_mode
] = i
== start
? 0 : i
>> 1;
3116 off
[mem_mode
] = max_offset
[mem_mode
];
/* Likewise for the most negative offset.  */
3118 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3120 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3121 if (!memory_address_p (mem_mode
, addr
))
3124 min_offset
[mem_mode
] = i
== start
? 0 : -(i
>> 1);
3126 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3128 fprintf (dump_file
, "get_address_cost:\n");
3129 fprintf (dump_file
, " min offset %s %d\n",
3130 GET_MODE_NAME (mem_mode
),
3131 (int) min_offset
[mem_mode
]);
3132 fprintf (dump_file
, " max offset %s %d\n",
3133 GET_MODE_NAME (mem_mode
),
3134 (int) max_offset
[mem_mode
]);
/* Pick some representative allowed multiplier for the probes below.  */
3138 for (i
= 2; i
<= MAX_RATIO
; i
++)
3139 if (multiplier_allowed_in_address_p (i
, mem_mode
))
3145 /* Compute the cost of various addressing modes. */
3147 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3148 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3150 if (HAVE_PRE_DECREMENT
)
3152 addr
= gen_rtx_PRE_DEC (Pmode
, reg0
);
3153 has_predec
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3155 if (HAVE_POST_DECREMENT
)
3157 addr
= gen_rtx_POST_DEC (Pmode
, reg0
);
3158 has_postdec
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3160 if (HAVE_PRE_INCREMENT
)
3162 addr
= gen_rtx_PRE_INC (Pmode
, reg0
);
3163 has_preinc
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3165 if (HAVE_POST_INCREMENT
)
3167 addr
= gen_rtx_POST_INC (Pmode
, reg0
);
3168 has_postinc
[mem_mode
] = memory_address_p (mem_mode
, addr
);
/* Enumerate all 16 sym/var/off/rat combinations; bit i of the loop index
   selects each component.  */
3170 for (i
= 0; i
< 16; i
++)
3173 var_p
= (i
>> 1) & 1;
3174 off_p
= (i
>> 2) & 1;
3175 rat_p
= (i
>> 3) & 1;
3179 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
,
3180 gen_int_mode (rat
[mem_mode
], Pmode
));
3183 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3187 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3188 /* ??? We can run into trouble with some backends by presenting
3189 it with symbols which haven't been properly passed through
3190 targetm.encode_section_info. By setting the local bit, we
3191 enhance the probability of things working. */
3192 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3195 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3196 gen_rtx_fmt_ee (PLUS
, Pmode
,
3198 gen_int_mode (off
[mem_mode
],
3202 base
= gen_int_mode (off
[mem_mode
], Pmode
);
3207 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3210 /* To avoid splitting addressing modes, pretend that no cse will
3212 old_cse_not_expected
= cse_not_expected
;
3213 cse_not_expected
= true;
3214 addr
= memory_address (mem_mode
, addr
);
3215 cse_not_expected
= old_cse_not_expected
;
3219 acost
= seq_cost (seq
, speed
);
3220 acost
+= address_cost (addr
, mem_mode
, speed
);
3224 costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
] = acost
;
3227 /* On some targets, it is quite expensive to load symbol to a register,
3228 which makes addresses that contain symbols look much more expensive.
3229 However, the symbol will have to be loaded in any case before the
3230 loop (and quite likely we have it in register already), so it does not
3231 make much sense to penalize them too heavily. So make some final
3232 tweaks for the SYMBOL_PRESENT modes:
3234 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3235 var is cheaper, use this mode with small penalty.
3236 If VAR_PRESENT is true, try whether the mode with
3237 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3238 if this is the case, use it. */
3239 add_c
= add_cost (Pmode
, speed
);
3240 for (i
= 0; i
< 8; i
++)
3243 off_p
= (i
>> 1) & 1;
3244 rat_p
= (i
>> 2) & 1;
3246 acost
= costs
[mem_mode
][0][1][off_p
][rat_p
] + 1;
3250 if (acost
< costs
[mem_mode
][1][var_p
][off_p
][rat_p
])
3251 costs
[mem_mode
][1][var_p
][off_p
][rat_p
] = acost
;
3254 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3256 fprintf (dump_file
, "Address costs:\n");
3258 for (i
= 0; i
< 16; i
++)
3261 var_p
= (i
>> 1) & 1;
3262 off_p
= (i
>> 2) & 1;
3263 rat_p
= (i
>> 3) & 1;
3265 fprintf (dump_file
, " ");
3267 fprintf (dump_file
, "sym + ");
3269 fprintf (dump_file
, "var + ");
3271 fprintf (dump_file
, "cst + ");
3273 fprintf (dump_file
, "rat * ");
3275 acost
= costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
];
3276 fprintf (dump_file
, "index costs %d\n", acost
);
3278 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3279 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3280 fprintf (dump_file
, " May include autoinc/dec\n");
3281 fprintf (dump_file
, "\n");
/* Sign-extend OFFSET from the width of Pmode.  */
3285 bits
= GET_MODE_BITSIZE (Pmode
);
3286 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3288 if ((offset
>> (bits
- 1) & 1))
/* Decide whether an autoinc/dec addressing form could cover this
   access; only plain offset==step shapes qualify.  */
3293 msize
= GET_MODE_SIZE (mem_mode
);
3294 autoinc_offset
= offset
;
3296 autoinc_offset
+= ratio
* cstep
;
3297 if (symbol_present
|| var_present
|| ratio
!= 1)
3299 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3301 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3303 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3305 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3306 && msize
== -cstep
))
/* The offset/ratio are usable directly only inside the probed ranges.  */
3310 offset_p
= (s_offset
!= 0
3311 && min_offset
[mem_mode
] <= s_offset
3312 && s_offset
<= max_offset
[mem_mode
]);
3313 ratio_p
= (ratio
!= 1
3314 && multiplier_allowed_in_address_p (ratio
, mem_mode
));
/* Parts that cannot be folded into the address pay for explicit
   multiply/add instructions.  */
3316 if (ratio
!= 1 && !ratio_p
)
3317 cost
+= multiply_by_cost (ratio
, Pmode
, speed
);
3319 if (s_offset
&& !offset_p
&& !symbol_present
)
3320 cost
+= add_cost (Pmode
, speed
);
3323 *may_autoinc
= autoinc
;
3324 acost
= costs
[mem_mode
][symbol_present
][var_present
][offset_p
][ratio_p
];
3325 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3326 return new_cost (cost
+ acost
, complexity
);
3329 /* Estimates cost of forcing expression EXPR into a variable. */
/* NOTE(review): damaged extraction -- return type, braces, several
   declarations (var, x, addr, i, op0, op1), some switch cases/labels and
   the final return are missing; text kept byte-for-byte.  Visible logic:
   lazily measure baseline costs (integer constant, symbol address,
   address arithmetic) for both size and speed, then price EXPR by its
   tree code, recursing on operands and capping at the spill cost.  */
3332 force_expr_to_var_cost (tree expr
, bool speed
)
3334 static bool costs_initialized
= false;
3335 static unsigned integer_cost
[2];
3336 static unsigned symbol_cost
[2];
3337 static unsigned address_cost
[2];
3339 comp_cost cost0
, cost1
, cost
;
3340 enum machine_mode mode
;
/* One-time measurement of baseline costs using a synthetic static var.  */
3342 if (!costs_initialized
)
3344 tree type
= build_pointer_type (integer_type_node
);
3349 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3350 TREE_STATIC (var
) = 1;
3351 x
= produce_memory_decl_rtl (var
, NULL
);
3352 SET_DECL_RTL (var
, x
);
3354 addr
= build1 (ADDR_EXPR
, type
, var
);
/* i == 0 measures size costs, i == 1 speed costs.  */
3357 for (i
= 0; i
< 2; i
++)
3359 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3362 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3365 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3367 build_int_cst (sizetype
, 2000)), i
) + 1;
3368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3370 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3371 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3372 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3373 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3374 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3375 fprintf (dump_file
, "\n");
3379 costs_initialized
= true;
/* SSA names / variables are already in a register: cheap.  */
3384 if (SSA_VAR_P (expr
))
/* Invariants: constants, symbol addresses, and general addresses each
   use their measured baseline.  */
3387 if (is_gimple_min_invariant (expr
))
3389 if (TREE_CODE (expr
) == INTEGER_CST
)
3390 return new_cost (integer_cost
[speed
], 0);
3392 if (TREE_CODE (expr
) == ADDR_EXPR
)
3394 tree obj
= TREE_OPERAND (expr
, 0);
3396 if (TREE_CODE (obj
) == VAR_DECL
3397 || TREE_CODE (obj
) == PARM_DECL
3398 || TREE_CODE (obj
) == RESULT_DECL
)
3399 return new_cost (symbol_cost
[speed
], 0);
3402 return new_cost (address_cost
[speed
], 0);
/* Recurse into binary/unary arithmetic operands.  */
3405 switch (TREE_CODE (expr
))
3407 case POINTER_PLUS_EXPR
:
3411 op0
= TREE_OPERAND (expr
, 0);
3412 op1
= TREE_OPERAND (expr
, 1);
3416 if (is_gimple_val (op0
))
3419 cost0
= force_expr_to_var_cost (op0
, speed
);
3421 if (is_gimple_val (op1
))
3424 cost1
= force_expr_to_var_cost (op1
, speed
);
3429 op0
= TREE_OPERAND (expr
, 0);
3433 if (is_gimple_val (op0
))
3436 cost0
= force_expr_to_var_cost (op0
, speed
);
3442 /* Just an arbitrary value, FIXME. */
3443 return new_cost (target_spill_cost
[speed
], 0);
/* Price the operation itself in the expression's machine mode.  */
3446 mode
= TYPE_MODE (TREE_TYPE (expr
));
3447 switch (TREE_CODE (expr
))
3449 case POINTER_PLUS_EXPR
:
3453 cost
= new_cost (add_cost (mode
, speed
), 0);
3457 if (cst_and_fits_in_hwi (op0
))
3458 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3459 else if (cst_and_fits_in_hwi (op1
))
3460 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3462 return new_cost (target_spill_cost
[speed
], 0);
3469 cost
= add_costs (cost
, cost0
);
3470 cost
= add_costs (cost
, cost1
);
3472 /* Bound the cost by target_spill_cost. The parts of complicated
3473 computations often are either loop invariant or at least can
3474 be shared between several iv uses, so letting this grow without
3475 limits would not give reasonable results. */
3476 if (cost
.cost
> (int) target_spill_cost
[speed
])
3477 cost
.cost
= target_spill_cost
[speed
];
3482 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3483 invariants the computation depends on. */
/* NOTE(review): damaged extraction -- return type, braces and the guard
   around the depends_on walk are missing.  Visible logic: record invariant
   dependencies via walk_tree, then delegate the pricing.  */
3486 force_var_cost (struct ivopts_data
*data
,
3487 tree expr
, bitmap
*depends_on
)
/* fd_ivopts_data is presumably file-scope state read by find_depends --
   TODO confirm in the undamaged original.  */
3491 fd_ivopts_data
= data
;
3492 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3495 return force_expr_to_var_cost (expr
, data
->speed
);
3498 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3499 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3500 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
/* NOTE(review): damaged extraction -- return type, braces, the
   core/toffset declarations, part of the bail-out condition and the final
   zero-cost returns are missing; text kept byte-for-byte.  */
3504 split_address_cost (struct ivopts_data
*data
,
3505 tree addr
, bool *symbol_present
, bool *var_present
,
3506 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3509 HOST_WIDE_INT bitsize
;
3510 HOST_WIDE_INT bitpos
;
3512 enum machine_mode mode
;
3513 int unsignedp
, volatilep
;
/* Decompose ADDR into its base object plus bit offset.  */
3515 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3516 &unsignedp
, &volatilep
, false);
/* Non-byte-aligned or non-VAR_DECL bases cannot be split; fall back to
   pricing the whole address as a variable.  */
3519 || bitpos
% BITS_PER_UNIT
!= 0
3520 || TREE_CODE (core
) != VAR_DECL
)
3522 *symbol_present
= false;
3523 *var_present
= true;
3524 fd_ivopts_data
= data
;
3525 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3526 return new_cost (target_spill_cost
[data
->speed
], 0);
3529 *offset
+= bitpos
/ BITS_PER_UNIT
;
/* Static/external decls become the symbol part; others the var part.  */
3530 if (TREE_STATIC (core
)
3531 || DECL_EXTERNAL (core
))
3533 *symbol_present
= true;
3534 *var_present
= false;
3538 *symbol_present
= false;
3539 *var_present
= true;
3543 /* Estimates cost of expressing difference of addresses E1 - E2 as
3544 var + symbol + offset. The value of offset is added to OFFSET,
3545 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3546 part is missing. DEPENDS_ON is a set of the invariants the computation
/* NOTE(review): damaged extraction -- return type, braces, the `type`
   declaration and the constant-difference return path body are missing;
   text kept byte-for-byte.  */
3550 ptr_difference_cost (struct ivopts_data
*data
,
3551 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3552 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3554 HOST_WIDE_INT diff
= 0;
3555 aff_tree aff_e1
, aff_e2
;
3558 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
/* Constant pointer difference: fold it entirely into *OFFSET.  */
3560 if (ptr_difference_const (e1
, e2
, &diff
))
3563 *symbol_present
= false;
3564 *var_present
= false;
/* E2 == 0: just split E1's address.  */
3568 if (integer_zerop (e2
))
3569 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3570 symbol_present
, var_present
, offset
, depends_on
);
3572 *symbol_present
= false;
3573 *var_present
= true;
/* General case: price E1 - E2 computed in affine form.  */
3575 type
= signed_type_for (TREE_TYPE (e1
));
3576 tree_to_aff_combination (e1
, type
, &aff_e1
);
3577 tree_to_aff_combination (e2
, type
, &aff_e2
);
3578 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3579 aff_combination_add (&aff_e1
, &aff_e2
);
3581 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3584 /* Estimates cost of expressing difference E1 - E2 as
3585 var + symbol + offset. The value of offset is added to OFFSET,
3586 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3587 part is missing. DEPENDS_ON is a set of the invariants the computation
/* NOTE(review): damaged extraction -- return type, braces, the `type`
   declaration and several zero-cost return bodies are missing; text kept
   byte-for-byte.  */
3591 difference_cost (struct ivopts_data
*data
,
3592 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3593 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3595 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3596 unsigned HOST_WIDE_INT off1
, off2
;
3597 aff_tree aff_e1
, aff_e2
;
/* Peel constant offsets off both operands first.  */
3600 e1
= strip_offset (e1
, &off1
);
3601 e2
= strip_offset (e2
, &off2
);
3602 *offset
+= off1
- off2
;
/* Address expressions get the pointer-specific treatment.  */
3607 if (TREE_CODE (e1
) == ADDR_EXPR
)
3608 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3609 offset
, depends_on
);
3610 *symbol_present
= false;
/* Identical operands cancel: no variable part at all.  */
3612 if (operand_equal_p (e1
, e2
, 0))
3614 *var_present
= false;
3618 *var_present
= true;
3620 if (integer_zerop (e2
))
3621 return force_var_cost (data
, e1
, depends_on
);
/* E1 == 0: cost of computing -E2.  */
3623 if (integer_zerop (e1
))
3625 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3626 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
/* General case: price E1 - E2 via an affine combination.  */
3630 type
= signed_type_for (TREE_TYPE (e1
));
3631 tree_to_aff_combination (e1
, type
, &aff_e1
);
3632 tree_to_aff_combination (e2
, type
, &aff_e2
);
3633 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3634 aff_combination_add (&aff_e1
, &aff_e2
);
3636 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3639 /* Determines the cost of the computation by that USE is expressed
3640 from induction variable CAND. If ADDRESS_P is true, we just need
3641 to create an address from it, otherwise we want to get it into
3642 register. A set of invariants we depend on is stored in
3643 DEPENDS_ON. AT is the statement at that the value is computed.
3644 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3645 addressing is likely. */
/* NOTE(review): damaged extraction -- return type, braces, the trailing
   `can_autoinc` parameter line, declarations of cbase/cstep/cost/rat and
   several condition and fall-through bodies are missing; text kept
   byte-for-byte.  Visible structure: validate the candidate, compute the
   step ratio, decompose use = ubase - ratio*cbase + ratio*var into
   sym/var/offset/ratio parts via difference_cost, then either price the
   address form or the register computation.  */
3648 get_computation_cost_at (struct ivopts_data
*data
,
3649 struct iv_use
*use
, struct iv_cand
*cand
,
3650 bool address_p
, bitmap
*depends_on
, gimple at
,
3653 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3655 tree utype
= TREE_TYPE (ubase
), ctype
;
3656 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3657 HOST_WIDE_INT ratio
, aratio
;
3658 bool var_present
, symbol_present
, stmt_is_after_inc
;
3661 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3665 /* Only consider real candidates. */
3667 return infinite_cost
;
3669 cbase
= cand
->iv
->base
;
3670 cstep
= cand
->iv
->step
;
3671 ctype
= TREE_TYPE (cbase
);
3673 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3675 /* We do not have a precision to express the values of use. */
3676 return infinite_cost
;
3681 /* Do not try to express address of an object with computation based
3682 on address of a different object. This may cause problems in rtl
3683 level alias analysis (that does not expect this to be happening,
3684 as this is illegal in C), and would be unlikely to be useful
3686 if (use
->iv
->base_object
3687 && cand
->iv
->base_object
3688 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3689 return infinite_cost
;
3692 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3694 /* TODO -- add direct handling of this case. */
3698 /* CSTEPI is removed from the offset in case statement is after the
3699 increment. If the step is not constant, we use zero instead.
3700 This is a bit imprecise (there is the extra addition), but
3701 redundancy elimination is likely to transform the code so that
3702 it uses value of the variable before increment anyway,
3703 so it is not that much unrealistic. */
3704 if (cst_and_fits_in_hwi (cstep
))
3705 cstepi
= int_cst_value (cstep
);
3709 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3710 return infinite_cost
;
/* The ratio must fit a signed host-wide int to be usable below.  */
3712 if (double_int_fits_in_shwi_p (rat
))
3713 ratio
= double_int_to_shwi (rat
);
3715 return infinite_cost
;
3718 ctype
= TREE_TYPE (cbase
);
3720 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3721 or ratio == 1, it is better to handle this like
3723 ubase - ratio * cbase + ratio * var
3725 (also holds in the case ratio == -1, TODO. */
3727 if (cst_and_fits_in_hwi (cbase
))
3729 offset
= - ratio
* int_cst_value (cbase
);
3730 cost
= difference_cost (data
,
3731 ubase
, build_int_cst (utype
, 0),
3732 &symbol_present
, &var_present
, &offset
,
3735 else if (ratio
== 1)
3737 cost
= difference_cost (data
,
3739 &symbol_present
, &var_present
, &offset
,
3743 && !POINTER_TYPE_P (ctype
)
3744 && multiplier_allowed_in_address_p (ratio
,
3745 TYPE_MODE (TREE_TYPE (utype
))))
3748 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
3749 cost
= difference_cost (data
,
3751 &symbol_present
, &var_present
, &offset
,
/* General fallback: force cbase into a variable and add the costs.  */
3756 cost
= force_var_cost (data
, cbase
, depends_on
);
3757 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
3758 cost
= add_costs (cost
,
3759 difference_cost (data
,
3760 ubase
, build_int_cst (utype
, 0),
3761 &symbol_present
, &var_present
,
3762 &offset
, depends_on
));
3765 /* If we are after the increment, the value of the candidate is higher by
3767 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
3768 if (stmt_is_after_inc
)
3769 offset
-= ratio
* cstepi
;
3771 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3772 (symbol/var1/const parts may be omitted). If we are looking for an
3773 address, find the cost of addressing this. */
3775 return add_costs (cost
,
3776 get_address_cost (symbol_present
, var_present
,
3777 offset
, ratio
, cstepi
,
3778 TYPE_MODE (TREE_TYPE (utype
)),
3779 speed
, stmt_is_after_inc
,
3782 /* Otherwise estimate the costs for computing the expression. */
3783 if (!symbol_present
&& !var_present
&& !offset
)
3786 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
3790 /* Symbol + offset should be compile-time computable so consider that they
3791 are added once to the variable, if present. */
3792 if (var_present
&& (symbol_present
|| offset
))
3793 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
)
3794 / AVG_LOOP_NITER (data
->current_loop
);
3796 /* Having offset does not affect runtime cost in case it is added to
3797 symbol, but it increases complexity. */
3801 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
3803 aratio
= ratio
> 0 ? ratio
: -ratio
;
3805 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
3809 *can_autoinc
= false;
3812 /* Just get the expression, expand it and measure the cost. */
3813 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3816 return infinite_cost
;
3819 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3821 return new_cost (computation_cost (comp
, speed
), 0);
3825 /* Determines the cost of the computation by that USE is expressed
3826 from induction variable CAND. If ADDRESS_P is true, we just need
3827 to create an address from it, otherwise we want to get it into
3828 register. A set of invariants we depend on is stored in
3829 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3830 autoinc addressing is likely. */
/* NOTE(review): thin wrapper -- delegates to get_computation_cost_at at
   the use's own statement.  The trailing argument line (presumably
   can_autoinc) was lost in extraction.  */
3833 get_computation_cost (struct ivopts_data
*data
,
3834 struct iv_use
*use
, struct iv_cand
*cand
,
3835 bool address_p
, bitmap
*depends_on
, bool *can_autoinc
)
3837 return get_computation_cost_at (data
,
3838 use
, cand
, address_p
, depends_on
, use
->stmt
,
3842 /* Determines cost of basing replacement of USE on CAND in a generic
/* NOTE(review): damaged extraction -- return type, braces, the
   cost/depends_on declarations and the early `return true` after the
   zero-cost shortcut are missing; text kept byte-for-byte.  */
3846 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3847 struct iv_use
*use
, struct iv_cand
*cand
)
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3856 if (cand
->pos
== IP_ORIGINAL
3857 && cand
->incremented_at
== use
->stmt
)
3859 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
);
3863 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
, NULL
);
3864 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
/* Report whether the candidate is usable at all for this use.  */
3866 return !infinite_cost_p (cost
);
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
/* NOTE(review): damaged extraction -- return type, braces, the
   depends_on/can_autoinc declarations and the tail of the
   get_computation_cost argument list are missing; text kept
   byte-for-byte.  */
3872 determine_use_iv_cost_address (struct ivopts_data
*data
,
3873 struct iv_use
*use
, struct iv_cand
*cand
)
3877 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
/* A candidate created for this use's autoincrement already pays its
   step elsewhere: refund it here.  */
3880 if (cand
->ainc_use
== use
)
3883 cost
.cost
-= cand
->cost_step
;
3884 /* If we generated the candidate solely for exploiting autoincrement
3885 opportunities, and it turns out it can't be used, set the cost to
3886 infinity to make sure we ignore it. */
3887 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
3888 cost
= infinite_cost
;
3890 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3892 return !infinite_cost_p (cost
);
3895 /* Computes value of candidate CAND at position AT in iteration NITER, and
3896 stores it to VAL. */
/* NOTE(review): damaged extraction -- return type, braces and the VAL
   parameter line are missing; text kept byte-for-byte.  Visible logic:
   val = base + step * niter (plus one extra step when AT follows the
   increment), computed in affine form.  */
3899 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
3902 aff_tree step
, delta
, nit
;
3903 struct iv
*iv
= cand
->iv
;
3904 tree type
= TREE_TYPE (iv
->base
);
/* Pointer IVs step in sizetype, not in the pointer type itself.  */
3905 tree steptype
= type
;
3906 if (POINTER_TYPE_P (type
))
3907 steptype
= sizetype
;
3909 tree_to_aff_combination (iv
->step
, steptype
, &step
);
3910 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
3911 aff_combination_convert (&nit
, steptype
);
/* delta = step * niter.  */
3912 aff_combination_mult (&nit
, &step
, &delta
);
3913 if (stmt_after_increment (loop
, cand
, at
))
3914 aff_combination_add (&delta
, &step
);
3916 tree_to_aff_combination (iv
->base
, type
, val
);
3917 aff_combination_add (val
, &delta
);
3920 /* Returns period of induction variable iv. */
/* NOTE(review): damaged extraction -- return type, braces, the pow2div
   declaration and the final return are missing; text kept
   byte-for-byte.  */
3923 iv_period (struct iv
*iv
)
3925 tree step
= iv
->step
, period
, type
;
3928 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3930 /* Period of the iv is gcd (step, type range). Since type range is power
3931 of two, it suffices to determine the maximum power of two that divides
3933 pow2div
= num_ending_zeros (step
);
3934 type
= unsigned_type_for (TREE_TYPE (step
));
/* period = 2^(precision - pow2div) - 1, as a low-bits mask.  */
3936 period
= build_low_bits_mask (type
,
3937 (TYPE_PRECISION (type
)
3938 - tree_low_cst (pow2div
, 1)));
3943 /* Returns the comparison operator used when eliminating the iv USE. */
/* NOTE(review): damaged extraction -- braces and the ex_bb/exit
   declarations are missing; text kept byte-for-byte.  Picks EQ_EXPR or
   NE_EXPR depending on which successor edge leaves the loop.  */
3945 static enum tree_code
3946 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3948 struct loop
*loop
= data
->current_loop
;
3952 ex_bb
= gimple_bb (use
->stmt
);
/* Choose the successor edge that exits the loop.  */
3953 exit
= EDGE_SUCC (ex_bb
, 0);
3954 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3955 exit
= EDGE_SUCC (ex_bb
, 1);
3957 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3960 /* Check whether it is possible to express the condition in USE by comparison
3961 of candidate CAND. If so, store the value compared with to BOUND. */
/* NOTE(review): damaged extraction -- return type, braces, the
   nit/period/ex_bb/exit/bnd declarations, several `return false` bodies
   and the final `return true` are missing; text kept byte-for-byte.  */
3964 may_eliminate_iv (struct ivopts_data
*data
,
3965 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3970 struct loop
*loop
= data
->current_loop
;
/* Elimination needs a constant candidate step.  */
3973 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3976 /* For now works only for exits that dominate the loop latch.
3977 TODO: extend to other conditions inside loop body. */
3978 ex_bb
= gimple_bb (use
->stmt
);
3979 if (use
->stmt
!= last_stmt (ex_bb
)
3980 || gimple_code (use
->stmt
) != GIMPLE_COND
3981 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
/* Identify the true loop-exit edge; both successors inside means this
   is not an exit test.  */
3984 exit
= EDGE_SUCC (ex_bb
, 0);
3985 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3986 exit
= EDGE_SUCC (ex_bb
, 1);
3987 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3990 nit
= niter_for_exit (data
, exit
);
3994 /* Determine whether we can use the variable to test the exit condition.
3995 This is the case iff the period of the induction variable is greater
3996 than the number of iterations for which the exit condition is true. */
3997 period
= iv_period (cand
->iv
);
3999 /* If the number of iterations is constant, compare against it directly. */
4000 if (TREE_CODE (nit
) == INTEGER_CST
)
4002 if (!tree_int_cst_lt (nit
, period
))
4006 /* If not, and if this is the only possible exit of the loop, see whether
4007 we can get a conservative estimate on the number of iterations of the
4008 entire loop and compare against that instead. */
4009 else if (loop_only_exit_p (loop
, exit
))
4011 double_int period_value
, max_niter
;
4012 if (!estimated_loop_iterations (loop
, true, &max_niter
))
4014 period_value
= tree_to_double_int (period
);
4015 if (double_int_ucmp (max_niter
, period_value
) >= 0)
4019 /* Otherwise, punt. */
/* Compute the candidate's value at the final iteration; that becomes
   the bound to compare against.  */
4023 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
4025 *bound
= aff_combination_to_tree (&bnd
);
4026 /* It is unlikely that computing the number of iterations using division
4027 would be more profitable than keeping the original induction variable. */
4028 if (expression_expensive_p (*bound
))
4033 /* Determines cost of basing replacement of USE on CAND in a condition. */
4036 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4037 struct iv_use
*use
, struct iv_cand
*cand
)
4039 tree bound
= NULL_TREE
;
4041 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4042 comp_cost elim_cost
, express_cost
, cost
;
4045 /* Only consider real candidates. */
4048 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
);
4052 /* Try iv elimination. */
4053 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4055 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4056 /* The bound is a loop invariant, so it will be only computed
4058 elim_cost
.cost
/= AVG_LOOP_NITER (data
->current_loop
);
4061 elim_cost
= infinite_cost
;
4063 /* Try expressing the original giv. If it is compared with an invariant,
4064 note that we cannot get rid of it. */
4065 ok
= extract_cond_operands (data
, use
->stmt
, NULL
, NULL
, NULL
, &cmp_iv
);
4068 express_cost
= get_computation_cost (data
, use
, cand
, false,
4069 &depends_on_express
, NULL
);
4070 fd_ivopts_data
= data
;
4071 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4073 /* Choose the better approach, preferring the eliminated IV. */
4074 if (compare_costs (elim_cost
, express_cost
) <= 0)
4077 depends_on
= depends_on_elim
;
4078 depends_on_elim
= NULL
;
4082 cost
= express_cost
;
4083 depends_on
= depends_on_express
;
4084 depends_on_express
= NULL
;
4088 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4090 if (depends_on_elim
)
4091 BITMAP_FREE (depends_on_elim
);
4092 if (depends_on_express
)
4093 BITMAP_FREE (depends_on_express
);
4095 return !infinite_cost_p (cost
);
4098 /* Determines cost of basing replacement of USE on CAND. Returns false
4099 if USE cannot be based on CAND. */
4102 determine_use_iv_cost (struct ivopts_data
*data
,
4103 struct iv_use
*use
, struct iv_cand
*cand
)
4107 case USE_NONLINEAR_EXPR
:
4108 return determine_use_iv_cost_generic (data
, use
, cand
);
4111 return determine_use_iv_cost_address (data
, use
, cand
);
4114 return determine_use_iv_cost_condition (data
, use
, cand
);
4121 /* Return true if get_computation_cost indicates that autoincrement is
4122 a possibility for the pair of USE and CAND, false otherwise. */
4125 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4126 struct iv_cand
*cand
)
4132 if (use
->type
!= USE_ADDRESS
)
4135 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4138 BITMAP_FREE (depends_on
);
4140 return !infinite_cost_p (cost
) && can_autoinc
;
4143 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4144 use that allows autoincrement, and set their AINC_USE if possible. */
4147 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4151 for (i
= 0; i
< n_iv_cands (data
); i
++)
4153 struct iv_cand
*cand
= iv_cand (data
, i
);
4154 struct iv_use
*closest
= NULL
;
4155 if (cand
->pos
!= IP_ORIGINAL
)
4157 for (j
= 0; j
< n_iv_uses (data
); j
++)
4159 struct iv_use
*use
= iv_use (data
, j
);
4160 unsigned uid
= gimple_uid (use
->stmt
);
4161 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4162 || uid
> gimple_uid (cand
->incremented_at
))
4164 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4167 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4169 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4193 /* Determines costs of basing the use of the iv on an iv candidate. */
4196 determine_use_iv_costs (struct ivopts_data
*data
)
4200 struct iv_cand
*cand
;
4201 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4203 alloc_use_cost_map (data
);
4205 for (i
= 0; i
< n_iv_uses (data
); i
++)
4207 use
= iv_use (data
, i
);
4209 if (data
->consider_all_candidates
)
4211 for (j
= 0; j
< n_iv_cands (data
); j
++)
4213 cand
= iv_cand (data
, j
);
4214 determine_use_iv_cost (data
, use
, cand
);
4221 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4223 cand
= iv_cand (data
, j
);
4224 if (!determine_use_iv_cost (data
, use
, cand
))
4225 bitmap_set_bit (to_clear
, j
);
4228 /* Remove the candidates for that the cost is infinite from
4229 the list of related candidates. */
4230 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4231 bitmap_clear (to_clear
);
4235 BITMAP_FREE (to_clear
);
4237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4239 fprintf (dump_file
, "Use-candidate costs:\n");
4241 for (i
= 0; i
< n_iv_uses (data
); i
++)
4243 use
= iv_use (data
, i
);
4245 fprintf (dump_file
, "Use %d:\n", i
);
4246 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4247 for (j
= 0; j
< use
->n_map_members
; j
++)
4249 if (!use
->cost_map
[j
].cand
4250 || infinite_cost_p (use
->cost_map
[j
].cost
))
4253 fprintf (dump_file
, " %d\t%d\t%d\t",
4254 use
->cost_map
[j
].cand
->id
,
4255 use
->cost_map
[j
].cost
.cost
,
4256 use
->cost_map
[j
].cost
.complexity
);
4257 if (use
->cost_map
[j
].depends_on
)
4258 bitmap_print (dump_file
,
4259 use
->cost_map
[j
].depends_on
, "","");
4260 fprintf (dump_file
, "\n");
4263 fprintf (dump_file
, "\n");
4265 fprintf (dump_file
, "\n");
4269 /* Determines cost of the candidate CAND. */
4272 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4274 comp_cost cost_base
;
4275 unsigned cost
, cost_step
;
4284 /* There are two costs associated with the candidate -- its increment
4285 and its initialization. The second is almost negligible for any loop
4286 that rolls enough, so we take it just very little into account. */
4288 base
= cand
->iv
->base
;
4289 cost_base
= force_var_cost (data
, base
, NULL
);
4290 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4292 cost
= cost_step
+ cost_base
.cost
/ AVG_LOOP_NITER (current_loop
);
4294 /* Prefer the original ivs unless we may gain something by replacing it.
4295 The reason is to make debugging simpler; so this is not relevant for
4296 artificial ivs created by other optimization passes. */
4297 if (cand
->pos
!= IP_ORIGINAL
4298 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4301 /* Prefer not to insert statements into latch unless there are some
4302 already (so that we do not create unnecessary jumps). */
4303 if (cand
->pos
== IP_END
4304 && empty_block_p (ip_end_pos (data
->current_loop
)))
4308 cand
->cost_step
= cost_step
;
4311 /* Determines costs of computation of the candidates. */
4314 determine_iv_costs (struct ivopts_data
*data
)
4318 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4320 fprintf (dump_file
, "Candidate costs:\n");
4321 fprintf (dump_file
, " cand\tcost\n");
4324 for (i
= 0; i
< n_iv_cands (data
); i
++)
4326 struct iv_cand
*cand
= iv_cand (data
, i
);
4328 determine_iv_cost (data
, cand
);
4330 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4331 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4334 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4335 fprintf (dump_file
, "\n");
4338 /* Calculates cost for having SIZE induction variables. */
4341 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4343 /* We add size to the cost, so that we prefer eliminating ivs
4345 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
);
4348 /* For each size of the induction variable set determine the penalty. */
4351 determine_set_costs (struct ivopts_data
*data
)
4355 gimple_stmt_iterator psi
;
4357 struct loop
*loop
= data
->current_loop
;
4360 /* We use the following model (definitely improvable, especially the
4361 cost function -- TODO):
4363 We estimate the number of registers available (using MD data), name it A.
4365 We estimate the number of registers used by the loop, name it U. This
4366 number is obtained as the number of loop phi nodes (not counting virtual
4367 registers and bivs) + the number of variables from outside of the loop.
4369 We set a reserve R (free regs that are used for temporary computations,
4370 etc.). For now the reserve is a constant 3.
4372 Let I be the number of induction variables.
4374 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4375 make a lot of ivs without a reason).
4376 -- if A - R < U + I <= A, the cost is I * PRES_COST
4377 -- if U + I > A, the cost is I * PRES_COST and
4378 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4380 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4382 fprintf (dump_file
, "Global costs:\n");
4383 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4384 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4385 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4389 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4391 phi
= gsi_stmt (psi
);
4392 op
= PHI_RESULT (phi
);
4394 if (!is_gimple_reg (op
))
4397 if (get_iv (data
, op
))
4403 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4405 struct version_info
*info
= ver_info (data
, j
);
4407 if (info
->inv_id
&& info
->has_nonlin_use
)
4411 data
->regs_used
= n
;
4412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4413 fprintf (dump_file
, " regs_used %d\n", n
);
4415 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4417 fprintf (dump_file
, " cost for size:\n");
4418 fprintf (dump_file
, " ivs\tcost\n");
4419 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4420 fprintf (dump_file
, " %d\t%d\n", j
,
4421 ivopts_global_cost_for_size (data
, j
));
4422 fprintf (dump_file
, "\n");
4426 /* Returns true if A is a cheaper cost pair than B. */
4429 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4439 cmp
= compare_costs (a
->cost
, b
->cost
);
4446 /* In case the costs are the same, prefer the cheaper candidate. */
4447 if (a
->cand
->cost
< b
->cand
->cost
)
4453 /* Computes the cost field of IVS structure. */
4456 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4458 comp_cost cost
= ivs
->cand_use_cost
;
4459 cost
.cost
+= ivs
->cand_cost
;
4460 cost
.cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4465 /* Remove invariants in set INVS to set IVS. */
4468 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4476 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4478 ivs
->n_invariant_uses
[iid
]--;
4479 if (ivs
->n_invariant_uses
[iid
] == 0)
4484 /* Set USE not to be expressed by any candidate in IVS. */
4487 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4490 unsigned uid
= use
->id
, cid
;
4491 struct cost_pair
*cp
;
4493 cp
= ivs
->cand_for_use
[uid
];
4499 ivs
->cand_for_use
[uid
] = NULL
;
4500 ivs
->n_cand_uses
[cid
]--;
4502 if (ivs
->n_cand_uses
[cid
] == 0)
4504 bitmap_clear_bit (ivs
->cands
, cid
);
4505 /* Do not count the pseudocandidates. */
4509 ivs
->cand_cost
-= cp
->cand
->cost
;
4511 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4514 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
4516 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4517 iv_ca_recount_cost (data
, ivs
);
4520 /* Add invariants in set INVS to set IVS. */
4523 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4531 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4533 ivs
->n_invariant_uses
[iid
]++;
4534 if (ivs
->n_invariant_uses
[iid
] == 1)
4539 /* Set cost pair for USE in set IVS to CP. */
4542 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4543 struct iv_use
*use
, struct cost_pair
*cp
)
4545 unsigned uid
= use
->id
, cid
;
4547 if (ivs
->cand_for_use
[uid
] == cp
)
4550 if (ivs
->cand_for_use
[uid
])
4551 iv_ca_set_no_cp (data
, ivs
, use
);
4558 ivs
->cand_for_use
[uid
] = cp
;
4559 ivs
->n_cand_uses
[cid
]++;
4560 if (ivs
->n_cand_uses
[cid
] == 1)
4562 bitmap_set_bit (ivs
->cands
, cid
);
4563 /* Do not count the pseudocandidates. */
4567 ivs
->cand_cost
+= cp
->cand
->cost
;
4569 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4572 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
4573 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4574 iv_ca_recount_cost (data
, ivs
);
4578 /* Extend set IVS by expressing USE by some of the candidates in it
4582 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4585 struct cost_pair
*best_cp
= NULL
, *cp
;
4589 gcc_assert (ivs
->upto
>= use
->id
);
4591 if (ivs
->upto
== use
->id
)
4597 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4599 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4601 if (cheaper_cost_pair (cp
, best_cp
))
4605 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4608 /* Get cost for assignment IVS. */
4611 iv_ca_cost (struct iv_ca
*ivs
)
4613 /* This was a conditional expression but it triggered a bug in
4616 return infinite_cost
;
4621 /* Returns true if all dependences of CP are among invariants in IVS. */
4624 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4629 if (!cp
->depends_on
)
4632 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4634 if (ivs
->n_invariant_uses
[i
] == 0)
4641 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4642 it before NEXT_CHANGE. */
4644 static struct iv_ca_delta
*
4645 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4646 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4648 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4651 change
->old_cp
= old_cp
;
4652 change
->new_cp
= new_cp
;
4653 change
->next_change
= next_change
;
4658 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4661 static struct iv_ca_delta
*
4662 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4664 struct iv_ca_delta
*last
;
4672 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4674 last
->next_change
= l2
;
4679 /* Returns candidate by that USE is expressed in IVS. */
4681 static struct cost_pair
*
4682 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4684 return ivs
->cand_for_use
[use
->id
];
4687 /* Reverse the list of changes DELTA, forming the inverse to it. */
4689 static struct iv_ca_delta
*
4690 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4692 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4693 struct cost_pair
*tmp
;
4695 for (act
= delta
; act
; act
= next
)
4697 next
= act
->next_change
;
4698 act
->next_change
= prev
;
4702 act
->old_cp
= act
->new_cp
;
4709 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4710 reverted instead. */
4713 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4714 struct iv_ca_delta
*delta
, bool forward
)
4716 struct cost_pair
*from
, *to
;
4717 struct iv_ca_delta
*act
;
4720 delta
= iv_ca_delta_reverse (delta
);
4722 for (act
= delta
; act
; act
= act
->next_change
)
4726 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4727 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4731 iv_ca_delta_reverse (delta
);
4734 /* Returns true if CAND is used in IVS. */
4737 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4739 return ivs
->n_cand_uses
[cand
->id
] > 0;
4742 /* Returns number of induction variable candidates in the set IVS. */
4745 iv_ca_n_cands (struct iv_ca
*ivs
)
4747 return ivs
->n_cands
;
4750 /* Free the list of changes DELTA. */
4753 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4755 struct iv_ca_delta
*act
, *next
;
4757 for (act
= *delta
; act
; act
= next
)
4759 next
= act
->next_change
;
4766 /* Allocates new iv candidates assignment. */
4768 static struct iv_ca
*
4769 iv_ca_new (struct ivopts_data
*data
)
4771 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4775 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4776 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4777 nw
->cands
= BITMAP_ALLOC (NULL
);
4780 nw
->cand_use_cost
= zero_cost
;
4782 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4783 nw
->cost
= zero_cost
;
4788 /* Free memory occupied by the set IVS. */
4791 iv_ca_free (struct iv_ca
**ivs
)
4793 free ((*ivs
)->cand_for_use
);
4794 free ((*ivs
)->n_cand_uses
);
4795 BITMAP_FREE ((*ivs
)->cands
);
4796 free ((*ivs
)->n_invariant_uses
);
4801 /* Dumps IVS to FILE. */
4804 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4806 const char *pref
= " invariants ";
4808 comp_cost cost
= iv_ca_cost (ivs
);
4810 fprintf (file
, " cost %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
4811 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4813 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4814 if (ivs
->n_invariant_uses
[i
])
4816 fprintf (file
, "%s%d", pref
, i
);
4819 fprintf (file
, "\n");
4822 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4823 new set, and store differences in DELTA. Number of induction variables
4824 in the new set is stored to N_IVS. */
4827 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4828 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4834 struct cost_pair
*old_cp
, *new_cp
;
4837 for (i
= 0; i
< ivs
->upto
; i
++)
4839 use
= iv_use (data
, i
);
4840 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4843 && old_cp
->cand
== cand
)
4846 new_cp
= get_use_iv_cost (data
, use
, cand
);
4850 if (!iv_ca_has_deps (ivs
, new_cp
))
4853 if (!cheaper_cost_pair (new_cp
, old_cp
))
4856 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4859 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4860 cost
= iv_ca_cost (ivs
);
4862 *n_ivs
= iv_ca_n_cands (ivs
);
4863 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4868 /* Try narrowing set IVS by removing CAND. Return the cost of
4869 the new set and store the differences in DELTA. */
4872 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4873 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4877 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4879 struct iv_cand
*cnd
;
4883 for (i
= 0; i
< n_iv_uses (data
); i
++)
4885 use
= iv_use (data
, i
);
4887 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4888 if (old_cp
->cand
!= cand
)
4893 if (data
->consider_all_candidates
)
4895 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4900 cnd
= iv_cand (data
, ci
);
4902 cp
= get_use_iv_cost (data
, use
, cnd
);
4905 if (!iv_ca_has_deps (ivs
, cp
))
4908 if (!cheaper_cost_pair (cp
, new_cp
))
4916 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4921 cnd
= iv_cand (data
, ci
);
4923 cp
= get_use_iv_cost (data
, use
, cnd
);
4926 if (!iv_ca_has_deps (ivs
, cp
))
4929 if (!cheaper_cost_pair (cp
, new_cp
))
4938 iv_ca_delta_free (delta
);
4939 return infinite_cost
;
4942 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4945 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4946 cost
= iv_ca_cost (ivs
);
4947 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4952 /* Try optimizing the set of candidates IVS by removing candidates different
4953 from to EXCEPT_CAND from it. Return cost of the new set, and store
4954 differences in DELTA. */
4957 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4958 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4961 struct iv_ca_delta
*act_delta
, *best_delta
;
4963 comp_cost best_cost
, acost
;
4964 struct iv_cand
*cand
;
4967 best_cost
= iv_ca_cost (ivs
);
4969 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4971 cand
= iv_cand (data
, i
);
4973 if (cand
== except_cand
)
4976 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4978 if (compare_costs (acost
, best_cost
) < 0)
4981 iv_ca_delta_free (&best_delta
);
4982 best_delta
= act_delta
;
4985 iv_ca_delta_free (&act_delta
);
4994 /* Recurse to possibly remove other unnecessary ivs. */
4995 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4996 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4997 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4998 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5002 /* Tries to extend the sets IVS in the best possible way in order
5003 to express the USE. */
5006 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5009 comp_cost best_cost
, act_cost
;
5012 struct iv_cand
*cand
;
5013 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5014 struct cost_pair
*cp
;
5016 iv_ca_add_use (data
, ivs
, use
);
5017 best_cost
= iv_ca_cost (ivs
);
5019 cp
= iv_ca_cand_for_use (ivs
, use
);
5022 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5023 iv_ca_set_no_cp (data
, ivs
, use
);
5026 /* First try important candidates not based on any memory object. Only if
5027 this fails, try the specific ones. Rationale -- in loops with many
5028 variables the best choice often is to use just one generic biv. If we
5029 added here many ivs specific to the uses, the optimization algorithm later
5030 would be likely to get stuck in a local minimum, thus causing us to create
5031 too many ivs. The approach from few ivs to more seems more likely to be
5032 successful -- starting from few ivs, replacing an expensive use by a
5033 specific iv should always be a win. */
5034 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5036 cand
= iv_cand (data
, i
);
5038 if (cand
->iv
->base_object
!= NULL_TREE
)
5041 if (iv_ca_cand_used_p (ivs
, cand
))
5044 cp
= get_use_iv_cost (data
, use
, cand
);
5048 iv_ca_set_cp (data
, ivs
, use
, cp
);
5049 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5050 iv_ca_set_no_cp (data
, ivs
, use
);
5051 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5053 if (compare_costs (act_cost
, best_cost
) < 0)
5055 best_cost
= act_cost
;
5057 iv_ca_delta_free (&best_delta
);
5058 best_delta
= act_delta
;
5061 iv_ca_delta_free (&act_delta
);
5064 if (infinite_cost_p (best_cost
))
5066 for (i
= 0; i
< use
->n_map_members
; i
++)
5068 cp
= use
->cost_map
+ i
;
5073 /* Already tried this. */
5074 if (cand
->important
&& cand
->iv
->base_object
== NULL_TREE
)
5077 if (iv_ca_cand_used_p (ivs
, cand
))
5081 iv_ca_set_cp (data
, ivs
, use
, cp
);
5082 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5083 iv_ca_set_no_cp (data
, ivs
, use
);
5084 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5087 if (compare_costs (act_cost
, best_cost
) < 0)
5089 best_cost
= act_cost
;
5092 iv_ca_delta_free (&best_delta
);
5093 best_delta
= act_delta
;
5096 iv_ca_delta_free (&act_delta
);
5100 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5101 iv_ca_delta_free (&best_delta
);
5103 return !infinite_cost_p (best_cost
);
5106 /* Finds an initial assignment of candidates to uses. */
5108 static struct iv_ca
*
5109 get_initial_solution (struct ivopts_data
*data
)
5111 struct iv_ca
*ivs
= iv_ca_new (data
);
5114 for (i
= 0; i
< n_iv_uses (data
); i
++)
5115 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5124 /* Tries to improve set of induction variables IVS. */
5127 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5130 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5131 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5132 struct iv_cand
*cand
;
5134 /* Try extending the set of induction variables by one. */
5135 for (i
= 0; i
< n_iv_cands (data
); i
++)
5137 cand
= iv_cand (data
, i
);
5139 if (iv_ca_cand_used_p (ivs
, cand
))
5142 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5146 /* If we successfully added the candidate and the set is small enough,
5147 try optimizing it by removing other candidates. */
5148 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5150 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5151 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5152 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5153 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5156 if (compare_costs (acost
, best_cost
) < 0)
5159 iv_ca_delta_free (&best_delta
);
5160 best_delta
= act_delta
;
5163 iv_ca_delta_free (&act_delta
);
5168 /* Try removing the candidates from the set instead. */
5169 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5171 /* Nothing more we can do. */
5176 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5177 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5178 iv_ca_delta_free (&best_delta
);
5182 /* Attempts to find the optimal set of induction variables. We do simple
5183 greedy heuristic -- we try to replace at most one candidate in the selected
5184 solution and remove the unused ivs while this improves the cost. */
5186 static struct iv_ca
*
5187 find_optimal_iv_set (struct ivopts_data
*data
)
5193 /* Get the initial solution. */
5194 set
= get_initial_solution (data
);
5197 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5198 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5204 fprintf (dump_file
, "Initial set of candidates:\n");
5205 iv_ca_dump (data
, dump_file
, set
);
5208 while (try_improve_iv_set (data
, set
))
5210 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5212 fprintf (dump_file
, "Improved to:\n");
5213 iv_ca_dump (data
, dump_file
, set
);
5217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5219 comp_cost cost
= iv_ca_cost (set
);
5220 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n", cost
.cost
, cost
.complexity
);
5223 for (i
= 0; i
< n_iv_uses (data
); i
++)
5225 use
= iv_use (data
, i
);
5226 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5232 /* Creates a new induction variable corresponding to CAND. */
5235 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5237 gimple_stmt_iterator incr_pos
;
5247 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5251 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5259 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
5263 /* Mark that the iv is preserved. */
5264 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5265 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5267 /* Rewrite the increment so that it uses var_before directly. */
5268 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5273 gimple_add_tmp_var (cand
->var_before
);
5274 add_referenced_var (cand
->var_before
);
5276 base
= unshare_expr (cand
->iv
->base
);
5278 create_iv (base
, unshare_expr (cand
->iv
->step
),
5279 cand
->var_before
, data
->current_loop
,
5280 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5283 /* Creates new induction variables described in SET. */
5286 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5289 struct iv_cand
*cand
;
5292 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5294 cand
= iv_cand (data
, i
);
5295 create_new_iv (data
, cand
);
5299 /* Returns the phi-node in BB with result RESULT. */
5302 get_phi_with_result (basic_block bb
, tree result
)
5304 gimple_stmt_iterator i
= gsi_start_phis (bb
);
5306 for (; !gsi_end_p (i
); gsi_next (&i
))
5307 if (gimple_phi_result (gsi_stmt (i
)) == result
)
5308 return gsi_stmt (i
);
5315 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5316 is true, remove also the ssa name defined by the statement. */
5319 remove_statement (gimple stmt
, bool including_defined_name
)
5321 if (gimple_code (stmt
) == GIMPLE_PHI
)
5323 gimple bb_phi
= get_phi_with_result (gimple_bb (stmt
),
5324 gimple_phi_result (stmt
));
5325 gimple_stmt_iterator bsi
= gsi_for_stmt (bb_phi
);
5326 remove_phi_node (&bsi
, including_defined_name
);
5330 gimple_stmt_iterator bsi
= gsi_for_stmt (stmt
);
5331 gsi_remove (&bsi
, true);
5332 release_defs (stmt
);
5336 /* Rewrites USE (definition of iv used in a nonlinear expression)
5337 using candidate CAND. */
5340 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5341 struct iv_use
*use
, struct iv_cand
*cand
)
5346 gimple_stmt_iterator bsi
;
5348 /* An important special case -- if we are asked to express value of
5349 the original iv by itself, just exit; there is no need to
5350 introduce a new computation (that might also need casting the
5351 variable to unsigned and back). */
5352 if (cand
->pos
== IP_ORIGINAL
5353 && cand
->incremented_at
== use
->stmt
)
5355 tree step
, ctype
, utype
;
5356 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5358 gcc_assert (is_gimple_assign (use
->stmt
));
5359 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5361 step
= cand
->iv
->step
;
5362 ctype
= TREE_TYPE (step
);
5363 utype
= TREE_TYPE (cand
->var_after
);
5364 if (TREE_CODE (step
) == NEGATE_EXPR
)
5366 incr_code
= MINUS_EXPR
;
5367 step
= TREE_OPERAND (step
, 0);
5370 /* Check whether we may leave the computation unchanged.
5371 This is the case only if it does not rely on other
5372 computations in the loop -- otherwise, the computation
5373 we rely upon may be removed in remove_unused_ivs,
5374 thus leading to ICE. */
5375 old_code
= gimple_assign_rhs_code (use
->stmt
);
5376 if (old_code
== PLUS_EXPR
5377 || old_code
== MINUS_EXPR
5378 || old_code
== POINTER_PLUS_EXPR
)
5380 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5381 op
= gimple_assign_rhs2 (use
->stmt
);
5382 else if (old_code
!= MINUS_EXPR
5383 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5384 op
= gimple_assign_rhs1 (use
->stmt
);
5392 && (TREE_CODE (op
) == INTEGER_CST
5393 || operand_equal_p (op
, step
, 0)))
5396 /* Otherwise, add the necessary computations to express
5398 op
= fold_convert (ctype
, cand
->var_before
);
5399 comp
= fold_convert (utype
,
5400 build2 (incr_code
, ctype
, op
,
5401 unshare_expr (step
)));
5405 comp
= get_computation (data
->current_loop
, use
, cand
);
5406 gcc_assert (comp
!= NULL_TREE
);
5409 switch (gimple_code (use
->stmt
))
5412 tgt
= PHI_RESULT (use
->stmt
);
5414 /* If we should keep the biv, do not replace it. */
5415 if (name_info (data
, tgt
)->preserve_biv
)
5418 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5422 tgt
= gimple_assign_lhs (use
->stmt
);
5423 bsi
= gsi_for_stmt (use
->stmt
);
5430 op
= force_gimple_operand_gsi (&bsi
, comp
, false, SSA_NAME_VAR (tgt
),
5431 true, GSI_SAME_STMT
);
5433 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
5435 ass
= gimple_build_assign (tgt
, op
);
5436 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
5437 remove_statement (use
->stmt
, false);
5441 gimple_assign_set_rhs_from_tree (&bsi
, op
);
5442 use
->stmt
= gsi_stmt (bsi
);
5446 /* Replaces ssa name in index IDX by its basic variable. Callback for
5450 idx_remove_ssa_names (tree base
, tree
*idx
,
5451 void *data ATTRIBUTE_UNUSED
)
5455 if (TREE_CODE (*idx
) == SSA_NAME
)
5456 *idx
= SSA_NAME_VAR (*idx
);
5458 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
5460 op
= &TREE_OPERAND (base
, 2);
5462 && TREE_CODE (*op
) == SSA_NAME
)
5463 *op
= SSA_NAME_VAR (*op
);
5464 op
= &TREE_OPERAND (base
, 3);
5466 && TREE_CODE (*op
) == SSA_NAME
)
5467 *op
= SSA_NAME_VAR (*op
);
5473 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5476 unshare_and_remove_ssa_names (tree ref
)
5478 ref
= unshare_expr (ref
);
5479 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5484 /* Copies the reference information from OLD_REF to NEW_REF. */
5487 copy_ref_info (tree new_ref
, tree old_ref
)
5489 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5490 copy_mem_ref_info (new_ref
, old_ref
);
5492 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5495 /* Rewrites USE (address that is an iv) using candidate CAND. */
5498 rewrite_use_address (struct ivopts_data
*data
,
5499 struct iv_use
*use
, struct iv_cand
*cand
)
5502 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5506 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5508 unshare_aff_combination (&aff
);
5510 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
, data
->speed
);
5511 copy_ref_info (ref
, *use
->op_p
);
5515 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5519 rewrite_use_compare (struct ivopts_data
*data
,
5520 struct iv_use
*use
, struct iv_cand
*cand
)
5522 tree comp
, *var_p
, op
, bound
;
5523 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5524 enum tree_code compare
;
5525 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5531 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5532 tree var_type
= TREE_TYPE (var
);
5535 compare
= iv_elimination_compare (data
, use
);
5536 bound
= unshare_expr (fold_convert (var_type
, bound
));
5537 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
5539 gsi_insert_seq_on_edge_immediate (
5540 loop_preheader_edge (data
->current_loop
),
5543 gimple_cond_set_lhs (use
->stmt
, var
);
5544 gimple_cond_set_code (use
->stmt
, compare
);
5545 gimple_cond_set_rhs (use
->stmt
, op
);
5549 /* The induction variable elimination failed; just express the original
5551 comp
= get_computation (data
->current_loop
, use
, cand
);
5552 gcc_assert (comp
!= NULL_TREE
);
5554 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
5557 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
5558 true, GSI_SAME_STMT
);
5561 /* Rewrites USE using candidate CAND. */
5564 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
5568 case USE_NONLINEAR_EXPR
:
5569 rewrite_use_nonlinear_expr (data
, use
, cand
);
5573 rewrite_use_address (data
, use
, cand
);
5577 rewrite_use_compare (data
, use
, cand
);
5584 update_stmt (use
->stmt
);
5587 /* Rewrite the uses using the selected induction variables. */
5590 rewrite_uses (struct ivopts_data
*data
)
5593 struct iv_cand
*cand
;
5596 for (i
= 0; i
< n_iv_uses (data
); i
++)
5598 use
= iv_use (data
, i
);
5599 cand
= use
->selected
;
5602 rewrite_use (data
, use
, cand
);
5606 /* Removes the ivs that are not used after rewriting. */
5609 remove_unused_ivs (struct ivopts_data
*data
)
5614 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5616 struct version_info
*info
;
5618 info
= ver_info (data
, j
);
5620 && !integer_zerop (info
->iv
->step
)
5622 && !info
->iv
->have_use_for
5623 && !info
->preserve_biv
)
5624 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5628 /* Frees data allocated by the optimization of a single loop. */
5631 free_loop_data (struct ivopts_data
*data
)
5639 pointer_map_destroy (data
->niters
);
5640 data
->niters
= NULL
;
5643 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5645 struct version_info
*info
;
5647 info
= ver_info (data
, i
);
5651 info
->has_nonlin_use
= false;
5652 info
->preserve_biv
= false;
5655 bitmap_clear (data
->relevant
);
5656 bitmap_clear (data
->important_candidates
);
5658 for (i
= 0; i
< n_iv_uses (data
); i
++)
5660 struct iv_use
*use
= iv_use (data
, i
);
5663 BITMAP_FREE (use
->related_cands
);
5664 for (j
= 0; j
< use
->n_map_members
; j
++)
5665 if (use
->cost_map
[j
].depends_on
)
5666 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5667 free (use
->cost_map
);
5670 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5672 for (i
= 0; i
< n_iv_cands (data
); i
++)
5674 struct iv_cand
*cand
= iv_cand (data
, i
);
5678 if (cand
->depends_on
)
5679 BITMAP_FREE (cand
->depends_on
);
5682 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5684 if (data
->version_info_size
< num_ssa_names
)
5686 data
->version_info_size
= 2 * num_ssa_names
;
5687 free (data
->version_info
);
5688 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5691 data
->max_inv_id
= 0;
5693 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5694 SET_DECL_RTL (obj
, NULL_RTX
);
5696 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5699 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5703 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5705 free_loop_data (data
);
5706 free (data
->version_info
);
5707 BITMAP_FREE (data
->relevant
);
5708 BITMAP_FREE (data
->important_candidates
);
5710 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5711 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5712 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5715 /* Optimizes the LOOP. Returns true if anything changed. */
5718 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5720 bool changed
= false;
5721 struct iv_ca
*iv_ca
;
5725 gcc_assert (!data
->niters
);
5726 data
->current_loop
= loop
;
5727 data
->speed
= optimize_loop_for_speed_p (loop
);
5729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5731 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5733 exit
= single_dom_exit (loop
);
5736 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5737 exit
->src
->index
, exit
->dest
->index
);
5738 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
5739 fprintf (dump_file
, "\n");
5742 fprintf (dump_file
, "\n");
5745 body
= get_loop_body (loop
);
5746 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
5749 /* For each ssa name determines whether it behaves as an induction variable
5751 if (!find_induction_variables (data
))
5754 /* Finds interesting uses (item 1). */
5755 find_interesting_uses (data
);
5756 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5759 /* Finds candidates for the induction variables (item 2). */
5760 find_iv_candidates (data
);
5762 /* Calculates the costs (item 3, part 1). */
5763 determine_iv_costs (data
);
5764 determine_use_iv_costs (data
);
5765 determine_set_costs (data
);
5767 /* Find the optimal set of induction variables (item 3, part 2). */
5768 iv_ca
= find_optimal_iv_set (data
);
5773 /* Create the new induction variables (item 4, part 1). */
5774 create_new_ivs (data
, iv_ca
);
5775 iv_ca_free (&iv_ca
);
5777 /* Rewrite the uses (item 4, part 2). */
5778 rewrite_uses (data
);
5780 /* Remove the ivs that are unused after rewriting. */
5781 remove_unused_ivs (data
);
5783 /* We have changed the structure of induction variables; it might happen
5784 that definitions in the scev database refer to some of them that were
5789 free_loop_data (data
);
5794 /* Main entry point. Optimizes induction variables in loops. */
5797 tree_ssa_iv_optimize (void)
5800 struct ivopts_data data
;
5803 tree_ssa_iv_optimize_init (&data
);
5805 /* Optimize the loops starting with the innermost ones. */
5806 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
5808 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5809 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5811 tree_ssa_iv_optimize_loop (&data
, loop
);
5814 tree_ssa_iv_optimize_finalize (&data
);