* lib/wrapper.exp (${tool}_maybe_build_wrapper): Don't unset gluefile.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
/* The infinite cost: effectively forbids a candidate/use combination.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this constant.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  NULL_TREE means the
			   value is a loop invariant (zero step).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; 0 until one is assigned.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Information attached to loop (in loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost of expressing the use by the candidate.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
};
192
/* The data used by the induction variable optimizations.  */

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop (cache keyed
     by the exit edge; see niter_for_exit).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  varray_type iv_uses;

  /* The candidates.  */
  varray_type iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments (a linked list of changes,
   kept so an attempted modification can be rolled back).  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
283
/* Bound on number of candidates below that all candidates are considered
   for each use.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static varray_type decl_rtl_to_reset;
305
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VARRAY_ACTIVE_SIZE (data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VARRAY_ACTIVE_SIZE (data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
}

/* The data for LOOP, stashed in loop->aux by
   tree_ssa_iv_optimize_init.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
345
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
347
348 static edge
349 single_dom_exit (struct loop *loop)
350 {
351 edge exit = loop->single_exit;
352
353 if (!exit)
354 return NULL;
355
356 if (!just_once_each_iteration_p (loop, exit->src))
357 return NULL;
358
359 return exit;
360 }
361
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  /* A NULL step means the value is a loop invariant.  */
  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
406
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  /* op_p may be NULL for uses recorded without a specific operand
     (see record_use callers).  */
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
454
455 /* Dumps information about the uses to FILE. */
456
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
460 {
461 unsigned i;
462 struct iv_use *use;
463
464 for (i = 0; i < n_iv_uses (data); i++)
465 {
466 use = iv_use (data, i);
467
468 dump_use (file, use);
469 fprintf (file, "\n");
470 }
471 }
472
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  /* A NULL iv marks the "pseudocandidate" standing for replacement of the
     final value by direct computation.  */
  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
507
508 /* Returns the info for ssa version VER. */
509
510 static inline struct version_info *
511 ver_info (struct ivopts_data *data, unsigned ver)
512 {
513 return data->version_info + ver;
514 }
515
516 /* Returns the info for ssa name NAME. */
517
518 static inline struct version_info *
519 name_info (struct ivopts_data *data, tree name)
520 {
521 return ver_info (data, SSA_NAME_VERSION (name));
522 }
523
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, stores the sign-extended X to *X and returns true,
   otherwise returns false.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  Built with two shifts so that BITS equal
     to the full width of the type does not shift by the type width.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits), by repeated squaring:
     after the loop inv = b * b^2 * b^4 * ... * b^(2^(bits-2)).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits into the full HOST_WIDE_INT.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
574
575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
576 emitted in LOOP. */
577
578 static bool
579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 {
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582
583 gcc_assert (bb);
584
585 if (sbb == loop->latch)
586 return true;
587
588 if (sbb != bb)
589 return false;
590
591 return stmt == last_stmt (bb);
592 }
593
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* STMT and the increment are in the same block; whichever is seen first
     scanning backwards is the later one.  Scan the block from the end,
     since the original ivs are usually incremented at the end of the loop
     body.  (One of the two statements must be found, so the loop
     terminates.)  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
620
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      /* The increment is at the very end of the latch; nothing in the loop
	 body is after it.  */
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
642
/* Element of the table in that we cache the numbers of iterations obtained
   from exits of the loop.  Keyed by the exit edge; see niter_for_exit.  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};
657
658 /* Hash function for nfe_cache_elt E. */
659
660 static hashval_t
661 nfe_hash (const void *e)
662 {
663 const struct nfe_cache_elt *elt = e;
664
665 return htab_hash_pointer (elt->exit);
666 }
667
668 /* Equality function for nfe_cache_elt E1 and edge E2. */
669
670 static int
671 nfe_eq (const void *e1, const void *e2)
672 {
673 const struct nfe_cache_elt *elt1 = e1;
674
675 return elt1->exit == e2;
676 }
677
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  The result
   is cached in DATA->niters, so number_of_iterations_exit is invoked at
   most once per exit edge.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      /* Not cached yet; compute and remember the result (even a failure,
	 via valid_p).  The entry is owned and eventually freed by the
	 hash table.  */
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
707
708 /* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
710 goes wrong. */
711
712 static struct tree_niter_desc *
713 niter_for_single_dom_exit (struct ivopts_data *data)
714 {
715 edge exit = single_dom_exit (data->current_loop);
716
717 if (!exit)
718 return NULL;
719
720 return niter_for_exit (data, exit);
721 }
722
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  Allocates the per-version info array,
   the bitmaps, the niter cache, a loop_data for each loop (in loop->aux),
   and the varrays of uses and candidates.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  /* Twice num_ssa_names, leaving headroom for names created later.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
				sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  /* Entries are xmalloc'ed in niter_for_exit; free is the deletion
     callback.  */
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
  VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
  VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
}
747
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.
   Recurses through address arithmetic and conversions.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* Only pointer-typed expressions can carry a base object.  */
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant address points to no analyzable object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      /* &*p has the same base object as p itself.  */
      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold (build1 (ADDR_EXPR, ptr_type_node, base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));

      /* Both operands contribute; combine them symbolically.  */
      return fold (build (code, ptr_type_node, op0, op1));

    case NOP_EXPR:
    case CONVERT_EXPR:
      /* Conversions do not change the base object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
800
801 /* Allocates an induction variable with given initial value BASE and step STEP
802 for loop LOOP. */
803
804 static struct iv *
805 alloc_iv (tree base, tree step)
806 {
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
808
809 if (step && integer_zerop (step))
810 step = NULL_TREE;
811
812 iv->base = base;
813 iv->base_object = determine_base_object (base);
814 iv->step = step;
815 iv->biv_p = false;
816 iv->have_use_for = false;
817 iv->use_id = 0;
818 iv->ssa_name = NULL_TREE;
819
820 return iv;
821 }
822
/* Records that the ssa name IV describes an induction variable with initial
   value BASE and step STEP, and marks its version as relevant in DATA.
   IV must not already have a description.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
836
837 /* Finds induction variable declaration for VAR. */
838
839 static struct iv *
840 get_iv (struct ivopts_data *data, tree var)
841 {
842 basic_block bb;
843
844 if (!name_info (data, var)->iv)
845 {
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
847
848 if (!bb
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
851 }
852
853 return name_info (data, var)->iv;
854 }
855
/* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
   does not define a simple induction variable; a biv with zero step is
   reported as an explicit zero constant (so the caller can distinguish it
   from failure).  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;
  tree type = TREE_TYPE (name);

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step))
    return NULL_TREE;

  /* simple_iv reports an invariant with a NULL step; turn it into a zero
     constant of the right type.  */
  if (!step)
    return build_int_cst (type, 0);

  return step;
}
876
877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
878
879 static bool
880 abnormal_ssa_name_p (tree exp)
881 {
882 if (!exp)
883 return false;
884
885 if (TREE_CODE (exp) != SSA_NAME)
886 return false;
887
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
889 }
890
891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
892 abnormal phi node. Callback for for_each_index. */
893
894 static bool
895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
896 void *data ATTRIBUTE_UNUSED)
897 {
898 if (TREE_CODE (base) == ARRAY_REF)
899 {
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
901 return false;
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
903 return false;
904 }
905
906 return !abnormal_ssa_name_p (*index);
907 }
908
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  Recurses through unary/binary/comparison
   expressions and the indices of addresses.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  enum tree_code_class class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* for_each_index returns false as soon as the callback does, i.e. when
     an abnormal name is found somewhere in the indices.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
950
/* Finds basic induction variables: phi nodes in the loop header whose value
   evolves by a constant step.  Records each found biv via set_iv.  Returns
   true if at least one biv was found.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);

      if (!step)
	continue;
      /* A biv with zero step is useless; skip it.  */
      if (cst_and_fits_in_hwi (step)
	  && int_cst_value (step) == 0)
	continue;

      /* The initial value comes from the preheader edge.  */
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      if (contains_abnormal_ssa_name_p (base))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      step = fold_convert (type, step);

      /* FIXME: We do not handle induction variables whose step does
	 not satisfy cst_and_fits_in_hwi.  */
      if (!cst_and_fits_in_hwi (step))
	continue;

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
992
/* Marks basic ivs: for each header phi with an iv description, flags both
   the phi result and the name incremented on the latch edge as bivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The value flowing back around the loop via the latch edge.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1024
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  Returns true on success; on failure *BASE
   and *STEP are left NULL_TREE.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
			tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  /* Only plain assignments to an ssa name can define a giv.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
    return false;

  /* FIXME: We do not handle induction variables whose step does
     not satisfy cst_and_fits_in_hwi.  */
  if (!zero_p (*step)
      && !cst_and_fits_in_hwi (*step))
    return false;

  if (contains_abnormal_ssa_name_p (*base))
    return false;

  return true;
}
1059
1060 /* Finds general ivs in statement STMT. */
1061
1062 static void
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1064 {
1065 tree base, step;
1066
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068 return;
1069
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1071 }
1072
/* Finds general ivs in basic block BB by scanning all its statements.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs in all basic blocks of the current loop, visited in
   dominator order so definitions are seen before uses.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
1097
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
1100
1101 static bool
1102 find_induction_variables (struct ivopts_data *data)
1103 {
1104 unsigned i;
1105 bitmap_iterator bi;
1106
1107 if (!find_bivs (data))
1108 return false;
1109
1110 find_givs (data);
1111 mark_bivs (data);
1112
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1114 {
1115 struct tree_niter_desc *niter;
1116
1117 niter = niter_for_single_dom_exit (data);
1118
1119 if (niter)
1120 {
1121 fprintf (dump_file, " number of iterations ");
1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123 fprintf (dump_file, "\n");
1124
1125 fprintf (dump_file, " may be zero if ");
1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1129 };
1130
1131 fprintf (dump_file, "Induction variables:\n\n");
1132
1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1134 {
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
1137 }
1138 }
1139
1140 return true;
1141 }
1142
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV,
   appending it to DATA->iv_uses.  Returns the new use.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);

  return use;
}
1169
/* Checks whether OP is a loop-level invariant and if so, records it (marks
   its version relevant and assigns it an invariant id if it does not have
   one yet).  NONLINEAR_USE is true if the invariant is used in a way we do
   not handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* Names defined inside the loop are not invariants.  */
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
1196
/* Checks whether the use OP is interesting and if so, records it as TYPE
   (USE_NONLINEAR_EXPR or USE_OUTER).  At most one such use is recorded per
   iv; a repeated find may upgrade an USE_OUTER use to USE_NONLINEAR_EXPR.
   Returns the use, or NULL if OP is not an interesting iv use.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      /* USE_NONLINEAR_EXPR subsumes USE_OUTER.  */
      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  /* A zero-step "iv" is really an invariant.  */
  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record a private copy of the iv description; record_use clears the
     ssa_name field of the copy.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
1247
1248 /* Checks whether the use OP is interesting and if so, records it. */
1249
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1252 {
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1254 }
1255
1256 /* Records a definition of induction variable OP that is used outside of the
1257 loop. */
1258
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1261 {
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1263 }
1264
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  /* Constant conditions are not interesting.  */
  if (integer_zerop (*cond_p)
      || integer_nonzerop (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      /* A bare SSA name is handled as NAME != 0.  */
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  /* Non-SSA operands (constants) are represented by CONST_IV, whose step
     is NULL, i.e. they behave as invariants below.  */
  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is a nonconstant iv here; record the whole
     condition as a compare use based on (a copy of) that iv.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1326
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
1329
1330 bool
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1332 {
1333 basic_block def_bb;
1334 unsigned i, len;
1335
1336 if (is_gimple_min_invariant (expr))
1337 return true;
1338
1339 if (TREE_CODE (expr) == SSA_NAME)
1340 {
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342 if (def_bb
1343 && flow_bb_inside_loop_p (loop, def_bb))
1344 return false;
1345
1346 return true;
1347 }
1348
1349 if (!EXPR_P (expr))
1350 return false;
1351
1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355 return false;
1356
1357 return true;
1358 }
1359
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* The global ivopts data.  */
  tree stmt;				/* Statement the reference occurs in.  */
  tree *step_p;				/* Accumulated step of the whole
					   address; NULL until the first
					   nonzero contribution.  */
};
1370
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, type, iv_type, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* References with special alignment semantics are not handled here.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index with the initial value of its iv ...  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  iv_type = TREE_TYPE (iv->base);
  type = build_pointer_type (TREE_TYPE (base));
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (type, 1);

  /* ... and accumulate ELEMENT_SIZE * IV_STEP into the step of the whole
     address.  If the iv is narrower than the pointer type, verify that it
     cannot wrap around.  */
  if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
    iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
					  type, iv->base, iv->step, dta->stmt);
  else
    iv_step = fold_convert (iv_type, iv->step);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
					    *dta->step_p, step);

  return true;
}
1453
1454 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
1456
1457 static bool
1458 idx_record_use (tree base, tree *idx,
1459 void *data)
1460 {
1461 find_interesting_uses_op (data, *idx);
1462 if (TREE_CODE (base) == ARRAY_REF)
1463 {
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1466 }
1467 return true;
1468 }
1469
1470 /* Returns true if memory reference REF may be unaligned. */
1471
1472 static bool
1473 may_be_unaligned_p (tree ref)
1474 {
1475 tree base;
1476 tree base_type;
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1479 tree toffset;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1483
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
1491
1492 if (mode != BLKmode
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1496 return true;
1497
1498 return false;
1499 }
1500
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = unshare_expr (*op_p), step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  /* Replace each index in BASE by the initial value of its iv and sum the
     steps into STEP; give up if some index is not an iv or the address
     does not change inside the loop.  */
  ifs_ivopts_data.ivopts_data = data;
  ifs_ivopts_data.stmt = stmt;
  ifs_ivopts_data.step_p = &step;
  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
      || zero_p (step))
    goto fail;

  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

  /* The base of the new address iv is the address of the reference.  */
  if (TREE_CODE (base) == INDIRECT_REF)
    base = TREE_OPERAND (base, 0);
  else
    base = build_addr (base);

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* The address cannot be handled as a whole; at least record the uses
     of the individual indices.  */
  for_each_index (op_p, idx_record_use, data);
}
1542
1543 /* Finds and records invariants used in STMT. */
1544
1545 static void
1546 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1547 {
1548 use_optype uses = NULL;
1549 unsigned i, n;
1550 tree op;
1551
1552 if (TREE_CODE (stmt) == PHI_NODE)
1553 n = PHI_NUM_ARGS (stmt);
1554 else
1555 {
1556 get_stmt_operands (stmt);
1557 uses = STMT_USE_OPS (stmt);
1558 n = NUM_USES (uses);
1559 }
1560
1561 for (i = 0; i < n; i++)
1562 {
1563 if (TREE_CODE (stmt) == PHI_NODE)
1564 op = PHI_ARG_DEF (stmt, i);
1565 else
1566 op = USE_OP (uses, i);
1567
1568 record_invariant (data, op, false);
1569 }
1570 }
1571
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  use_optype uses = NULL;
  unsigned i, n;

  find_invariants_stmt (data, stmt);

  /* The condition of a COND_EXPR is the only interesting part of it.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  /* A comparison on the rhs is handled like a condition.  */
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; record the address use(s).  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  /* A store of a register value to memory; both the store address
	     and the stored value are interesting.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      /* A phi node in the loop header that defines an induction variable
	 is not an interesting use of its arguments.  */
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  /* Otherwise all SSA operands of the statement that are ivs are ordinary
     (nonlinear) uses.  */
  if (TREE_CODE (stmt) == PHI_NODE)
    n = PHI_NUM_ARGS (stmt);
  else
    {
      uses = STMT_USE_OPS (stmt);
      n = NUM_USES (uses);
    }

  for (i = 0; i < n; i++)
    {
      if (TREE_CODE (stmt) == PHI_NODE)
	op = PHI_ARG_DEF (stmt, i);
      else
	op = USE_OP (uses, i);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1673
1674 /* Finds interesting uses of induction variables outside of loops
1675 on loop exit edge EXIT. */
1676
1677 static void
1678 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1679 {
1680 tree phi, def;
1681
1682 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1683 {
1684 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1685 find_interesting_uses_outer (data, def);
1686 }
1687 }
1688
1689 /* Finds uses of the induction variables that are interesting. */
1690
1691 static void
1692 find_interesting_uses (struct ivopts_data *data)
1693 {
1694 basic_block bb;
1695 block_stmt_iterator bsi;
1696 tree phi;
1697 basic_block *body = get_loop_body (data->current_loop);
1698 unsigned i;
1699 struct version_info *info;
1700 edge e;
1701
1702 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Uses:\n\n");
1704
1705 for (i = 0; i < data->current_loop->num_nodes; i++)
1706 {
1707 edge_iterator ei;
1708 bb = body[i];
1709
1710 FOR_EACH_EDGE (e, ei, bb->succs)
1711 if (e->dest != EXIT_BLOCK_PTR
1712 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1713 find_interesting_uses_outside (data, e);
1714
1715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1716 find_interesting_uses_stmt (data, phi);
1717 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1718 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1719 }
1720
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1722 {
1723 bitmap_iterator bi;
1724
1725 fprintf (dump_file, "\n");
1726
1727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1728 {
1729 info = ver_info (data, i);
1730 if (info->inv_id)
1731 {
1732 fprintf (dump_file, " ");
1733 print_generic_expr (dump_file, info->name, TDF_SLIM);
1734 fprintf (dump_file, " is invariant (%d)%s\n",
1735 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1736 }
1737 }
1738
1739 fprintf (dump_file, "\n");
1740 }
1741
1742 free (body);
1743 }
1744
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  Returns EXPR with the stripped
   parts removed, or the original expression when nothing could be
   stripped.  */

static tree
strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);
  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      /* The whole constant becomes the offset.  */
      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset (op0, false, &off0);
      op1 = strip_offset (op1, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Simplify away operands that became zero.  */
      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The constant part of the index contributes
	 index * element_size to the offset.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset (op1, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;
      break;

    case ADDR_EXPR:
      inside_addr = true;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset (op0, inside_addr, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Rebuild the node with the stripped operands.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  return fold_convert (orig_type, expr);
}
1843
1844 /* Returns variant of TYPE that can be used as base for different uses.
1845 For integer types, we return unsigned variant of the type, which
1846 avoids problems with overflows. For pointer types, we return void *. */
1847
1848 static tree
1849 generic_type_for (tree type)
1850 {
1851 if (POINTER_TYPE_P (type))
1852 return ptr_type_node;
1853
1854 if (TYPE_UNSIGNED (type))
1855 return type;
1856
1857 return unsigned_type_for (type);
1858 }
1859
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly preexisting) candidate.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      /* Work with the generic variant of the type (unsigned for integers,
	 void * for pointers) so that equivalent candidates are shared.  */
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Check whether an equivalent candidate already exists; the loop ends
     with I at the matching candidate, or at n_iv_cands if none.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      /* A pseudocandidate (null iv) matches only pseudocandidates.  */
      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* No match found -- create a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  /* IP_ORIGINAL candidates keep the original iv variables; the
	     others get a fresh temporary.  */
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
1963
1964 /* Returns true if incrementing the induction variable at the end of the LOOP
1965 is allowed.
1966
1967 The purpose is to avoid splitting latch edge with a biv increment, thus
1968 creating a jump, possibly confusing other optimization passes and leaving
1969 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1970 is not available (so we do not have a better alternative), or if the latch
1971 edge is already nonempty. */
1972
1973 static bool
1974 allow_ip_end_pos_p (struct loop *loop)
1975 {
1976 if (!ip_normal_pos (loop))
1977 return true;
1978
1979 if (!empty_block_p (ip_end_pos (loop)))
1980 return true;
1981
1982 return false;
1983 }
1984
1985 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1986 position to POS. If USE is not NULL, the candidate is set as related to
1987 it. The candidate computation is scheduled on all available positions. */
1988
1989 static void
1990 add_candidate (struct ivopts_data *data,
1991 tree base, tree step, bool important, struct iv_use *use)
1992 {
1993 if (ip_normal_pos (data->current_loop))
1994 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1995 if (ip_end_pos (data->current_loop)
1996 && allow_ip_end_pos_p (data->current_loop))
1997 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1998 }
1999
2000 /* Add a standard "0 + 1 * iteration" iv candidate for a
2001 type with SIZE bits. */
2002
2003 static void
2004 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2005 unsigned int size)
2006 {
2007 tree type = lang_hooks.types.type_for_size (size, true);
2008 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2009 true, NULL);
2010 }
2011
2012 /* Adds standard iv candidates. */
2013
2014 static void
2015 add_standard_iv_candidates (struct ivopts_data *data)
2016 {
2017 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2018
2019 /* The same for a double-integer type if it is still fast enough. */
2020 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2021 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2022 }
2023
2024
2025 /* Adds candidates bases on the old induction variable IV. */
2026
2027 static void
2028 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2029 {
2030 tree phi, def;
2031 struct iv_cand *cand;
2032
2033 add_candidate (data, iv->base, iv->step, true, NULL);
2034
2035 /* The same, but with initial value zero. */
2036 add_candidate (data,
2037 build_int_cst (TREE_TYPE (iv->base), 0),
2038 iv->step, true, NULL);
2039
2040 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2041 if (TREE_CODE (phi) == PHI_NODE)
2042 {
2043 /* Additionally record the possibility of leaving the original iv
2044 untouched. */
2045 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2046 cand = add_candidate_1 (data,
2047 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2048 SSA_NAME_DEF_STMT (def));
2049 cand->var_before = iv->ssa_name;
2050 cand->var_after = def;
2051 }
2052 }
2053
2054 /* Adds candidates based on the old induction variables. */
2055
2056 static void
2057 add_old_ivs_candidates (struct ivopts_data *data)
2058 {
2059 unsigned i;
2060 struct iv *iv;
2061 bitmap_iterator bi;
2062
2063 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2064 {
2065 iv = ver_info (data, i)->iv;
2066 if (iv && iv->biv_p && !zero_p (iv->step))
2067 add_old_iv_candidates (data, iv);
2068 }
2069 }
2070
2071 /* Adds candidates based on the value of the induction variable IV and USE. */
2072
2073 static void
2074 add_iv_value_candidates (struct ivopts_data *data,
2075 struct iv *iv, struct iv_use *use)
2076 {
2077 add_candidate (data, iv->base, iv->step, false, use);
2078
2079 /* The same, but with initial value zero. */
2080 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2081 iv->step, false, use);
2082 }
2083
2084 /* Adds candidates based on the address IV and USE. */
2085
2086 static void
2087 add_address_candidates (struct ivopts_data *data,
2088 struct iv *iv, struct iv_use *use)
2089 {
2090 tree base, abase;
2091 unsigned HOST_WIDE_INT offset;
2092
2093 /* First, the trivial choices. */
2094 add_iv_value_candidates (data, iv, use);
2095
2096 /* Second, try removing the COMPONENT_REFs. */
2097 if (TREE_CODE (iv->base) == ADDR_EXPR)
2098 {
2099 base = TREE_OPERAND (iv->base, 0);
2100 while (TREE_CODE (base) == COMPONENT_REF
2101 || (TREE_CODE (base) == ARRAY_REF
2102 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2103 base = TREE_OPERAND (base, 0);
2104
2105 if (base != TREE_OPERAND (iv->base, 0))
2106 {
2107 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2108 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2109
2110 if (TREE_CODE (base) == INDIRECT_REF)
2111 base = TREE_OPERAND (base, 0);
2112 else
2113 base = build_addr (base);
2114 add_candidate (data, base, iv->step, false, use);
2115 }
2116 }
2117
2118 /* Third, try removing the constant offset. */
2119 abase = iv->base;
2120 base = strip_offset (abase, false, &offset);
2121 if (offset)
2122 add_candidate (data, base, iv->step, false, use);
2123 }
2124
2125 /* Possibly adds pseudocandidate for replacing the final value of USE by
2126 a direct computation. */
2127
2128 static void
2129 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2130 {
2131 struct tree_niter_desc *niter;
2132
2133 /* We must know where we exit the loop and how many times does it roll. */
2134 niter = niter_for_single_dom_exit (data);
2135 if (!niter
2136 || !zero_p (niter->may_be_zero))
2137 return;
2138
2139 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2140 }
2141
2142 /* Adds candidates based on the uses. */
2143
2144 static void
2145 add_derived_ivs_candidates (struct ivopts_data *data)
2146 {
2147 unsigned i;
2148
2149 for (i = 0; i < n_iv_uses (data); i++)
2150 {
2151 struct iv_use *use = iv_use (data, i);
2152
2153 if (!use)
2154 continue;
2155
2156 switch (use->type)
2157 {
2158 case USE_NONLINEAR_EXPR:
2159 case USE_COMPARE:
2160 /* Just add the ivs based on the value of the iv used here. */
2161 add_iv_value_candidates (data, use->iv, use);
2162 break;
2163
2164 case USE_OUTER:
2165 add_iv_value_candidates (data, use->iv, use);
2166
2167 /* Additionally, add the pseudocandidate for the possibility to
2168 replace the final value by a direct computation. */
2169 add_iv_outer_candidates (data, use);
2170 break;
2171
2172 case USE_ADDRESS:
2173 add_address_candidates (data, use->iv, use);
2174 break;
2175
2176 default:
2177 gcc_unreachable ();
2178 }
2179 }
2180 }
2181
2182 /* Record important candidates and add them to related_cands bitmaps
2183 if needed. */
2184
2185 static void
2186 record_important_candidates (struct ivopts_data *data)
2187 {
2188 unsigned i;
2189 struct iv_use *use;
2190
2191 for (i = 0; i < n_iv_cands (data); i++)
2192 {
2193 struct iv_cand *cand = iv_cand (data, i);
2194
2195 if (cand->important)
2196 bitmap_set_bit (data->important_candidates, i);
2197 }
2198
2199 data->consider_all_candidates = (n_iv_cands (data)
2200 <= CONSIDER_ALL_CANDIDATES_BOUND);
2201
2202 if (data->consider_all_candidates)
2203 {
2204 /* We will not need "related_cands" bitmaps in this case,
2205 so release them to decrease peak memory consumption. */
2206 for (i = 0; i < n_iv_uses (data); i++)
2207 {
2208 use = iv_use (data, i);
2209 BITMAP_FREE (use->related_cands);
2210 }
2211 }
2212 else
2213 {
2214 /* Add important candidates to the related_cands bitmaps. */
2215 for (i = 0; i < n_iv_uses (data); i++)
2216 bitmap_ior_into (iv_use (data, i)->related_cands,
2217 data->important_candidates);
2218 }
2219 }
2220
/* Finds the candidates for the induction variables, in this order:
   standard ivs, the preexisting (old) ivs, and ivs derived from the
   interesting uses.  Finally marks the important ones.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2238
2239 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2240 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2241 we allocate a simple list to every use. */
2242
2243 static void
2244 alloc_use_cost_map (struct ivopts_data *data)
2245 {
2246 unsigned i, size, s, j;
2247
2248 for (i = 0; i < n_iv_uses (data); i++)
2249 {
2250 struct iv_use *use = iv_use (data, i);
2251 bitmap_iterator bi;
2252
2253 if (data->consider_all_candidates)
2254 size = n_iv_cands (data);
2255 else
2256 {
2257 s = 0;
2258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2259 {
2260 s++;
2261 }
2262
2263 /* Round up to the power of two, so that moduling by it is fast. */
2264 for (size = 1; size < s; size <<= 1)
2265 continue;
2266 }
2267
2268 use->n_map_members = size;
2269 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2270 }
2271 }
2272
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on)
{
  unsigned i, s;

  /* Infinite cost means the pair is unusable; it is not recorded at all,
     so get_use_iv_cost returns NULL for it.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* The cost map is indexed directly by the candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      return;
    }

  /* Otherwise the cost map is a small open-addressing hash table; probe
     linearly (with wraparound) for a free entry.
     n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table is sized with room for all related candidates, so a free
     entry must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
}
2313
2314 /* Gets cost of (USE, CANDIDATE) pair. */
2315
2316 static struct cost_pair *
2317 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2318 struct iv_cand *cand)
2319 {
2320 unsigned i, s;
2321 struct cost_pair *ret;
2322
2323 if (!cand)
2324 return NULL;
2325
2326 if (data->consider_all_candidates)
2327 {
2328 ret = use->cost_map + cand->id;
2329 if (!ret->cand)
2330 return NULL;
2331
2332 return ret;
2333 }
2334
2335 /* n_map_members is a power of two, so this computes modulo. */
2336 s = cand->id & (use->n_map_members - 1);
2337 for (i = s; i < use->n_map_members; i++)
2338 if (use->cost_map[i].cand == cand)
2339 return use->cost_map + i;
2340
2341 for (i = 0; i < s; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2344
2345 return NULL;
2346 }
2347
2348 /* Returns estimate on cost of computing SEQ. */
2349
2350 static unsigned
2351 seq_cost (rtx seq)
2352 {
2353 unsigned cost = 0;
2354 rtx set;
2355
2356 for (; seq; seq = NEXT_INSN (seq))
2357 {
2358 set = single_set (seq);
2359 if (set)
2360 cost += rtx_cost (set, SET);
2361 else
2362 cost++;
2363 }
2364
2365 return cost;
2366 }
2367
2368 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2369 static rtx
2370 produce_memory_decl_rtl (tree obj, int *regno)
2371 {
2372 rtx x;
2373
2374 gcc_assert (obj);
2375 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2376 {
2377 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2378 x = gen_rtx_SYMBOL_REF (Pmode, name);
2379 }
2380 else
2381 x = gen_raw_REG (Pmode, (*regno)++);
2382
2383 return gen_rtx_MEM (DECL_MODE (obj), x);
2384 }
2385
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Find the base object of the reference whose address is taken;
	 it must be placed in memory.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* SSA names live in fake pseudo registers.  Setting *WS to 0 stops
	 walk_tree from descending into the subtree.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects must be placed in memory; everything else can
	 live in a fake pseudo register.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so that the fake DECL_RTL can be reset later.  */
      VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2443
2444 /* Determines cost of the computation of EXPR. */
2445
2446 static unsigned
2447 computation_cost (tree expr)
2448 {
2449 rtx seq, rslt;
2450 tree type = TREE_TYPE (expr);
2451 unsigned cost;
2452 /* Avoid using hard regs in ways which may be unsupported. */
2453 int regno = LAST_VIRTUAL_REGISTER + 1;
2454
2455 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2456 start_sequence ();
2457 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2458 seq = get_insns ();
2459 end_sequence ();
2460
2461 cost = seq_cost (seq);
2462 if (GET_CODE (rslt) == MEM)
2463 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2464
2465 return cost;
2466 }
2467
2468 /* Returns variable containing the value of candidate CAND at statement AT. */
2469
2470 static tree
2471 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2472 {
2473 if (stmt_after_increment (loop, cand, stmt))
2474 return cand->var_after;
2475 else
2476 return cand->var_before;
2477 }
2478
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  Returns NULL_TREE when USE cannot be
   expressed from CAND.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return NULL_TREE;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Compute in an unsigned variant of utype; presumably to get modulo
     arithmetic and avoid signed-overflow issues in the folding below
     -- TODO confirm.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  /* Both steps must be host-word constants for the division below.  */
  if (!cst_and_fits_in_hwi (cstep)
      || !cst_and_fits_in_hwi (ustep))
    return NULL_TREE;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  /* RATIOI is the integer for that ustep == ratioi * cstep holds.  */
  if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
    {
      /* TODO maybe consider case when ustep divides cstep and the ratio is
	 a power of 2 (so that the division is fast to execute)?  We would
	 need to be much more careful with overflows etc. then.  */
      return NULL_TREE;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (ratioi == 1)
    {
      /* use = ubase - cbase + var.  */
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
    }
  else if (ratioi == -1)
    {
      /* use = ubase + cbase - var.  */
      delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
    }
  else
    {
      /* General case: use = (ubase - ratio * cbase) + ratio * var.  */
      ratio = build_int_cst_type (uutype, ratioi);
      delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
      expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
      expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
    }

  /* Convert back to the type of the use.  */
  return fold_convert (utype, expr);
}
2575
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  /* Convenience wrapper: compute the expression at the statement of USE.  */
  return get_computation_at (loop, use, cand, use->stmt);
}
2584
2585 /* Returns cost of addition in MODE. */
2586
2587 static unsigned
2588 add_cost (enum machine_mode mode)
2589 {
2590 static unsigned costs[NUM_MACHINE_MODES];
2591 rtx seq;
2592 unsigned cost;
2593
2594 if (costs[mode])
2595 return costs[mode];
2596
2597 start_sequence ();
2598 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2599 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2600 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2601 NULL_RTX);
2602 seq = get_insns ();
2603 end_sequence ();
2604
2605 cost = seq_cost (seq);
2606 if (!cost)
2607 cost = 1;
2608
2609 costs[mode] = cost;
2610
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, "Addition in %s costs %d\n",
2613 GET_MODE_NAME (mode), cost);
2614 return cost;
2615 }
2616
/* Entry in a hashtable of already known costs for multiplication.
   Keyed by the (CST, MODE) pair; see mbc_entry_hash and mbc_entry_eq.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2624
2625 /* Counts hash value for the ENTRY. */
2626
2627 static hashval_t
2628 mbc_entry_hash (const void *entry)
2629 {
2630 const struct mbc_entry *e = entry;
2631
2632 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2633 }
2634
2635 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2636
2637 static int
2638 mbc_entry_eq (const void *entry1, const void *entry2)
2639 {
2640 const struct mbc_entry *e1 = entry1;
2641 const struct mbc_entry *e2 = entry2;
2642
2643 return (e1->mode == e2->mode
2644 && e1->cst == e2->cst);
2645 }
2646
2647 /* Returns cost of multiplication by constant CST in MODE. */
2648
2649 static unsigned
2650 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2651 {
2652 static htab_t costs;
2653 struct mbc_entry **cached, act;
2654 rtx seq;
2655 unsigned cost;
2656
2657 if (!costs)
2658 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2659
2660 act.mode = mode;
2661 act.cst = cst;
2662 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2663 if (*cached)
2664 return (*cached)->cost;
2665
2666 *cached = xmalloc (sizeof (struct mbc_entry));
2667 (*cached)->mode = mode;
2668 (*cached)->cst = cst;
2669
2670 start_sequence ();
2671 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2672 NULL_RTX, 0);
2673 seq = get_insns ();
2674 end_sequence ();
2675
2676 cost = seq_cost (seq);
2677
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2680 (int) cst, GET_MODE_NAME (mode), cost);
2681
2682 (*cached)->cost = cost;
2683
2684 return cost;
2685 }
2686
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  All addresses are built and costed in Pmode.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  /* One-time probing of the target: determine the range of immediate
     offsets and the set of index multipliers that form valid addresses.  */
  if (!valid_mult)
    {
      HOST_WIDE_INT i;

      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);

      /* Probe powers of two up to 2^20 for the largest valid positive
	 offset.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* ... and similarly for the most negative valid offset.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (-i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      /* Record which multipliers in [-MAX_RATIO, MAX_RATIO] the target
	 accepts in an address; remember one valid multiplier in RAT.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      rat = 1;
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (memory_address_p (Pmode, addr))
	    {
	      SET_BIT (valid_mult, i + MAX_RATIO);
	      rat = i;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET to the width of Pmode.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  /* Can the offset and the multiplier be encoded in the address itself?  */
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
	     && TEST_BIT (valid_mult, ratio + MAX_RATIO));

  /* A multiplier that cannot be encoded costs an explicit multiplication.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An offset that cannot be encoded costs an explicit addition, after
     which the address contains a variable part.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  /* Look up (or compute and cache) the cost of an address of this shape.  */
  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      acost = 0;

      addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  GEN_INT (off)));
	}
      else if (offset_p)
	base = GEN_INT (off);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      /* Legitimize the address; charge for the insns this needs plus the
	 cost of the final address itself.  */
      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      /* A zero cost would defeat the cache.  */
      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
2834
2835 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2836 the bitmap to that we should store it. */
2837
2838 static struct ivopts_data *fd_ivopts_data;
2839 static tree
2840 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2841 {
2842 bitmap *depends_on = data;
2843 struct version_info *info;
2844
2845 if (TREE_CODE (*expr_p) != SSA_NAME)
2846 return NULL_TREE;
2847 info = name_info (fd_ivopts_data, *expr_p);
2848
2849 if (!info->inv_id || info->has_nonlin_use)
2850 return NULL_TREE;
2851
2852 if (!*depends_on)
2853 *depends_on = BITMAP_ALLOC (NULL);
2854 bitmap_set_bit (*depends_on, info->inv_id);
2855
2856 return NULL_TREE;
2857 }
2858
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on (may be NULL if the caller does not
   need them).  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  /* On the first call, measure and cache the cost of three representative
     computations: an integer constant, the address of a symbol, and a
     symbol plus offset.  */
  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							    2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Record the invariants the expression depends on, if requested.  */
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  /* A variable is already a variable; no cost.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  /* For a binary arithmetic expression, recursively cost the operands
     that are not already gimple values.  */
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant can be costed precisely; a general
	 multiplication is priced as a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
2993
2994 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2995 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2996 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2997 invariants the computation depends on. */
2998
2999 static unsigned
3000 split_address_cost (struct ivopts_data *data,
3001 tree addr, bool *symbol_present, bool *var_present,
3002 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3003 {
3004 tree core;
3005 HOST_WIDE_INT bitsize;
3006 HOST_WIDE_INT bitpos;
3007 tree toffset;
3008 enum machine_mode mode;
3009 int unsignedp, volatilep;
3010
3011 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3012 &unsignedp, &volatilep, false);
3013
3014 if (toffset != 0
3015 || bitpos % BITS_PER_UNIT != 0
3016 || TREE_CODE (core) != VAR_DECL)
3017 {
3018 *symbol_present = false;
3019 *var_present = true;
3020 fd_ivopts_data = data;
3021 walk_tree (&addr, find_depends, depends_on, NULL);
3022 return target_spill_cost;
3023 }
3024
3025 *offset += bitpos / BITS_PER_UNIT;
3026 if (TREE_STATIC (core)
3027 || DECL_EXTERNAL (core))
3028 {
3029 *symbol_present = true;
3030 *var_present = false;
3031 return 0;
3032 }
3033
3034 *symbol_present = false;
3035 *var_present = true;
3036 return 0;
3037 }
3038
3039 /* Estimates cost of expressing difference of addresses E1 - E2 as
3040 var + symbol + offset. The value of offset is added to OFFSET,
3041 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3042 part is missing. DEPENDS_ON is a set of the invariants the computation
3043 depends on. */
3044
3045 static unsigned
3046 ptr_difference_cost (struct ivopts_data *data,
3047 tree e1, tree e2, bool *symbol_present, bool *var_present,
3048 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3049 {
3050 HOST_WIDE_INT diff = 0;
3051 unsigned cost;
3052
3053 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3054
3055 if (ptr_difference_const (e1, e2, &diff))
3056 {
3057 *offset += diff;
3058 *symbol_present = false;
3059 *var_present = false;
3060 return 0;
3061 }
3062
3063 if (e2 == integer_zero_node)
3064 return split_address_cost (data, TREE_OPERAND (e1, 0),
3065 symbol_present, var_present, offset, depends_on);
3066
3067 *symbol_present = false;
3068 *var_present = true;
3069
3070 cost = force_var_cost (data, e1, depends_on);
3071 cost += force_var_cost (data, e2, depends_on);
3072 cost += add_cost (Pmode);
3073
3074 return cost;
3075 }
3076
3077 /* Estimates cost of expressing difference E1 - E2 as
3078 var + symbol + offset. The value of offset is added to OFFSET,
3079 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3080 part is missing. DEPENDS_ON is a set of the invariants the computation
3081 depends on. */
3082
3083 static unsigned
3084 difference_cost (struct ivopts_data *data,
3085 tree e1, tree e2, bool *symbol_present, bool *var_present,
3086 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3087 {
3088 unsigned cost;
3089 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3090 unsigned HOST_WIDE_INT off1, off2;
3091
3092 e1 = strip_offset (e1, false, &off1);
3093 e2 = strip_offset (e2, false, &off2);
3094 *offset += off1 - off2;
3095
3096 STRIP_NOPS (e1);
3097 STRIP_NOPS (e2);
3098
3099 if (TREE_CODE (e1) == ADDR_EXPR)
3100 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3101 depends_on);
3102 *symbol_present = false;
3103
3104 if (operand_equal_p (e1, e2, 0))
3105 {
3106 *var_present = false;
3107 return 0;
3108 }
3109 *var_present = true;
3110 if (zero_p (e2))
3111 return force_var_cost (data, e1, depends_on);
3112
3113 if (zero_p (e1))
3114 {
3115 cost = force_var_cost (data, e2, depends_on);
3116 cost += multiply_by_cost (-1, mode);
3117
3118 return cost;
3119 }
3120
3121 cost = force_var_cost (data, e1, depends_on);
3122 cost += force_var_cost (data, e2, depends_on);
3123 cost += add_cost (mode);
3124
3125 return cost;
3126 }
3127
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   Returns INFTY when USE cannot be expressed from CAND.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  /* Both steps must be host-word constants.  */
  if (!cst_and_fits_in_hwi (ustep)
      || !cst_and_fits_in_hwi (cstep))
    return INFTY;

  if (TREE_CODE (ubase) == INTEGER_CST
      && !cst_and_fits_in_hwi (ubase))
    goto fallback;

  if (TREE_CODE (cbase) == INTEGER_CST
      && !cst_and_fits_in_hwi (cbase))
    goto fallback;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* RATIO is the integer for that ustep == ratio * cstep holds (in the
     precision of utype); without it CAND cannot express USE cheaply.  */
  if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
    return INFTY;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (TREE_CODE (cbase) == INTEGER_CST)
    {
      /* Fold ratio * cbase into the constant offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      /* General case: cbase must be computed and subtracted explicitly.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      /* Only the multiplication (if any) remains.  */
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  /* Count the additions needed to combine the parts.  */
  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3278
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on)
{
  /* Convenience wrapper: evaluate the cost at the statement of USE.  */
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt);
}
3293
3294 /* Determines cost of basing replacement of USE on CAND in a generic
3295 expression. */
3296
3297 static bool
3298 determine_use_iv_cost_generic (struct ivopts_data *data,
3299 struct iv_use *use, struct iv_cand *cand)
3300 {
3301 bitmap depends_on;
3302 unsigned cost;
3303
3304 /* The simple case first -- if we need to express value of the preserved
3305 original biv, the cost is 0. This also prevents us from counting the
3306 cost of increment twice -- once at this use and once in the cost of
3307 the candidate. */
3308 if (cand->pos == IP_ORIGINAL
3309 && cand->incremented_at == use->stmt)
3310 {
3311 set_use_iv_cost (data, use, cand, 0, NULL);
3312 return true;
3313 }
3314
3315 cost = get_computation_cost (data, use, cand, false, &depends_on);
3316 set_use_iv_cost (data, use, cand, cost, depends_on);
3317
3318 return cost != INFTY;
3319 }
3320
3321 /* Determines cost of basing replacement of USE on CAND in an address. */
3322
3323 static bool
3324 determine_use_iv_cost_address (struct ivopts_data *data,
3325 struct iv_use *use, struct iv_cand *cand)
3326 {
3327 bitmap depends_on;
3328 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3329
3330 set_use_iv_cost (data, use, cand, cost, depends_on);
3331
3332 return cost != INFTY;
3333 }
3334
3335 /* Computes value of induction variable IV in iteration NITER. */
3336
3337 static tree
3338 iv_value (struct iv *iv, tree niter)
3339 {
3340 tree val;
3341 tree type = TREE_TYPE (iv->base);
3342
3343 niter = fold_convert (type, niter);
3344 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3345
3346 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3347 }
3348
3349 /* Computes value of candidate CAND at position AT in iteration NITER. */
3350
3351 static tree
3352 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3353 {
3354 tree val = iv_value (cand->iv, niter);
3355 tree type = TREE_TYPE (cand->iv->base);
3356
3357 if (stmt_after_increment (loop, cand, at))
3358 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3359
3360 return val;
3361 }
3362
3363 /* Returns period of induction variable iv. */
3364
3365 static tree
3366 iv_period (struct iv *iv)
3367 {
3368 tree step = iv->step, period, type;
3369 tree pow2div;
3370
3371 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3372
3373 /* Period of the iv is gcd (step, type range). Since type range is power
3374 of two, it suffices to determine the maximum power of two that divides
3375 step. */
3376 pow2div = num_ending_zeros (step);
3377 type = unsigned_type_for (TREE_TYPE (step));
3378
3379 period = build_low_bits_mask (type,
3380 (TYPE_PRECISION (type)
3381 - tree_low_cst (pow2div, 1)));
3382
3383 return period;
3384 }
3385
3386 /* Check whether it is possible to express the condition in USE by comparison
3387 of candidate CAND. If so, store the comparison code to COMPARE and the
3388 value compared with to BOUND. */
3389
3390 static bool
3391 may_eliminate_iv (struct ivopts_data *data,
3392 struct iv_use *use, struct iv_cand *cand,
3393 enum tree_code *compare, tree *bound)
3394 {
3395 basic_block ex_bb;
3396 edge exit;
3397 struct tree_niter_desc *niter;
3398 tree nit, nit_type;
3399 tree wider_type, period, per_type;
3400 struct loop *loop = data->current_loop;
3401
3402 /* For now works only for exits that dominate the loop latch. TODO -- extend
3403 for other conditions inside loop body. */
3404 ex_bb = bb_for_stmt (use->stmt);
3405 if (use->stmt != last_stmt (ex_bb)
3406 || TREE_CODE (use->stmt) != COND_EXPR)
3407 return false;
3408 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3409 return false;
3410
3411 exit = EDGE_SUCC (ex_bb, 0);
3412 if (flow_bb_inside_loop_p (loop, exit->dest))
3413 exit = EDGE_SUCC (ex_bb, 1);
3414 if (flow_bb_inside_loop_p (loop, exit->dest))
3415 return false;
3416
3417 niter = niter_for_exit (data, exit);
3418 if (!niter
3419 || !zero_p (niter->may_be_zero))
3420 return false;
3421
3422 nit = niter->niter;
3423 nit_type = TREE_TYPE (nit);
3424
3425 /* Determine whether we may use the variable to test whether niter iterations
3426 elapsed. This is the case iff the period of the induction variable is
3427 greater than the number of iterations. */
3428 period = iv_period (cand->iv);
3429 if (!period)
3430 return false;
3431 per_type = TREE_TYPE (period);
3432
3433 wider_type = TREE_TYPE (period);
3434 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3435 wider_type = per_type;
3436 else
3437 wider_type = nit_type;
3438
3439 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3440 fold_convert (wider_type, period),
3441 fold_convert (wider_type, nit)))))
3442 return false;
3443
3444 if (exit->flags & EDGE_TRUE_VALUE)
3445 *compare = EQ_EXPR;
3446 else
3447 *compare = NE_EXPR;
3448
3449 *bound = cand_value_at (loop, cand, use->stmt, nit);
3450 return true;
3451 }
3452
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound;
  enum tree_code compare;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL);
      return false;
    }

  /* Try to eliminate the iv entirely by rewriting the exit test to compare
     CAND against its value at loop exit; the cost is then just that of
     computing the bound.  */
  if (may_eliminate_iv (data, use, cand, &compare, &bound))
    {
      bitmap depends_on = NULL;
      unsigned cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  if (TREE_CODE (*use->op_p) == SSA_NAME)
    record_invariant (data, *use->op_p, true);
  else
    {
      record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
      record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
    }

  return determine_use_iv_cost_generic (data, use, cand);
}
3491
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  /* We need a single exit dominating the latch so that the value of the
     iv after the loop is well defined.  */
  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  /* The number of iterations must be known, and known not to be
     conditionally zero -- otherwise the final-value formula would be
     wrong on the path that bypasses the loop body.  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  *value = iv_value (use->iv, niter->niter);

  return true;
}
3519
/* Determines cost of replacing final value of USE using CAND.  Returns
   false if the replacement is impossible (infinite cost recorded).  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL);
      return true;
    }

  /* The pseudocandidate stands for replacing the use by a direct
     computation of the final value.  */
  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The value is computed only once, after the loop; amortize.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on);

  return cost != INFTY;
}
3580
3581 /* Determines cost of basing replacement of USE on CAND. Returns false
3582 if USE cannot be based on CAND. */
3583
3584 static bool
3585 determine_use_iv_cost (struct ivopts_data *data,
3586 struct iv_use *use, struct iv_cand *cand)
3587 {
3588 switch (use->type)
3589 {
3590 case USE_NONLINEAR_EXPR:
3591 return determine_use_iv_cost_generic (data, use, cand);
3592
3593 case USE_OUTER:
3594 return determine_use_iv_cost_outer (data, use, cand);
3595
3596 case USE_ADDRESS:
3597 return determine_use_iv_cost_address (data, use, cand);
3598
3599 case USE_COMPARE:
3600 return determine_use_iv_cost_condition (data, use, cand);
3601
3602 default:
3603 gcc_unreachable ();
3604 }
3605 }
3606
/* Determines costs of basing the use of the iv on an iv candidate,
   for every (use, candidate) pair worth considering.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Few enough candidates -- cost every pair.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Otherwise only cost the candidates recorded as related to
	     this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip unusable pairs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3681
3682 /* Determines cost of the candidate CAND. */
3683
3684 static void
3685 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3686 {
3687 unsigned cost_base, cost_step;
3688 tree base;
3689
3690 if (!cand->iv)
3691 {
3692 cand->cost = 0;
3693 return;
3694 }
3695
3696 /* There are two costs associated with the candidate -- its increment
3697 and its initialization. The second is almost negligible for any loop
3698 that rolls enough, so we take it just very little into account. */
3699
3700 base = cand->iv->base;
3701 cost_base = force_var_cost (data, base, NULL);
3702 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3703
3704 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3705
3706 /* Prefer the original iv unless we may gain something by replacing it;
3707 this is not really relevant for artificial ivs created by other
3708 passes. */
3709 if (cand->pos == IP_ORIGINAL
3710 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3711 cand->cost--;
3712
3713 /* Prefer not to insert statements into latch unless there are some
3714 already (so that we do not create unnecessary jumps). */
3715 if (cand->pos == IP_END
3716 && empty_block_p (ip_end_pos (data->current_loop)))
3717 cand->cost++;
3718 }
3719
3720 /* Determines costs of computation of the candidates. */
3721
3722 static void
3723 determine_iv_costs (struct ivopts_data *data)
3724 {
3725 unsigned i;
3726
3727 if (dump_file && (dump_flags & TDF_DETAILS))
3728 {
3729 fprintf (dump_file, "Candidate costs:\n");
3730 fprintf (dump_file, " cand\tcost\n");
3731 }
3732
3733 for (i = 0; i < n_iv_cands (data); i++)
3734 {
3735 struct iv_cand *cand = iv_cand (data, i);
3736
3737 determine_iv_cost (data, cand);
3738
3739 if (dump_file && (dump_flags & TDF_DETAILS))
3740 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3741 }
3742
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3744 fprintf (dump_file, "\n");
3745 }
3746
3747 /* Calculates cost for having SIZE induction variables. */
3748
3749 static unsigned
3750 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3751 {
3752 return global_cost_for_size (size,
3753 loop_data (data->current_loop)->regs_used,
3754 n_iv_uses (data));
3755 }
3756
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers the loop already uses and stores it
   in the per-loop data for use by ivopts_global_cost_for_size.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count loop-carried values: real (non-virtual) phi results that are
     not induction variables handled by this pass.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add the invariants (values defined outside of the loop) that have
     nonlinear uses inside it -- each occupies a register as well.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3832
3833 /* Returns true if A is a cheaper cost pair than B. */
3834
3835 static bool
3836 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3837 {
3838 if (!a)
3839 return false;
3840
3841 if (!b)
3842 return true;
3843
3844 if (a->cost < b->cost)
3845 return true;
3846
3847 if (a->cost > b->cost)
3848 return false;
3849
3850 /* In case the costs are the same, prefer the cheaper candidate. */
3851 if (a->cand->cost < b->cand->cost)
3852 return true;
3853
3854 return false;
3855 }
3856
3857 /* Computes the cost field of IVS structure. */
3858
3859 static void
3860 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3861 {
3862 unsigned cost = 0;
3863
3864 cost += ivs->cand_use_cost;
3865 cost += ivs->cand_cost;
3866 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3867
3868 ivs->cost = cost;
3869 }
3870
/* Set USE not to be expressed by any candidate in IVS.  Updates the
   candidate/invariant reference counts, the register-pressure estimate
   and the total cost accordingly.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  struct cost_pair *cp;
  bitmap_iterator bi;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use based on the candidate, remove the
     candidate from the set.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
    }

  ivs->cand_use_cost -= cp->cost;

  /* Release the invariants this cost pair depended on; an invariant
     that loses its last use frees up a register.  */
  deps = cp->depends_on;

  if (deps)
    {
      EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	{
	  ivs->n_invariant_uses[iid]--;
	  if (ivs->n_invariant_uses[iid] == 0)
	    ivs->n_regs--;
	}
    }

  iv_ca_recount_cost (data, ivs);
}
3917
/* Set cost pair for USE in set IVS to CP.  CP may be NULL, in which case
   USE becomes unexpressed.  All reference counts and costs are updated;
   this is the exact inverse operation of iv_ca_set_no_cp.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  bitmap_iterator bi;

  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Undo the previous assignment first.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate -- add it to the set.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	}

      ivs->cand_use_cost += cp->cost;

      /* Account for the invariants the cost pair depends on; a newly
	 used invariant occupies a register.  */
      deps = cp->depends_on;

      if (deps)
	{
	  EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	    {
	      ivs->n_invariant_uses[iid]++;
	      if (ivs->n_invariant_uses[iid] == 1)
		ivs->n_regs++;
	    }
	}

      iv_ca_recount_cost (data, ivs);
    }
}
3968
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  If none of the candidates can express USE, it is left
   unassigned and counted in IVS->bad_uses.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  /* Uses are added in order of their ids.  */
  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  /* Pick the cheapest way of expressing USE using a candidate already
     present in the set.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
3998
3999 /* Get cost for assignment IVS. */
4000
4001 static unsigned
4002 iv_ca_cost (struct iv_ca *ivs)
4003 {
4004 return (ivs->bad_uses ? INFTY : ivs->cost);
4005 }
4006
4007 /* Returns true if all dependences of CP are among invariants in IVS. */
4008
4009 static bool
4010 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4011 {
4012 unsigned i;
4013 bitmap_iterator bi;
4014
4015 if (!cp->depends_on)
4016 return true;
4017
4018 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4019 {
4020 if (ivs->n_invariant_uses[i] == 0)
4021 return false;
4022 }
4023
4024 return true;
4025 }
4026
4027 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4028 it before NEXT_CHANGE. */
4029
4030 static struct iv_ca_delta *
4031 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4032 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4033 {
4034 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4035
4036 change->use = use;
4037 change->old_cp = old_cp;
4038 change->new_cp = new_cp;
4039 change->next_change = next_change;
4040
4041 return change;
4042 }
4043
4044 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4045 are rewritten. */
4046
4047 static struct iv_ca_delta *
4048 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4049 {
4050 struct iv_ca_delta *last;
4051
4052 if (!l2)
4053 return l1;
4054
4055 if (!l1)
4056 return l2;
4057
4058 for (last = l1; last->next_change; last = last->next_change)
4059 continue;
4060 last->next_change = l2;
4061
4062 return l1;
4063 }
4064
/* Returns the cost pair (and thus the candidate) by that USE is
   expressed in IVS, or NULL if USE is not expressed.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
4072
4073 /* Reverse the list of changes DELTA, forming the inverse to it. */
4074
4075 static struct iv_ca_delta *
4076 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4077 {
4078 struct iv_ca_delta *act, *next, *prev = NULL;
4079 struct cost_pair *tmp;
4080
4081 for (act = delta; act; act = next)
4082 {
4083 next = act->next_change;
4084 act->next_change = prev;
4085 prev = act;
4086
4087 tmp = act->old_cp;
4088 act->old_cp = act->new_cp;
4089 act->new_cp = tmp;
4090 }
4091
4092 return prev;
4093 }
4094
4095 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4096 reverted instead. */
4097
4098 static void
4099 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4100 struct iv_ca_delta *delta, bool forward)
4101 {
4102 struct cost_pair *from, *to;
4103 struct iv_ca_delta *act;
4104
4105 if (!forward)
4106 delta = iv_ca_delta_reverse (delta);
4107
4108 for (act = delta; act; act = act->next_change)
4109 {
4110 from = act->old_cp;
4111 to = act->new_cp;
4112 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4113 iv_ca_set_cp (data, ivs, act->use, to);
4114 }
4115
4116 if (!forward)
4117 iv_ca_delta_reverse (delta);
4118 }
4119
/* Returns true if CAND expresses at least one use in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
4127
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
4135
4136 /* Free the list of changes DELTA. */
4137
4138 static void
4139 iv_ca_delta_free (struct iv_ca_delta **delta)
4140 {
4141 struct iv_ca_delta *act, *next;
4142
4143 for (act = *delta; act; act = next)
4144 {
4145 next = act->next_change;
4146 free (act);
4147 }
4148
4149 *delta = NULL;
4150 }
4151
4152 /* Allocates new iv candidates assignment. */
4153
4154 static struct iv_ca *
4155 iv_ca_new (struct ivopts_data *data)
4156 {
4157 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4158
4159 nw->upto = 0;
4160 nw->bad_uses = 0;
4161 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4162 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4163 nw->cands = BITMAP_ALLOC (NULL);
4164 nw->n_cands = 0;
4165 nw->n_regs = 0;
4166 nw->cand_use_cost = 0;
4167 nw->cand_cost = 0;
4168 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4169 nw->cost = 0;
4170
4171 return nw;
4172 }
4173
4174 /* Free memory occupied by the set IVS. */
4175
4176 static void
4177 iv_ca_free (struct iv_ca **ivs)
4178 {
4179 free ((*ivs)->cand_for_use);
4180 free ((*ivs)->n_cand_uses);
4181 BITMAP_FREE ((*ivs)->cands);
4182 free ((*ivs)->n_invariant_uses);
4183 free (*ivs);
4184 *ivs = NULL;
4185 }
4186
4187 /* Dumps IVS to FILE. */
4188
4189 static void
4190 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4191 {
4192 const char *pref = " invariants ";
4193 unsigned i;
4194
4195 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4196 bitmap_print (file, ivs->cands, " candidates ","\n");
4197
4198 for (i = 1; i <= data->max_inv_id; i++)
4199 if (ivs->n_invariant_uses[i])
4200 {
4201 fprintf (file, "%s%d", pref, i);
4202 pref = ", ";
4203 }
4204 fprintf (file, "\n");
4205 }
4206
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS (if non-NULL).  IVS itself is left
   unchanged: the delta is committed only to measure the cost, then
   immediately reverted.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already based on CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Rebase only if all invariants the new pair needs are available
	 and the change is actually an improvement.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the resulting cost, then undo; the caller decides whether
     to commit DELTA for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4251
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY if some
   use based on CAND cannot be expressed by any other candidate in the
   set.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only the uses currently based on CAND need rebasing.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Find the cheapest replacement among the remaining candidates in
	 the set.  When not considering all candidates, restrict the
	 search to those related to the use.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement exists -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4335
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  IVS itself is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helps -- we are done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The recursive
     deltas are measured on the narrowed set (committed temporarily),
     then joined with the removal found above.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
4384
4385 /* Tries to extend the sets IVS in the best possible way in order
4386 to express the USE. */
4387
4388 static bool
4389 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4390 struct iv_use *use)
4391 {
4392 unsigned best_cost, act_cost;
4393 unsigned i;
4394 bitmap_iterator bi;
4395 struct iv_cand *cand;
4396 struct iv_ca_delta *best_delta = NULL, *act_delta;
4397 struct cost_pair *cp;
4398
4399 iv_ca_add_use (data, ivs, use);
4400 best_cost = iv_ca_cost (ivs);
4401
4402 cp = iv_ca_cand_for_use (ivs, use);
4403 if (cp)
4404 {
4405 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4406 iv_ca_set_no_cp (data, ivs, use);
4407 }
4408
4409 /* First try important candidates. Only if it fails, try the specific ones.
4410 Rationale -- in loops with many variables the best choice often is to use
4411 just one generic biv. If we added here many ivs specific to the uses,
4412 the optimization algorithm later would be likely to get stuck in a local
4413 minimum, thus causing us to create too many ivs. The approach from
4414 few ivs to more seems more likely to be successful -- starting from few
4415 ivs, replacing an expensive use by a specific iv should always be a
4416 win. */
4417 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4418 {
4419 cand = iv_cand (data, i);
4420
4421 if (iv_ca_cand_used_p (ivs, cand))
4422 continue;
4423
4424 cp = get_use_iv_cost (data, use, cand);
4425 if (!cp)
4426 continue;
4427
4428 iv_ca_set_cp (data, ivs, use, cp);
4429 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4430 iv_ca_set_no_cp (data, ivs, use);
4431 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4432
4433 if (act_cost < best_cost)
4434 {
4435 best_cost = act_cost;
4436
4437 iv_ca_delta_free (&best_delta);
4438 best_delta = act_delta;
4439 }
4440 else
4441 iv_ca_delta_free (&act_delta);
4442 }
4443
4444 if (best_cost == INFTY)
4445 {
4446 for (i = 0; i < use->n_map_members; i++)
4447 {
4448 cp = use->cost_map + i;
4449 cand = cp->cand;
4450 if (!cand)
4451 continue;
4452
4453 /* Already tried this. */
4454 if (cand->important)
4455 continue;
4456
4457 if (iv_ca_cand_used_p (ivs, cand))
4458 continue;
4459
4460 act_delta = NULL;
4461 iv_ca_set_cp (data, ivs, use, cp);
4462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4463 iv_ca_set_no_cp (data, ivs, use);
4464 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4465 cp, act_delta);
4466
4467 if (act_cost < best_cost)
4468 {
4469 best_cost = act_cost;
4470
4471 if (best_delta)
4472 iv_ca_delta_free (&best_delta);
4473 best_delta = act_delta;
4474 }
4475 else
4476 iv_ca_delta_free (&act_delta);
4477 }
4478 }
4479
4480 iv_ca_delta_commit (data, ivs, best_delta, true);
4481 iv_ca_delta_free (&best_delta);
4482
4483 return (best_cost != INFTY);
4484 }
4485
4486 /* Finds an initial assignment of candidates to uses. */
4487
4488 static struct iv_ca *
4489 get_initial_solution (struct ivopts_data *data)
4490 {
4491 struct iv_ca *ivs = iv_ca_new (data);
4492 unsigned i;
4493
4494 for (i = 0; i < n_iv_uses (data); i++)
4495 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4496 {
4497 iv_ca_free (&ivs);
4498 return NULL;
4499 }
4500
4501 return ivs;
4502 }
4503
/* Tries to improve set of induction variables IVS.  Returns true if some
   improvement was applied to IVS, false when the set is locally
   optimal.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Temporarily commit the extension so pruning sees it, then
	     merge both deltas into one candidate change.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
4560
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   Returns the final set (with each use's selected candidate recorded),
   or NULL if no valid assignment exists.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Apply local improvements until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the chosen candidate in each use for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
4607
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position described by CAND->pos.  For IP_ORIGINAL
   candidates the existing biv is preserved instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates do not correspond to an actual iv.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment before the exit test, at the end of the loop body.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment at the end of the latch block.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }
 
  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
4650
/* Creates new induction variables described in SET, one for each
   candidate selected into the set.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
4666
4667 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4668 is true, remove also the ssa name defined by the statement. */
4669
4670 static void
4671 remove_statement (tree stmt, bool including_defined_name)
4672 {
4673 if (TREE_CODE (stmt) == PHI_NODE)
4674 {
4675 if (!including_defined_name)
4676 {
4677 /* Prevent the ssa name defined by the statement from being removed. */
4678 SET_PHI_RESULT (stmt, NULL);
4679 }
4680 remove_phi_node (stmt, NULL_TREE);
4681 }
4682 else
4683 {
4684 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4685
4686 bsi_remove (&bsi);
4687 }
4688 }
4689
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case variable is expressed in some
	 complicated way, rewrite it so that we may get rid of this
	 complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
	   || TREE_CODE (op) == MINUS_EXPR)
	  && TREE_OPERAND (op, 0) == cand->var_before
	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
	return;
    }

  comp = unshare_expr (get_computation (data->current_loop,
					use, cand));
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the insertion point: after the labels at the start of the
	 block containing the phi node.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Gimplify the computation; STMTS receives any statements needed to
     compute OP.  */
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
4768
4769 /* Replaces ssa name in index IDX by its basic variable. Callback for
4770 for_each_index. */
4771
4772 static bool
4773 idx_remove_ssa_names (tree base, tree *idx,
4774 void *data ATTRIBUTE_UNUSED)
4775 {
4776 tree *op;
4777
4778 if (TREE_CODE (*idx) == SSA_NAME)
4779 *idx = SSA_NAME_VAR (*idx);
4780
4781 if (TREE_CODE (base) == ARRAY_REF)
4782 {
4783 op = &TREE_OPERAND (base, 2);
4784 if (*op
4785 && TREE_CODE (*op) == SSA_NAME)
4786 *op = SSA_NAME_VAR (*op);
4787 op = &TREE_OPERAND (base, 3);
4788 if (*op
4789 && TREE_CODE (*op) == SSA_NAME)
4790 *op = SSA_NAME_VAR (*op);
4791 }
4792
4793 return true;
4794 }
4795
4796 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4797
4798 static tree
4799 unshare_and_remove_ssa_names (tree ref)
4800 {
4801 ref = unshare_expr (ref);
4802 for_each_index (&ref, idx_remove_ssa_names, NULL);
4803
4804 return ref;
4805 }
4806
/* Rewrites base of memory access OP with expression WITH in statement
   pointed to by BSI.  When WITH is an ssa name, a new pointer carrying
   the aliasing information of the original base is created and used
   instead, so that alias analysis still sees the right memory tag.  */

static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
  tree bvar, var, new_name, copy, name;
  tree orig;

  var = bvar = get_base_address (*op);

  /* The aliasing bookkeeping below is only needed when replacing a real
     base by an ssa name; otherwise rewrite the reference directly.  */
  if (!var || TREE_CODE (with) != SSA_NAME)
    goto do_rewrite;

  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
  /* Strip the dereference and find the variable (or ssa name) whose
     alias information should be transferred to the new pointer.  */
  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      name = var;
      var = SSA_NAME_VAR (var);
    }
  else if (DECL_P (var))
    name = NULL_TREE;
  else
    goto do_rewrite;

  /* We need to add a memory tag for the variable.  But we do not want
     to add it to the temporary used for the computations, since this leads
     to problems in redundancy elimination when there are common parts
     in two computations referring to the different arrays.  So we copy
     the variable to a new temporary.  */
  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);

  if (name)
    /* The base was an ssa name; duplicating it preserves its aliasing
       information.  */
    new_name = duplicate_ssa_name (name, copy);
  else
    {
      /* The base was a declaration; create a fresh pointer temporary and
	 give it the declaration's memory tag (or record a type alias if
	 there is none yet).  */
      tree tag = var_ann (var)->type_mem_tag;
      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
      add_referenced_tmp_var (new_ptr);
      if (tag)
	var_ann (new_ptr)->type_mem_tag = tag;
      else
	add_type_alias (new_ptr, var);
      new_name = make_ssa_name (new_ptr, copy);
    }

  /* Emit the copy assignment and use its result as the new base.  */
  TREE_OPERAND (copy, 0) = new_name;
  update_stmt (copy);
  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
  with = new_name;

do_rewrite:

  orig = NULL_TREE;
  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);

  /* Preserve an already-recorded original reference, if any; otherwise
     build one from the access being rewritten.  */
  if (TREE_CODE (*op) == INDIRECT_REF)
    orig = REF_ORIGINAL (*op);
  if (!orig)
    orig = unshare_and_remove_ssa_names (*op);

  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);

  /* Record the original reference, for purposes of alias analysis.  */
  REF_ORIGINAL (*op) = orig;

  /* Virtual operands in the original statement may have to be renamed
     because of the replacement.  */
  mark_new_vars_to_rename (bsi_stmt (*bsi));
}
4881
4882 /* Rewrites USE (address that is an iv) using candidate CAND. */
4883
4884 static void
4885 rewrite_use_address (struct ivopts_data *data,
4886 struct iv_use *use, struct iv_cand *cand)
4887 {
4888 tree comp = unshare_expr (get_computation (data->current_loop,
4889 use, cand));
4890 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4891 tree stmts;
4892 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4893
4894 if (stmts)
4895 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4896
4897 rewrite_address_base (&bsi, use->op_p, op);
4898 }
4899
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If possible, the compared iv is eliminated entirely by
   comparing CAND against a precomputed bound; otherwise the iv operand of
   the condition is just re-expressed through CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;

  if (may_eliminate_iv (data, use, cand, &compare, &bound))
    {
      /* Replace the whole condition by "VAR compare BOUND", where VAR is
	 the value of CAND at the use.  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      /* Statements computing the bound are emitted just before the use.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = unshare_expr (get_computation (data->current_loop, use, cand));

  /* Pick the operand of the condition that is the iv being replaced --
     operand 0, unless it is not an ssa name or its step is zero, in which
     case it must be operand 1.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
4945
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  If the value is defined inside a loop
   that does not contain EXIT->dest, the use is redirected through a phi
   node in EXIT->dest (creating one if needed).  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  /* If the name has no defining basic block, there is nothing to
     protect.  */
  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  /* If EXIT->dest is inside the loop of the definition, the use does not
     leave that loop and loop closed ssa form is not violated.  */
  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  /* Redirect the use to the phi result, which is defined in EXIT->dest
     itself.  */
  SET_USE (op_p, PHI_RESULT (phi));
}
4985
4986 /* Ensure that operands of STMT may be used at the end of EXIT without
4987 violating loop closed ssa form. */
4988
4989 static void
4990 protect_loop_closed_ssa_form (edge exit, tree stmt)
4991 {
4992 use_optype uses;
4993 vuse_optype vuses;
4994 v_may_def_optype v_may_defs;
4995 unsigned i;
4996
4997 get_stmt_operands (stmt);
4998
4999 uses = STMT_USE_OPS (stmt);
5000 for (i = 0; i < NUM_USES (uses); i++)
5001 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5002
5003 vuses = STMT_VUSE_OPS (stmt);
5004 for (i = 0; i < NUM_VUSES (vuses); i++)
5005 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5006
5007 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5008 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5009 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5010 }
5011
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
   so that they are emitted on the correct place, and so that the loop closed
   ssa form is preserved.  Phi nodes in EXIT->dest whose argument on EXIT is
   OP are replaced by plain assignments of OP to their results.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  /* Make sure EXIT->dest has a single predecessor, so that the inserted
     statements are only executed when the loop exits through EXIT.  */
  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  /* Insert the computation statements, restoring loop closed ssa form
     for the operands of each statement as it is inserted.  */
  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
	{
	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
	}
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  /* Replace each phi node whose argument on EXIT is OP by an assignment
     of OP to the phi result.  NEXT is remembered up front because the phi
     node is removed inside the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
	{
	  def = PHI_RESULT (phi);
	  remove_statement (phi, false);
	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
			 def, op);
	  SSA_NAME_DEF_STMT (def) = stmt;
	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
	}
    }
}
5063
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  When the loop has a single dominating exit, the
   value is computed on the exit edge; otherwise the use is rewritten like a
   nonlinear expression.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* TGT is the ssa name whose final value is used outside of the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* Determine the value to use on the exit: either the replacement
	 final value (when CAND has no iv), or the value of CAND at the
	 last statement of the exit source block.  */
      if (!cand->iv)
	{
	  bool ok = may_replace_final_value (data, use, &value);
	  gcc_assert (ok);
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      value = unshare_expr (value);
      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
	  
      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the phi arguments on the exit edge from TGT to the new
	 value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      /* Emit the statements computing the value on the exit edge.  */
      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5134
5135 /* Rewrites USE using candidate CAND. */
5136
5137 static void
5138 rewrite_use (struct ivopts_data *data,
5139 struct iv_use *use, struct iv_cand *cand)
5140 {
5141 switch (use->type)
5142 {
5143 case USE_NONLINEAR_EXPR:
5144 rewrite_use_nonlinear_expr (data, use, cand);
5145 break;
5146
5147 case USE_OUTER:
5148 rewrite_use_outer (data, use, cand);
5149 break;
5150
5151 case USE_ADDRESS:
5152 rewrite_use_address (data, use, cand);
5153 break;
5154
5155 case USE_COMPARE:
5156 rewrite_use_compare (data, use, cand);
5157 break;
5158
5159 default:
5160 gcc_unreachable ();
5161 }
5162 update_stmt (use->stmt);
5163 }
5164
5165 /* Rewrite the uses using the selected induction variables. */
5166
5167 static void
5168 rewrite_uses (struct ivopts_data *data)
5169 {
5170 unsigned i;
5171 struct iv_cand *cand;
5172 struct iv_use *use;
5173
5174 for (i = 0; i < n_iv_uses (data); i++)
5175 {
5176 use = iv_use (data, i);
5177 cand = use->selected;
5178 gcc_assert (cand);
5179
5180 rewrite_use (data, use, cand);
5181 }
5182 }
5183
5184 /* Removes the ivs that are not used after rewriting. */
5185
5186 static void
5187 remove_unused_ivs (struct ivopts_data *data)
5188 {
5189 unsigned j;
5190 bitmap_iterator bi;
5191
5192 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5193 {
5194 struct version_info *info;
5195
5196 info = ver_info (data, j);
5197 if (info->iv
5198 && !zero_p (info->iv->step)
5199 && !info->inv_id
5200 && !info->iv->have_use_for
5201 && !info->preserve_biv)
5202 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5203 }
5204 }
5205
5206 /* Frees data allocated by the optimization of a single loop. */
5207
5208 static void
5209 free_loop_data (struct ivopts_data *data)
5210 {
5211 unsigned i, j;
5212 bitmap_iterator bi;
5213
5214 htab_empty (data->niters);
5215
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5217 {
5218 struct version_info *info;
5219
5220 info = ver_info (data, i);
5221 if (info->iv)
5222 free (info->iv);
5223 info->iv = NULL;
5224 info->has_nonlin_use = false;
5225 info->preserve_biv = false;
5226 info->inv_id = 0;
5227 }
5228 bitmap_clear (data->relevant);
5229 bitmap_clear (data->important_candidates);
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 {
5233 struct iv_use *use = iv_use (data, i);
5234
5235 free (use->iv);
5236 BITMAP_FREE (use->related_cands);
5237 for (j = 0; j < use->n_map_members; j++)
5238 if (use->cost_map[j].depends_on)
5239 BITMAP_FREE (use->cost_map[j].depends_on);
5240 free (use->cost_map);
5241 free (use);
5242 }
5243 VARRAY_POP_ALL (data->iv_uses);
5244
5245 for (i = 0; i < n_iv_cands (data); i++)
5246 {
5247 struct iv_cand *cand = iv_cand (data, i);
5248
5249 if (cand->iv)
5250 free (cand->iv);
5251 free (cand);
5252 }
5253 VARRAY_POP_ALL (data->iv_candidates);
5254
5255 if (data->version_info_size < num_ssa_names)
5256 {
5257 data->version_info_size = 2 * num_ssa_names;
5258 free (data->version_info);
5259 data->version_info = xcalloc (data->version_info_size,
5260 sizeof (struct version_info));
5261 }
5262
5263 data->max_inv_id = 0;
5264
5265 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5266 {
5267 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5268
5269 SET_DECL_RTL (obj, NULL_RTX);
5270 }
5271 VARRAY_POP_ALL (decl_rtl_to_reset);
5272 }
5273
5274 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5275 loop tree. */
5276
5277 static void
5278 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5279 {
5280 unsigned i;
5281
5282 for (i = 1; i < loops->num; i++)
5283 if (loops->parray[i])
5284 {
5285 free (loops->parray[i]->aux);
5286 loops->parray[i]->aux = NULL;
5287 }
5288
5289 free_loop_data (data);
5290 free (data->version_info);
5291 BITMAP_FREE (data->relevant);
5292 BITMAP_FREE (data->important_candidates);
5293 htab_delete (data->niters);
5294
5295 VARRAY_FREE (decl_rtl_to_reset);
5296 VARRAY_FREE (data->iv_uses);
5297 VARRAY_FREE (data->iv_candidates);
5298 }
5299
/* Optimizes the LOOP.  Returns true if anything changed.  The per-loop
   data is always released before returning, whether or not the loop was
   modified.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);
      
      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up if there are too many of
     them to analyze.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);
  
  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
5371
/* Main entry point.  Optimizes induction variables in LOOPS, processing
   inner loops before the loops containing them, then updates the ssa form
   as needed.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  The traversal moves to a sibling
     loop (descending to its innermost loop) when one exists, and otherwise
     ascends to the parent, until the tree root is reached again.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }

  /* FIXME.  IV opts introduces new aliases and call-clobbered
     variables, which need to be renamed.  However, when we call the
     renamer, not all statements will be scanned for operands.  In
     particular, the newly introduced aliases may appear in statements
     that are considered "unmodified", so the renamer will not get a
     chance to rename those operands.

     Work around this problem by forcing an operand re-scan on every
     statement.  This will not be necessary once the new operand
     scanner is implemented.  */
  if (need_ssa_update_p ())
    {
      basic_block bb;
      block_stmt_iterator si;
      FOR_EACH_BB (bb)
	for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	  update_stmt (bsi_stmt (si));

      update_ssa (TODO_update_ssa);
    }

  rewrite_into_loop_closed_ssa (NULL);
  tree_ssa_iv_optimize_finalize (loops, &data);
}