gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25    optimizations commonly known as strength reduction, induction variable
26    coalescing and induction variable elimination.  It does so in the
   following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60    All of this is done loop by loop.  Doing it globally is theoretically
61    possible; it might give better performance and enable us to determine
62    costs more precisely, but getting all the interactions right would be
63    complicated.  */
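
/* As an illustrative sketch (a made-up example, not taken from any
   testcase), strength reduction of an address use may rewrite

     for (i = 0; i < n; i++)
       a[i] = 0;

   into pointer form, using the candidate p = &a[0] + i * sizeof (*a):

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which eliminates the multiplication hidden in the array indexing.  */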
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93
94 /* The infinite cost. */
95 #define INFTY 10000000
96
97 /* The expected number of loop iterations. TODO -- use profiling instead of
98 this. */
99 #define AVG_LOOP_NITER(LOOP) 5
100
101
102 /* Representation of the induction variable. */
103 struct iv
104 {
105 tree base; /* Initial value of the iv. */
106 tree base_object; /* A memory object to which the induction variable points. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
112 };
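
/* E.g. (a sketch): for a counted loop "for (i = 0; i < n; i++)", the iv
   describing i has base 0 and step 1; base_object stays NULL because i is
   not a pointer, and biv_p is set once the variable is recognized as a
   basic induction variable below.  */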
113
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
116 {
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 };
124
125 /* Types of uses. */
126 enum use_type
127 {
128 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
129 USE_ADDRESS, /* Use in an address. */
130 USE_COMPARE /* Use is a compare. */
131 };
132
133 /* The candidate - cost pair. */
134 struct cost_pair
135 {
136 struct iv_cand *cand; /* The candidate. */
137 unsigned cost; /* The cost. */
138 bitmap depends_on; /* The list of invariants that have to be
139 preserved. */
140 tree value; /* For final value elimination, the expression for
141 the final value of the iv. For iv elimination,
142 the new bound to compare with. */
143 };
144
145 /* Use. */
146 struct iv_use
147 {
148 unsigned id; /* The id of the use. */
149 enum use_type type; /* Type of the use. */
150 struct iv *iv; /* The induction variable it is based on. */
151 tree stmt; /* Statement in which it occurs. */
152 tree *op_p; /* The place where it occurs. */
153 bitmap related_cands; /* The set of "related" iv candidates, plus the common
154 important ones. */
155
156 unsigned n_map_members; /* Number of candidates in the cost_map list. */
157 struct cost_pair *cost_map;
158 /* The costs w.r.t. the iv candidates. */
159
160 struct iv_cand *selected;
161 /* The selected candidate. */
162 };
163
164 /* The position where the iv is computed. */
165 enum iv_position
166 {
167 IP_NORMAL, /* At the end, just before the exit condition. */
168 IP_END, /* At the end of the latch block. */
169 IP_ORIGINAL /* The original biv. */
170 };
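
/* A rough sketch of the two alternative synthetic placements (hypothetical
   CFG, not from any dump):

     body:
       ...
       ivtmp = ivtmp + step;      <-- IP_NORMAL, just before the exit test
       if (ivtmp < bound) goto latch; else goto exit;
     latch:
       ivtmp = ivtmp + step;      <-- IP_END, at the end of the latch block
       goto body;  */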
171
172 /* The induction variable candidate. */
173 struct iv_cand
174 {
175 unsigned id; /* The number of the candidate. */
176 bool important; /* Whether this is an "important" candidate, i.e. such
177 that it should be considered by all uses. */
178 enum iv_position pos; /* Where it is computed. */
179 tree incremented_at; /* For original biv, the statement where it is
180 incremented. */
181 tree var_before; /* The variable used for it before increment. */
182 tree var_after; /* The variable used for it after increment. */
183 struct iv *iv; /* The value of the candidate. NULL for
184 "pseudocandidate" used to indicate the possibility
185 to replace the final value of an iv by direct
186 computation of the value. */
187 unsigned cost; /* Cost of the candidate. */
188 bitmap depends_on; /* The list of invariants that are used in step of the
189 biv. */
190 };
191
192 /* The data used by the induction variable optimizations. */
193
194 typedef struct iv_use *iv_use_p;
195 DEF_VEC_P(iv_use_p);
196 DEF_VEC_ALLOC_P(iv_use_p,heap);
197
198 typedef struct iv_cand *iv_cand_p;
199 DEF_VEC_P(iv_cand_p);
200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
201
202 struct ivopts_data
203 {
204 /* The currently optimized loop. */
205 struct loop *current_loop;
206
207 /* Number of registers used in it. */
208 unsigned regs_used;
209
210 /* Numbers of iterations for all exits of the current loop. */
211 htab_t niters;
212
213 /* The size of version_info array allocated. */
214 unsigned version_info_size;
215
216 /* The array of information for the ssa names. */
217 struct version_info *version_info;
218
219 /* The bitmap of indices in version_info whose value was changed. */
220 bitmap relevant;
221
222 /* The maximum invariant id. */
223 unsigned max_inv_id;
224
225 /* The uses of induction variables. */
226 VEC(iv_use_p,heap) *iv_uses;
227
228 /* The candidates. */
229 VEC(iv_cand_p,heap) *iv_candidates;
230
231 /* A bitmap of important candidates. */
232 bitmap important_candidates;
233
234 /* Whether to consider just related and important candidates when replacing a
235 use. */
236 bool consider_all_candidates;
237 };
238
239 /* An assignment of iv candidates to uses. */
240
241 struct iv_ca
242 {
243 /* The number of uses covered by the assignment. */
244 unsigned upto;
245
246 /* Number of uses that cannot be expressed by the candidates in the set. */
247 unsigned bad_uses;
248
249 /* Candidate assigned to a use, together with the related costs. */
250 struct cost_pair **cand_for_use;
251
252 /* Number of times each candidate is used. */
253 unsigned *n_cand_uses;
254
255 /* The candidates used. */
256 bitmap cands;
257
258 /* The number of candidates in the set. */
259 unsigned n_cands;
260
261 /* Total number of registers needed. */
262 unsigned n_regs;
263
264 /* Total cost of expressing uses. */
265 unsigned cand_use_cost;
266
267 /* Total cost of candidates. */
268 unsigned cand_cost;
269
270 /* Number of times each invariant is used. */
271 unsigned *n_invariant_uses;
272
273 /* Total cost of the assignment. */
274 unsigned cost;
275 };
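
/* As a sketch, the total cost of an assignment combines these fields
   roughly as

     cost = cand_use_cost + cand_cost + <register pressure term (n_regs)>

   where the pressure term grows once the ivs and used invariants no longer
   fit into the available registers (the exact accounting is done when the
   assignment is recomputed later in this file).  */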
276
277 /* Difference of two iv candidate assignments. */
278
279 struct iv_ca_delta
280 {
281 /* Changed use. */
282 struct iv_use *use;
283
284 /* An old assignment (for rollback purposes). */
285 struct cost_pair *old_cp;
286
287 /* A new assignment. */
288 struct cost_pair *new_cp;
289
290 /* Next change in the list. */
291 struct iv_ca_delta *next_change;
292 };
293
294 /* Bound on the number of candidates below which all candidates are considered. */
295
296 #define CONSIDER_ALL_CANDIDATES_BOUND \
297 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298
299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
300 optimizing such a loop would help, and it would take ages). */
301
302 #define MAX_CONSIDERED_USES \
303 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304
305 /* If there are at most this number of ivs in the set, always try removing
306    unnecessary ivs from the set. */
307
308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
309 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
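
/* All three bounds are --param knobs and can be tuned from the command
   line; e.g. (illustrative values, not necessarily the defaults):

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
             --param iv-max-considered-uses=250 \
             --param iv-always-prune-cand-set-bound=10  */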
310
311 /* The list of trees for which the decl_rtl field must be reset is stored
312    here. */
313
314 static VEC(tree,heap) *decl_rtl_to_reset;
315
316 /* Number of uses recorded in DATA. */
317
318 static inline unsigned
319 n_iv_uses (struct ivopts_data *data)
320 {
321 return VEC_length (iv_use_p, data->iv_uses);
322 }
323
324 /* Ith use recorded in DATA. */
325
326 static inline struct iv_use *
327 iv_use (struct ivopts_data *data, unsigned i)
328 {
329 return VEC_index (iv_use_p, data->iv_uses, i);
330 }
331
332 /* Number of candidates recorded in DATA. */
333
334 static inline unsigned
335 n_iv_cands (struct ivopts_data *data)
336 {
337 return VEC_length (iv_cand_p, data->iv_candidates);
338 }
339
340 /* Ith candidate recorded in DATA. */
341
342 static inline struct iv_cand *
343 iv_cand (struct ivopts_data *data, unsigned i)
344 {
345 return VEC_index (iv_cand_p, data->iv_candidates, i);
346 }
347
348 /* The single loop exit if it dominates the latch, NULL otherwise. */
349
350 edge
351 single_dom_exit (struct loop *loop)
352 {
353 edge exit = single_exit (loop);
354
355 if (!exit)
356 return NULL;
357
358 if (!just_once_each_iteration_p (loop, exit->src))
359 return NULL;
360
361 return exit;
362 }
363
364 /* Dumps information about the induction variable IV to FILE. */
365
366 extern void dump_iv (FILE *, struct iv *);
367 void
368 dump_iv (FILE *file, struct iv *iv)
369 {
370 if (iv->ssa_name)
371 {
372 fprintf (file, "ssa name ");
373 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
374 fprintf (file, "\n");
375 }
376
377 fprintf (file, " type ");
378 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
379 fprintf (file, "\n");
380
381 if (iv->step)
382 {
383 fprintf (file, " base ");
384 print_generic_expr (file, iv->base, TDF_SLIM);
385 fprintf (file, "\n");
386
387 fprintf (file, " step ");
388 print_generic_expr (file, iv->step, TDF_SLIM);
389 fprintf (file, "\n");
390 }
391 else
392 {
393 fprintf (file, " invariant ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
396 }
397
398 if (iv->base_object)
399 {
400 fprintf (file, " base object ");
401 print_generic_expr (file, iv->base_object, TDF_SLIM);
402 fprintf (file, "\n");
403 }
404
405 if (iv->biv_p)
406 fprintf (file, " is a biv\n");
407 }
408
409 /* Dumps information about the USE to FILE. */
410
411 extern void dump_use (FILE *, struct iv_use *);
412 void
413 dump_use (FILE *file, struct iv_use *use)
414 {
415 fprintf (file, "use %d\n", use->id);
416
417 switch (use->type)
418 {
419 case USE_NONLINEAR_EXPR:
420 fprintf (file, " generic\n");
421 break;
422
423 case USE_ADDRESS:
424 fprintf (file, " address\n");
425 break;
426
427 case USE_COMPARE:
428 fprintf (file, " compare\n");
429 break;
430
431 default:
432 gcc_unreachable ();
433 }
434
435 fprintf (file, " in statement ");
436 print_generic_expr (file, use->stmt, TDF_SLIM);
437 fprintf (file, "\n");
438
439 fprintf (file, " at position ");
440 if (use->op_p)
441 print_generic_expr (file, *use->op_p, TDF_SLIM);
442 fprintf (file, "\n");
443
444 dump_iv (file, use->iv);
445
446 if (use->related_cands)
447 {
448 fprintf (file, " related candidates ");
449 dump_bitmap (file, use->related_cands);
450 }
451 }
452
453 /* Dumps information about the uses to FILE. */
454
455 extern void dump_uses (FILE *, struct ivopts_data *);
456 void
457 dump_uses (FILE *file, struct ivopts_data *data)
458 {
459 unsigned i;
460 struct iv_use *use;
461
462 for (i = 0; i < n_iv_uses (data); i++)
463 {
464 use = iv_use (data, i);
465
466 dump_use (file, use);
467 fprintf (file, "\n");
468 }
469 }
470
471 /* Dumps information about induction variable candidate CAND to FILE. */
472
473 extern void dump_cand (FILE *, struct iv_cand *);
474 void
475 dump_cand (FILE *file, struct iv_cand *cand)
476 {
477 struct iv *iv = cand->iv;
478
479 fprintf (file, "candidate %d%s\n",
480 cand->id, cand->important ? " (important)" : "");
481
482 if (cand->depends_on)
483 {
484 fprintf (file, " depends on ");
485 dump_bitmap (file, cand->depends_on);
486 }
487
488 if (!iv)
489 {
490 fprintf (file, " final value replacement\n");
491 return;
492 }
493
494 switch (cand->pos)
495 {
496 case IP_NORMAL:
497 fprintf (file, " incremented before exit test\n");
498 break;
499
500 case IP_END:
501 fprintf (file, " incremented at end\n");
502 break;
503
504 case IP_ORIGINAL:
505 fprintf (file, " original biv\n");
506 break;
507 }
508
509 dump_iv (file, iv);
510 }
511
512 /* Returns the info for ssa version VER. */
513
514 static inline struct version_info *
515 ver_info (struct ivopts_data *data, unsigned ver)
516 {
517 return data->version_info + ver;
518 }
519
520 /* Returns the info for ssa name NAME. */
521
522 static inline struct version_info *
523 name_info (struct ivopts_data *data, tree name)
524 {
525 return ver_info (data, SSA_NAME_VERSION (name));
526 }
527
528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
529 emitted in LOOP. */
530
531 static bool
532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
533 {
534 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
535
536 gcc_assert (bb);
537
538 if (sbb == loop->latch)
539 return true;
540
541 if (sbb != bb)
542 return false;
543
544 return stmt == last_stmt (bb);
545 }
546
547 /* Returns true if STMT is after the place where the original induction
548 variable CAND is incremented. */
549
550 static bool
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
552 {
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
556
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
558 return false;
559
560 if (stmt_bb != cand_bb)
561 return true;
562
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
566 {
567 if (bsi_stmt (bsi) == cand->incremented_at)
568 return false;
569 if (bsi_stmt (bsi) == stmt)
570 return true;
571 }
572 }
573
574 /* Returns true if STMT is after the place where the induction variable
575 CAND is incremented in LOOP. */
576
577 static bool
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
579 {
580 switch (cand->pos)
581 {
582 case IP_END:
583 return false;
584
585 case IP_NORMAL:
586 return stmt_after_ip_normal_pos (loop, stmt);
587
588 case IP_ORIGINAL:
589 return stmt_after_ip_original_pos (cand, stmt);
590
591 default:
592 gcc_unreachable ();
593 }
594 }
595
596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
597
598 static bool
599 abnormal_ssa_name_p (tree exp)
600 {
601 if (!exp)
602 return false;
603
604 if (TREE_CODE (exp) != SSA_NAME)
605 return false;
606
607 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
608 }
609
610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
611 abnormal phi node. Callback for for_each_index. */
612
613 static bool
614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
615 void *data ATTRIBUTE_UNUSED)
616 {
617 if (TREE_CODE (base) == ARRAY_REF)
618 {
619 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
620 return false;
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
622 return false;
623 }
624
625 return !abnormal_ssa_name_p (*index);
626 }
627
628 /* Returns true if EXPR contains a ssa name that occurs in an
629 abnormal phi node. */
630
631 bool
632 contains_abnormal_ssa_name_p (tree expr)
633 {
634 enum tree_code code;
635 enum tree_code_class class;
636
637 if (!expr)
638 return false;
639
640 code = TREE_CODE (expr);
641 class = TREE_CODE_CLASS (code);
642
643 if (code == SSA_NAME)
644 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
645
646 if (code == INTEGER_CST
647 || is_gimple_min_invariant (expr))
648 return false;
649
650 if (code == ADDR_EXPR)
651 return !for_each_index (&TREE_OPERAND (expr, 0),
652 idx_contains_abnormal_ssa_name_p,
653 NULL);
654
655 switch (class)
656 {
657 case tcc_binary:
658 case tcc_comparison:
659 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
660 return true;
661
662 /* Fallthru. */
663 case tcc_unary:
664 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
665 return true;
666
667 break;
668
669 default:
670 gcc_unreachable ();
671 }
672
673 return false;
674 }
675
676 /* Element of the table in which we cache the numbers of iterations obtained
677 from exits of the loop. */
678
679 struct nfe_cache_elt
680 {
681 /* The edge for which the number of iterations is cached. */
682 edge exit;
683
684 /* Number of iterations corresponding to this exit, or NULL if it cannot be
685 determined. */
686 tree niter;
687 };
688
689 /* Hash function for nfe_cache_elt E. */
690
691 static hashval_t
692 nfe_hash (const void *e)
693 {
694 const struct nfe_cache_elt *elt = e;
695
696 return htab_hash_pointer (elt->exit);
697 }
698
699 /* Equality function for nfe_cache_elt E1 and edge E2. */
700
701 static int
702 nfe_eq (const void *e1, const void *e2)
703 {
704 const struct nfe_cache_elt *elt1 = e1;
705
706 return elt1->exit == e2;
707 }
708
709 /* Returns tree describing number of iterations determined from
710 EXIT of DATA->current_loop, or NULL if something goes wrong. */
711
712 static tree
713 niter_for_exit (struct ivopts_data *data, edge exit)
714 {
715 struct nfe_cache_elt *nfe_desc;
716 struct tree_niter_desc desc;
717 PTR *slot;
718
719 slot = htab_find_slot_with_hash (data->niters, exit,
720 htab_hash_pointer (exit),
721 INSERT);
722
723 if (!*slot)
724 {
725 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
726 nfe_desc->exit = exit;
727
728 /* Try to determine number of iterations. We must know it
729 unconditionally (i.e., without possibility of # of iterations
730 being zero). Also, we cannot safely work with ssa names that
731 appear in phi nodes on abnormal edges, so that we do not create
732 overlapping life ranges for them (PR 27283). */
733 if (number_of_iterations_exit (data->current_loop,
734 exit, &desc, true)
735 && integer_zerop (desc.may_be_zero)
736 && !contains_abnormal_ssa_name_p (desc.niter))
737 nfe_desc->niter = desc.niter;
738 else
739 nfe_desc->niter = NULL_TREE;
740 }
741 else
742 nfe_desc = *slot;
743
744 return nfe_desc->niter;
745 }
746
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
750
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
753 {
754 edge exit = single_dom_exit (data->current_loop);
755
756 if (!exit)
757 return NULL;
758
759 return niter_for_exit (data, exit);
760 }
761
762 /* Initializes data structures used by the iv optimization pass, stored
763 in DATA. */
764
765 static void
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
767 {
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
777 }
778
779 /* Returns a memory object to which EXPR points.  If we can determine that
780    EXPR does not point to any such object, NULL is returned. */
781
782 static tree
783 determine_base_object (tree expr)
784 {
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj, op0, op1;
787
788 /* If this is a pointer cast to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (TREE_CODE (expr) == NOP_EXPR
792 || TREE_CODE (expr) == CONVERT_EXPR)
793 return determine_base_object (TREE_OPERAND (expr, 0));
794
795 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
796 return NULL_TREE;
797
798 switch (code)
799 {
800 case INTEGER_CST:
801 return NULL_TREE;
802
803 case ADDR_EXPR:
804 obj = TREE_OPERAND (expr, 0);
805 base = get_base_address (obj);
806
807 if (!base)
808 return expr;
809
810 if (TREE_CODE (base) == INDIRECT_REF)
811 return determine_base_object (TREE_OPERAND (base, 0));
812
813 return fold_convert (ptr_type_node,
814 build_fold_addr_expr (base));
815
816 case PLUS_EXPR:
817 case MINUS_EXPR:
818 op0 = determine_base_object (TREE_OPERAND (expr, 0));
819 op1 = determine_base_object (TREE_OPERAND (expr, 1));
820
821 if (!op1)
822 return op0;
823
824 if (!op0)
825 return (code == PLUS_EXPR
826 ? op1
827 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
828
829 return fold_build2 (code, ptr_type_node, op0, op1);
830
831 default:
832 return fold_convert (ptr_type_node, expr);
833 }
834 }
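
/* E.g. (a sketch): for EXPR "&a[0] + 4 * i" this returns &a; for a cast
   of an integer, "(int *) n", it returns NULL_TREE; and an unknown pointer
   p falls through to the default case and is returned (converted to
   ptr_type_node) as its own base object.  */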
835
836 /* Allocates an induction variable with given initial value BASE and step
837    STEP. */
838
839 static struct iv *
840 alloc_iv (tree base, tree step)
841 {
842 struct iv *iv = XCNEW (struct iv);
843 gcc_assert (step != NULL_TREE);
844
845 iv->base = base;
846 iv->base_object = determine_base_object (base);
847 iv->step = step;
848 iv->biv_p = false;
849 iv->have_use_for = false;
850 iv->use_id = 0;
851 iv->ssa_name = NULL_TREE;
852
853 return iv;
854 }
855
856 /* Sets STEP and BASE for induction variable IV. */
857
858 static void
859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
860 {
861 struct version_info *info = name_info (data, iv);
862
863 gcc_assert (!info->iv);
864
865 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
866 info->iv = alloc_iv (base, step);
867 info->iv->ssa_name = iv;
868 }
869
870 /* Finds induction variable declaration for VAR. */
871
872 static struct iv *
873 get_iv (struct ivopts_data *data, tree var)
874 {
875 basic_block bb;
876 tree type = TREE_TYPE (var);
877
878 if (!POINTER_TYPE_P (type)
879 && !INTEGRAL_TYPE_P (type))
880 return NULL;
881
882 if (!name_info (data, var)->iv)
883 {
884 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
885
886 if (!bb
887 || !flow_bb_inside_loop_p (data->current_loop, bb))
888 set_iv (data, var, var, build_int_cst (type, 0));
889 }
890
891 return name_info (data, var)->iv;
892 }
893
894 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
895 not define a simple affine biv with nonzero step. */
896
897 static tree
898 determine_biv_step (tree phi)
899 {
900 struct loop *loop = bb_for_stmt (phi)->loop_father;
901 tree name = PHI_RESULT (phi);
902 affine_iv iv;
903
904 if (!is_gimple_reg (name))
905 return NULL_TREE;
906
907 if (!simple_iv (loop, phi, name, &iv, true))
908 return NULL_TREE;
909
910 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
911 }
912
913 /* Finds basic ivs. */
914
915 static bool
916 find_bivs (struct ivopts_data *data)
917 {
918 tree phi, step, type, base;
919 bool found = false;
920 struct loop *loop = data->current_loop;
921
922 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
923 {
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
925 continue;
926
927 step = determine_biv_step (phi);
928 if (!step)
929 continue;
930
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
935 continue;
936
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
939 if (step)
940 step = fold_convert (type, step);
941
942 set_iv (data, PHI_RESULT (phi), base, step);
943 found = true;
944 }
945
946 return found;
947 }
948
949 /* Marks basic ivs. */
950
951 static void
952 mark_bivs (struct ivopts_data *data)
953 {
954 tree phi, var;
955 struct iv *iv, *incr_iv;
956 struct loop *loop = data->current_loop;
957 basic_block incr_bb;
958
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
960 {
961 iv = get_iv (data, PHI_RESULT (phi));
962 if (!iv)
963 continue;
964
965 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
966 incr_iv = get_iv (data, var);
967 if (!incr_iv)
968 continue;
969
970 /* If the increment is in a subloop, ignore it. */
971 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
972 if (incr_bb->loop_father != data->current_loop
973 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
974 continue;
975
976 iv->biv_p = true;
977 incr_iv->biv_p = true;
978 }
979 }
980
981 /* Checks whether STMT defines a linear induction variable and stores its
982 parameters to IV. */
983
984 static bool
985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
986 {
987 tree lhs;
988 struct loop *loop = data->current_loop;
989
990 iv->base = NULL_TREE;
991 iv->step = NULL_TREE;
992
993 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
994 return false;
995
996 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
997 if (TREE_CODE (lhs) != SSA_NAME)
998 return false;
999
1000 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1001 return false;
1002 iv->base = expand_simple_operations (iv->base);
1003
1004 if (contains_abnormal_ssa_name_p (iv->base)
1005 || contains_abnormal_ssa_name_p (iv->step))
1006 return false;
1007
1008 return true;
1009 }
1010
1011 /* Finds general ivs in statement STMT. */
1012
1013 static void
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1015 {
1016 affine_iv iv;
1017
1018 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1019 return;
1020
1021 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1022 }
1023
1024 /* Finds general ivs in basic block BB. */
1025
1026 static void
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1028 {
1029 block_stmt_iterator bsi;
1030
1031 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032 find_givs_in_stmt (data, bsi_stmt (bsi));
1033 }
1034
1035 /* Finds general ivs. */
1036
1037 static void
1038 find_givs (struct ivopts_data *data)
1039 {
1040 struct loop *loop = data->current_loop;
1041 basic_block *body = get_loop_body_in_dom_order (loop);
1042 unsigned i;
1043
1044 for (i = 0; i < loop->num_nodes; i++)
1045 find_givs_in_bb (data, body[i]);
1046 free (body);
1047 }
1048
1049 /* For each ssa name defined in LOOP, determines whether it is an induction
1050 variable and if so, its initial value and step. */
1051
1052 static bool
1053 find_induction_variables (struct ivopts_data *data)
1054 {
1055 unsigned i;
1056 bitmap_iterator bi;
1057
1058 if (!find_bivs (data))
1059 return false;
1060
1061 find_givs (data);
1062 mark_bivs (data);
1063
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 {
1066 tree niter = niter_for_single_dom_exit (data);
1067
1068 if (niter)
1069 {
1070 fprintf (dump_file, " number of iterations ");
1071 print_generic_expr (dump_file, niter, TDF_SLIM);
1072 fprintf (dump_file, "\n\n");
1073 }
1074
1075 fprintf (dump_file, "Induction variables:\n\n");
1076
1077 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1078 {
1079 if (ver_info (data, i)->iv)
1080 dump_iv (dump_file, ver_info (data, i)->iv);
1081 }
1082 }
1083
1084 return true;
1085 }
1086
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1088
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091 tree stmt, enum use_type use_type)
1092 {
1093 struct iv_use *use = XCNEW (struct iv_use);
1094
1095 use->id = n_iv_uses (data);
1096 use->type = use_type;
1097 use->iv = iv;
1098 use->stmt = stmt;
1099 use->op_p = use_p;
1100 use->related_cands = BITMAP_ALLOC (NULL);
1101
1102 /* Reset the ssa name so that it does not show up in the dumps, in case
1103    the caller has not already done so. */
1104 iv->ssa_name = NULL_TREE;
1105
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1107 dump_use (dump_file, use);
1108
1109 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1110
1111 return use;
1112 }
1113
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115 NONLINEAR_USE is true if the invariant is used in a way we do not
1116 handle specially. */
1117
1118 static void
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1120 {
1121 basic_block bb;
1122 struct version_info *info;
1123
1124 if (TREE_CODE (op) != SSA_NAME
1125 || !is_gimple_reg (op))
1126 return;
1127
1128 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1129 if (bb
1130 && flow_bb_inside_loop_p (data->current_loop, bb))
1131 return;
1132
1133 info = name_info (data, op);
1134 info->name = op;
1135 info->has_nonlin_use |= nonlinear_use;
1136 if (!info->inv_id)
1137 info->inv_id = ++data->max_inv_id;
1138 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1139 }
1140
1141 /* Checks whether the use OP is interesting and if so, records it. */
1142
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1145 {
1146 struct iv *iv;
1147 struct iv *civ;
1148 tree stmt;
1149 struct iv_use *use;
1150
1151 if (TREE_CODE (op) != SSA_NAME)
1152 return NULL;
1153
1154 iv = get_iv (data, op);
1155 if (!iv)
1156 return NULL;
1157
1158 if (iv->have_use_for)
1159 {
1160 use = iv_use (data, iv->use_id);
1161
1162 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163 return use;
1164 }
1165
1166 if (integer_zerop (iv->step))
1167 {
1168 record_invariant (data, op, true);
1169 return NULL;
1170 }
1171 iv->have_use_for = true;
1172
1173 civ = XNEW (struct iv);
1174 *civ = *iv;
1175
1176 stmt = SSA_NAME_DEF_STMT (op);
1177 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1179
1180 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181 iv->use_id = use->id;
1182
1183 return use;
1184 }
1185
1186 /* Given a condition *COND_P, checks whether it is a compare of an induction
1187 variable and an invariant. If this is the case, CONTROL_VAR is set
1188 to location of the iv, BOUND to the location of the invariant,
1189 IV_VAR and IV_BOUND are set to the corresponding induction variable
1190 descriptions, and true is returned. If this is not the case,
1191 CONTROL_VAR and BOUND are set to the arguments of the condition and
1192 false is returned. */
1193
1194 static bool
1195 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1196 tree **control_var, tree **bound,
1197 struct iv **iv_var, struct iv **iv_bound)
1198 {
1199 /* The nodes returned when COND has just one operand. Note that you should
1200 not modify anything in BOUND or IV_BOUND because of this. */
1201 static struct iv const_iv;
1202 static tree zero;
1203 tree cond = *cond_p;
1204 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1205 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1206 bool ret = false;
1207
1208 zero = integer_zero_node;
1209 const_iv.step = integer_zero_node;
1210
1211 if (TREE_CODE (cond) == SSA_NAME)
1212 {
1213 op0 = cond_p;
1214 iv0 = get_iv (data, cond);
1215 ret = (iv0 && !integer_zerop (iv0->step));
1216 goto end;
1217 }
1218
1219 if (!COMPARISON_CLASS_P (cond))
1220 {
1221 op0 = cond_p;
1222 goto end;
1223 }
1224
1225 op0 = &TREE_OPERAND (cond, 0);
1226 op1 = &TREE_OPERAND (cond, 1);
1227 if (TREE_CODE (*op0) == SSA_NAME)
1228 iv0 = get_iv (data, *op0);
1229 if (TREE_CODE (*op1) == SSA_NAME)
1230 iv1 = get_iv (data, *op1);
1231
1232 /* Exactly one of the compared values must be an iv, and the other one must
1233 be an invariant. */
1234 if (!iv0 || !iv1)
1235 goto end;
1236
1237 if (integer_zerop (iv0->step))
1238 {
1239 /* Control variable may be on the other side. */
1240 tmp_op = op0; op0 = op1; op1 = tmp_op;
1241 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1242 }
1243 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1244
1245 end:
1246 if (control_var)
1247 *control_var = op0;
1248 if (iv_var)
1249 *iv_var = iv0;
1250 if (bound)
1251 *bound = op1;
1252 if (iv_bound)
1253 *iv_bound = iv1;
1254
1255 return ret;
1256 }
1257
1258 /* Checks whether the condition *COND_P in STMT is interesting
1259 and if so, records it. */
1260
1261 static void
1262 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1263 {
1264 tree *var_p, *bound_p;
1265 struct iv *var_iv, *civ;
1266
1267 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1268 {
1269 find_interesting_uses_op (data, *var_p);
1270 find_interesting_uses_op (data, *bound_p);
1271 return;
1272 }
1273
1274 civ = XNEW (struct iv);
1275 *civ = *var_iv;
1276 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1277 }
1278
1279 /* Returns true if expression EXPR is obviously invariant in LOOP,
1280 i.e. if all its operands are defined outside of the LOOP. */
1281
1282 bool
1283 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1284 {
1285 basic_block def_bb;
1286 unsigned i, len;
1287
1288 if (is_gimple_min_invariant (expr))
1289 return true;
1290
1291 if (TREE_CODE (expr) == SSA_NAME)
1292 {
1293 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1294 if (def_bb
1295 && flow_bb_inside_loop_p (loop, def_bb))
1296 return false;
1297
1298 return true;
1299 }
1300
1301 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1302 return false;
1303
1304 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1305 for (i = 0; i < len; i++)
1306 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1307 return false;
1308
1309 return true;
1310 }
1311
1312 /* Accumulates the steps of indices into DATA and replaces their values with the
1313 initial ones. Returns false when the value of the index cannot be determined.
1314 Callback for for_each_index. */
1315
1316 struct ifs_ivopts_data
1317 {
1318 struct ivopts_data *ivopts_data;
1319 tree stmt;
1320 tree step;
1321 };
1322
1323 static bool
1324 idx_find_step (tree base, tree *idx, void *data)
1325 {
1326 struct ifs_ivopts_data *dta = data;
1327 struct iv *iv;
1328 tree step, iv_base, iv_step, lbound, off;
1329 struct loop *loop = dta->ivopts_data->current_loop;
1330
1331 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1332 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1333 return false;
1334
1335 /* If base is a component ref, require that the offset of the reference
1336 be invariant. */
1337 if (TREE_CODE (base) == COMPONENT_REF)
1338 {
1339 off = component_ref_field_offset (base);
1340 return expr_invariant_in_loop_p (loop, off);
1341 }
1342
1343 /* If base is an array, first check whether we will be able to move the
1344 reference out of the loop (in order to take its address in strength
1345 reduction). In order for this to work we need both lower bound
1346 and step to be loop invariants. */
1347 if (TREE_CODE (base) == ARRAY_REF)
1348 {
1349 step = array_ref_element_size (base);
1350 lbound = array_ref_low_bound (base);
1351
1352 if (!expr_invariant_in_loop_p (loop, step)
1353 || !expr_invariant_in_loop_p (loop, lbound))
1354 return false;
1355 }
1356
1357 if (TREE_CODE (*idx) != SSA_NAME)
1358 return true;
1359
1360 iv = get_iv (dta->ivopts_data, *idx);
1361 if (!iv)
1362 return false;
1363
1364 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1365    *&x[0], which is not folded and does not trigger the
1366    ARRAY_REF path below. */
1367 *idx = iv->base;
1368
1369 if (integer_zerop (iv->step))
1370 return true;
1371
1372 if (TREE_CODE (base) == ARRAY_REF)
1373 {
1374 step = array_ref_element_size (base);
1375
1376 /* We only handle addresses whose step is an integer constant. */
1377 if (TREE_CODE (step) != INTEGER_CST)
1378 return false;
1379 }
1380 else
1381 /* The step for pointer arithmetic is already 1 byte. */
1382 step = build_int_cst (sizetype, 1);
1383
1384 iv_base = iv->base;
1385 iv_step = iv->step;
1386 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1387 sizetype, &iv_base, &iv_step, dta->stmt,
1388 false))
1389 {
1390 /* The index might wrap. */
1391 return false;
1392 }
1393
1394 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1395 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1396
1397 return true;
1398 }
1399
1400 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1401 object is passed to it in DATA. */
1402
1403 static bool
1404 idx_record_use (tree base, tree *idx,
1405 void *data)
1406 {
1407 find_interesting_uses_op (data, *idx);
1408 if (TREE_CODE (base) == ARRAY_REF)
1409 {
1410 find_interesting_uses_op (data, array_ref_element_size (base));
1411 find_interesting_uses_op (data, array_ref_low_bound (base));
1412 }
1413 return true;
1414 }
1415
1416 /* Returns true if memory reference REF may be unaligned. */
1417
1418 static bool
1419 may_be_unaligned_p (tree ref)
1420 {
1421 tree base;
1422 tree base_type;
1423 HOST_WIDE_INT bitsize;
1424 HOST_WIDE_INT bitpos;
1425 tree toffset;
1426 enum machine_mode mode;
1427 int unsignedp, volatilep;
1428 unsigned base_align;
1429
1430 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1431 thus they are not misaligned. */
1432 if (TREE_CODE (ref) == TARGET_MEM_REF)
1433 return false;
1434
1435 /* The test below is basically a copy of what expr.c:normal_inner_ref
1436 does to check whether the object must be loaded by parts when
1437 STRICT_ALIGNMENT is true. */
1438 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1439 &unsignedp, &volatilep, true);
1440 base_type = TREE_TYPE (base);
1441 base_align = TYPE_ALIGN (base_type);
1442
1443 if (mode != BLKmode
1444 && (base_align < GET_MODE_ALIGNMENT (mode)
1445 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1446 || bitpos % BITS_PER_UNIT != 0))
1447 return true;
1448
1449 return false;
1450 }
1451
1452 /* Return true if EXPR may be non-addressable. */
1453
1454 static bool
1455 may_be_nonaddressable_p (tree expr)
1456 {
1457 switch (TREE_CODE (expr))
1458 {
1459 case COMPONENT_REF:
1460 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1461 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1462
1463 case ARRAY_REF:
1464 case ARRAY_RANGE_REF:
1465 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1466
1467 case VIEW_CONVERT_EXPR:
1468 /* This kind of view-conversion may wrap non-addressable objects
1469 and make them look addressable. After some processing the
1470 non-addressability may be uncovered again, causing ADDR_EXPRs
1471 of inappropriate objects to be built. */
1472 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1473 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1474
1475 default:
1476 break;
1477 }
1478
1479 return false;
1480 }
1481
1482 /* Finds addresses in *OP_P inside STMT. */
1483
1484 static void
1485 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1486 {
1487 tree base = *op_p, step = build_int_cst (sizetype, 0);
1488 struct iv *civ;
1489 struct ifs_ivopts_data ifs_ivopts_data;
1490
1491 /* Do not play with volatile memory references. A bit too conservative,
1492 perhaps, but safe. */
1493 if (stmt_ann (stmt)->has_volatile_ops)
1494 goto fail;
1495
1496 /* Ignore bitfields for now. Not really something terribly complicated
1497 to handle. TODO. */
1498 if (TREE_CODE (base) == BIT_FIELD_REF)
1499 goto fail;
1500
1501 if (may_be_nonaddressable_p (base))
1502 goto fail;
1503
1504 if (STRICT_ALIGNMENT
1505 && may_be_unaligned_p (base))
1506 goto fail;
1507
1508 base = unshare_expr (base);
1509
1510 if (TREE_CODE (base) == TARGET_MEM_REF)
1511 {
1512 tree type = build_pointer_type (TREE_TYPE (base));
1513 tree astep;
1514
1515 if (TMR_BASE (base)
1516 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1517 {
1518 civ = get_iv (data, TMR_BASE (base));
1519 if (!civ)
1520 goto fail;
1521
1522 TMR_BASE (base) = civ->base;
1523 step = civ->step;
1524 }
1525 if (TMR_INDEX (base)
1526 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1527 {
1528 civ = get_iv (data, TMR_INDEX (base));
1529 if (!civ)
1530 goto fail;
1531
1532 TMR_INDEX (base) = civ->base;
1533 astep = civ->step;
1534
1535 if (astep)
1536 {
1537 if (TMR_STEP (base))
1538 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1539
1540 step = fold_build2 (PLUS_EXPR, type, step, astep);
1541 }
1542 }
1543
1544 if (integer_zerop (step))
1545 goto fail;
1546 base = tree_mem_ref_addr (type, base);
1547 }
1548 else
1549 {
1550 ifs_ivopts_data.ivopts_data = data;
1551 ifs_ivopts_data.stmt = stmt;
1552 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1553 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1554 || integer_zerop (ifs_ivopts_data.step))
1555 goto fail;
1556 step = ifs_ivopts_data.step;
1557
1558 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1559 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1560
1561 base = build_fold_addr_expr (base);
1562
1563 /* Substituting bases of IVs into the base expression might
1564 have caused folding opportunities. */
1565 if (TREE_CODE (base) == ADDR_EXPR)
1566 {
1567 tree *ref = &TREE_OPERAND (base, 0);
1568 while (handled_component_p (*ref))
1569 ref = &TREE_OPERAND (*ref, 0);
1570 if (TREE_CODE (*ref) == INDIRECT_REF)
1571 *ref = fold_indirect_ref (*ref);
1572 }
1573 }
1574
1575 civ = alloc_iv (base, step);
1576 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1577 return;
1578
1579 fail:
1580 for_each_index (op_p, idx_record_use, data);
1581 }
1582
1583 /* Finds and records invariants used in STMT. */
1584
1585 static void
1586 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1587 {
1588 ssa_op_iter iter;
1589 use_operand_p use_p;
1590 tree op;
1591
1592 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1593 {
1594 op = USE_FROM_PTR (use_p);
1595 record_invariant (data, op, false);
1596 }
1597 }
1598
1599 /* Finds interesting uses of induction variables in the statement STMT. */
1600
1601 static void
1602 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1603 {
1604 struct iv *iv;
1605 tree op, lhs, rhs;
1606 ssa_op_iter iter;
1607 use_operand_p use_p;
1608
1609 find_invariants_stmt (data, stmt);
1610
1611 if (TREE_CODE (stmt) == COND_EXPR)
1612 {
1613 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1614 return;
1615 }
1616
1617 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1618 {
1619 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1620 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1621
1622 if (TREE_CODE (lhs) == SSA_NAME)
1623 {
1624 /* If the statement defines an induction variable, the uses are not
1625 interesting by themselves. */
1626
1627 iv = get_iv (data, lhs);
1628
1629 if (iv && !integer_zerop (iv->step))
1630 return;
1631 }
1632
1633 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1634 {
1635 case tcc_comparison:
1636 find_interesting_uses_cond (data, stmt,
1637 &GIMPLE_STMT_OPERAND (stmt, 1));
1638 return;
1639
1640 case tcc_reference:
1641 find_interesting_uses_address (data, stmt,
1642 &GIMPLE_STMT_OPERAND (stmt, 1));
1643 if (REFERENCE_CLASS_P (lhs))
1644 find_interesting_uses_address (data, stmt,
1645 &GIMPLE_STMT_OPERAND (stmt, 0));
1646 return;
1647
1648 default: ;
1649 }
1650
1651 if (REFERENCE_CLASS_P (lhs)
1652 && is_gimple_val (rhs))
1653 {
1654 find_interesting_uses_address (data, stmt,
1655 &GIMPLE_STMT_OPERAND (stmt, 0));
1656 find_interesting_uses_op (data, rhs);
1657 return;
1658 }
1659
1660 /* TODO -- we should also handle address uses of type
1661
1662 memory = call (whatever);
1663
1664 and
1665
1666 call (memory). */
1667 }
1668
1669 if (TREE_CODE (stmt) == PHI_NODE
1670 && bb_for_stmt (stmt) == data->current_loop->header)
1671 {
1672 lhs = PHI_RESULT (stmt);
1673 iv = get_iv (data, lhs);
1674
1675 if (iv && !integer_zerop (iv->step))
1676 return;
1677 }
1678
1679 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1680 {
1681 op = USE_FROM_PTR (use_p);
1682
1683 if (TREE_CODE (op) != SSA_NAME)
1684 continue;
1685
1686 iv = get_iv (data, op);
1687 if (!iv)
1688 continue;
1689
1690 find_interesting_uses_op (data, op);
1691 }
1692 }
1693
1694 /* Finds interesting uses of induction variables outside of loops
1695 on loop exit edge EXIT. */
1696
1697 static void
1698 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1699 {
1700 tree phi, def;
1701
1702 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1703 {
1704 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1705 if (is_gimple_reg (def))
1706 find_interesting_uses_op (data, def);
1707 }
1708 }
1709
1710 /* Finds uses of the induction variables that are interesting. */
1711
1712 static void
1713 find_interesting_uses (struct ivopts_data *data)
1714 {
1715 basic_block bb;
1716 block_stmt_iterator bsi;
1717 tree phi;
1718 basic_block *body = get_loop_body (data->current_loop);
1719 unsigned i;
1720 struct version_info *info;
1721 edge e;
1722
1723 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "Uses:\n\n");
1725
1726 for (i = 0; i < data->current_loop->num_nodes; i++)
1727 {
1728 edge_iterator ei;
1729 bb = body[i];
1730
1731 FOR_EACH_EDGE (e, ei, bb->succs)
1732 if (e->dest != EXIT_BLOCK_PTR
1733 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1734 find_interesting_uses_outside (data, e);
1735
1736 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1737 find_interesting_uses_stmt (data, phi);
1738 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1739 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1740 }
1741
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1743 {
1744 bitmap_iterator bi;
1745
1746 fprintf (dump_file, "\n");
1747
1748 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1749 {
1750 info = ver_info (data, i);
1751 if (info->inv_id)
1752 {
1753 fprintf (dump_file, " ");
1754 print_generic_expr (dump_file, info->name, TDF_SLIM);
1755 fprintf (dump_file, " is invariant (%d)%s\n",
1756 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1757 }
1758 }
1759
1760 fprintf (dump_file, "\n");
1761 }
1762
1763 free (body);
1764 }
1765
1766 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1767 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1768 we are at the top-level of the processed address. */
1769
1770 static tree
1771 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1772 unsigned HOST_WIDE_INT *offset)
1773 {
1774 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1775 enum tree_code code;
1776 tree type, orig_type = TREE_TYPE (expr);
1777 unsigned HOST_WIDE_INT off0, off1, st;
1778 tree orig_expr = expr;
1779
1780 STRIP_NOPS (expr);
1781
1782 type = TREE_TYPE (expr);
1783 code = TREE_CODE (expr);
1784 *offset = 0;
1785
1786 switch (code)
1787 {
1788 case INTEGER_CST:
1789 if (!cst_and_fits_in_hwi (expr)
1790 || integer_zerop (expr))
1791 return orig_expr;
1792
1793 *offset = int_cst_value (expr);
1794 return build_int_cst (orig_type, 0);
1795
1796 case PLUS_EXPR:
1797 case MINUS_EXPR:
1798 op0 = TREE_OPERAND (expr, 0);
1799 op1 = TREE_OPERAND (expr, 1);
1800
1801 op0 = strip_offset_1 (op0, false, false, &off0);
1802 op1 = strip_offset_1 (op1, false, false, &off1);
1803
1804 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1805 if (op0 == TREE_OPERAND (expr, 0)
1806 && op1 == TREE_OPERAND (expr, 1))
1807 return orig_expr;
1808
1809 if (integer_zerop (op1))
1810 expr = op0;
1811 else if (integer_zerop (op0))
1812 {
1813 if (code == PLUS_EXPR)
1814 expr = op1;
1815 else
1816 expr = fold_build1 (NEGATE_EXPR, type, op1);
1817 }
1818 else
1819 expr = fold_build2 (code, type, op0, op1);
1820
1821 return fold_convert (orig_type, expr);
1822
1823 case ARRAY_REF:
1824 if (!inside_addr)
1825 return orig_expr;
1826
1827 step = array_ref_element_size (expr);
1828 if (!cst_and_fits_in_hwi (step))
1829 break;
1830
1831 st = int_cst_value (step);
1832 op1 = TREE_OPERAND (expr, 1);
1833 op1 = strip_offset_1 (op1, false, false, &off1);
1834 *offset = off1 * st;
1835
1836 if (top_compref
1837 && integer_zerop (op1))
1838 {
1839 /* Strip the component reference completely. */
1840 op0 = TREE_OPERAND (expr, 0);
1841 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1842 *offset += off0;
1843 return op0;
1844 }
1845 break;
1846
1847 case COMPONENT_REF:
1848 if (!inside_addr)
1849 return orig_expr;
1850
1851 tmp = component_ref_field_offset (expr);
1852 if (top_compref
1853 && cst_and_fits_in_hwi (tmp))
1854 {
1855 /* Strip the component reference completely. */
1856 op0 = TREE_OPERAND (expr, 0);
1857 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1858 *offset = off0 + int_cst_value (tmp);
1859 return op0;
1860 }
1861 break;
1862
1863 case ADDR_EXPR:
1864 op0 = TREE_OPERAND (expr, 0);
1865 op0 = strip_offset_1 (op0, true, true, &off0);
1866 *offset += off0;
1867
1868 if (op0 == TREE_OPERAND (expr, 0))
1869 return orig_expr;
1870
1871 expr = build_fold_addr_expr (op0);
1872 return fold_convert (orig_type, expr);
1873
1874 case INDIRECT_REF:
1875 inside_addr = false;
1876 break;
1877
1878 default:
1879 return orig_expr;
1880 }
1881
1882 /* Default handling of expressions for which we want to recurse into
1883 the first operand. */
1884 op0 = TREE_OPERAND (expr, 0);
1885 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1886 *offset += off0;
1887
1888 if (op0 == TREE_OPERAND (expr, 0)
1889 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1890 return orig_expr;
1891
1892 expr = copy_node (expr);
1893 TREE_OPERAND (expr, 0) = op0;
1894 if (op1)
1895 TREE_OPERAND (expr, 1) = op1;
1896
1897 /* Inside an address, we might strip the top-level component references,
1898    thus changing the type of the expression.  Handling of ADDR_EXPR
1899 will fix that. */
1900 expr = fold_convert (orig_type, expr);
1901
1902 return expr;
1903 }
1904
1905 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1906
1907 static tree
1908 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1909 {
1910 return strip_offset_1 (expr, false, false, offset);
1911 }
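
/* E.g. (a sketch): for EXPR "p + 8" this returns "p" and sets *OFFSET to 8;
   for "&a[5]" with 4-byte elements, the array reference is stripped
   completely, yielding the address of "a" with *OFFSET set to 20.  */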
1912
1913 /* Returns a variant of TYPE that can be used as a base for different uses.
1914    We return an unsigned type with the same precision, which avoids problems
1915    with overflows. */
1916
1917 static tree
1918 generic_type_for (tree type)
1919 {
1920 if (POINTER_TYPE_P (type))
1921 return unsigned_type_for (type);
1922
1923 if (TYPE_UNSIGNED (type))
1924 return type;
1925
1926 return unsigned_type_for (type);
1927 }
1928
1929 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1930    the bitmap in which we should store them. */
1931
1932 static struct ivopts_data *fd_ivopts_data;
1933 static tree
1934 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1935 {
1936 bitmap *depends_on = data;
1937 struct version_info *info;
1938
1939 if (TREE_CODE (*expr_p) != SSA_NAME)
1940 return NULL_TREE;
1941 info = name_info (fd_ivopts_data, *expr_p);
1942
1943 if (!info->inv_id || info->has_nonlin_use)
1944 return NULL_TREE;
1945
1946 if (!*depends_on)
1947 *depends_on = BITMAP_ALLOC (NULL);
1948 bitmap_set_bit (*depends_on, info->inv_id);
1949
1950 return NULL_TREE;
1951 }
1952
1953 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1954 position to POS. If USE is not NULL, the candidate is set as related to
1955 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1956 replacement of the final value of the iv by a direct computation. */
1957
1958 static struct iv_cand *
1959 add_candidate_1 (struct ivopts_data *data,
1960 tree base, tree step, bool important, enum iv_position pos,
1961 struct iv_use *use, tree incremented_at)
1962 {
1963 unsigned i;
1964 struct iv_cand *cand = NULL;
1965 tree type, orig_type;
1966
1967 if (base)
1968 {
1969 orig_type = TREE_TYPE (base);
1970 type = generic_type_for (orig_type);
1971 if (type != orig_type)
1972 {
1973 base = fold_convert (type, base);
1974 step = fold_convert (type, step);
1975 }
1976 }
1977
1978 for (i = 0; i < n_iv_cands (data); i++)
1979 {
1980 cand = iv_cand (data, i);
1981
1982 if (cand->pos != pos)
1983 continue;
1984
1985 if (cand->incremented_at != incremented_at)
1986 continue;
1987
1988 if (!cand->iv)
1989 {
1990 if (!base && !step)
1991 break;
1992
1993 continue;
1994 }
1995
1996 if (!base && !step)
1997 continue;
1998
1999 if (operand_equal_p (base, cand->iv->base, 0)
2000 && operand_equal_p (step, cand->iv->step, 0))
2001 break;
2002 }
2003
2004 if (i == n_iv_cands (data))
2005 {
2006 cand = XCNEW (struct iv_cand);
2007 cand->id = i;
2008
2009 if (!base && !step)
2010 cand->iv = NULL;
2011 else
2012 cand->iv = alloc_iv (base, step);
2013
2014 cand->pos = pos;
2015 if (pos != IP_ORIGINAL && cand->iv)
2016 {
2017 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2018 cand->var_after = cand->var_before;
2019 }
2020 cand->important = important;
2021 cand->incremented_at = incremented_at;
2022 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2023
2024 if (step
2025 && TREE_CODE (step) != INTEGER_CST)
2026 {
2027 fd_ivopts_data = data;
2028 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2029 }
2030
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2032 dump_cand (dump_file, cand);
2033 }
2034
2035 if (important && !cand->important)
2036 {
2037 cand->important = true;
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2039 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2040 }
2041
2042 if (use)
2043 {
2044 bitmap_set_bit (use->related_cands, i);
2045 if (dump_file && (dump_flags & TDF_DETAILS))
2046 fprintf (dump_file, "Candidate %d is related to use %d\n",
2047 cand->id, use->id);
2048 }
2049
2050 return cand;
2051 }
2052
2053 /* Returns true if incrementing the induction variable at the end of the LOOP
2054 is allowed.
2055
2056 The purpose is to avoid splitting latch edge with a biv increment, thus
2057 creating a jump, possibly confusing other optimization passes and leaving
2058 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2059 is not available (so we do not have a better alternative), or if the latch
2060 edge is already nonempty. */
2061
2062 static bool
2063 allow_ip_end_pos_p (struct loop *loop)
2064 {
2065 if (!ip_normal_pos (loop))
2066 return true;
2067
2068 if (!empty_block_p (ip_end_pos (loop)))
2069 return true;
2070
2071 return false;
2072 }
2073
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. The candidate computation is scheduled on all available positions. */
2077
2078 static void
2079 add_candidate (struct ivopts_data *data,
2080 tree base, tree step, bool important, struct iv_use *use)
2081 {
2082 if (ip_normal_pos (data->current_loop))
2083 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2084 if (ip_end_pos (data->current_loop)
2085 && allow_ip_end_pos_p (data->current_loop))
2086 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2087 }
2088
2089 /* Add a standard "0 + 1 * iteration" iv candidate for a
2090 type with SIZE bits. */
2091
2092 static void
2093 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2094 unsigned int size)
2095 {
2096 tree type = lang_hooks.types.type_for_size (size, true);
2097 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2098 true, NULL);
2099 }
2100
2101 /* Adds standard iv candidates. */
2102
2103 static void
2104 add_standard_iv_candidates (struct ivopts_data *data)
2105 {
2106 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2107
2108 /* The same for a double-integer type if it is still fast enough. */
2109 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2110 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2111 }
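
/* For instance, on a target with 32-bit int and 64-bit registers this
   typically adds the candidate {0, +, 1} both in a 32-bit and in a 64-bit
   unsigned type (type_for_size is called with unsignedp set).  */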
2112
2113
/* Adds candidates based on the old induction variable IV.  */
2115
2116 static void
2117 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2118 {
2119 tree phi, def;
2120 struct iv_cand *cand;
2121
2122 add_candidate (data, iv->base, iv->step, true, NULL);
2123
2124 /* The same, but with initial value zero. */
2125 add_candidate (data,
2126 build_int_cst (TREE_TYPE (iv->base), 0),
2127 iv->step, true, NULL);
2128
2129 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2130 if (TREE_CODE (phi) == PHI_NODE)
2131 {
2132 /* Additionally record the possibility of leaving the original iv
2133 untouched. */
2134 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2135 cand = add_candidate_1 (data,
2136 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2137 SSA_NAME_DEF_STMT (def));
2138 cand->var_before = iv->ssa_name;
2139 cand->var_after = def;
2140 }
2141 }
2142
2143 /* Adds candidates based on the old induction variables. */
2144
2145 static void
2146 add_old_ivs_candidates (struct ivopts_data *data)
2147 {
2148 unsigned i;
2149 struct iv *iv;
2150 bitmap_iterator bi;
2151
2152 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2153 {
2154 iv = ver_info (data, i)->iv;
2155 if (iv && iv->biv_p && !integer_zerop (iv->step))
2156 add_old_iv_candidates (data, iv);
2157 }
2158 }
2159
2160 /* Adds candidates based on the value of the induction variable IV and USE. */
2161
2162 static void
2163 add_iv_value_candidates (struct ivopts_data *data,
2164 struct iv *iv, struct iv_use *use)
2165 {
2166 unsigned HOST_WIDE_INT offset;
2167 tree base;
2168
2169 add_candidate (data, iv->base, iv->step, false, use);
2170
  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that many uses may possibly be based
     on it.  */
2174 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2175 iv->step, true, use);
2176
2177 /* Third, try removing the constant offset. */
2178 base = strip_offset (iv->base, &offset);
2179 if (offset)
2180 add_candidate (data, base, iv->step, false, use);
2181 }
2182
2183 /* Adds candidates based on the uses. */
2184
2185 static void
2186 add_derived_ivs_candidates (struct ivopts_data *data)
2187 {
2188 unsigned i;
2189
2190 for (i = 0; i < n_iv_uses (data); i++)
2191 {
2192 struct iv_use *use = iv_use (data, i);
2193
2194 if (!use)
2195 continue;
2196
2197 switch (use->type)
2198 {
2199 case USE_NONLINEAR_EXPR:
2200 case USE_COMPARE:
2201 case USE_ADDRESS:
2202 /* Just add the ivs based on the value of the iv used here. */
2203 add_iv_value_candidates (data, use->iv, use);
2204 break;
2205
2206 default:
2207 gcc_unreachable ();
2208 }
2209 }
2210 }
2211
2212 /* Record important candidates and add them to related_cands bitmaps
2213 if needed. */
2214
2215 static void
2216 record_important_candidates (struct ivopts_data *data)
2217 {
2218 unsigned i;
2219 struct iv_use *use;
2220
2221 for (i = 0; i < n_iv_cands (data); i++)
2222 {
2223 struct iv_cand *cand = iv_cand (data, i);
2224
2225 if (cand->important)
2226 bitmap_set_bit (data->important_candidates, i);
2227 }
2228
2229 data->consider_all_candidates = (n_iv_cands (data)
2230 <= CONSIDER_ALL_CANDIDATES_BOUND);
2231
2232 if (data->consider_all_candidates)
2233 {
2234 /* We will not need "related_cands" bitmaps in this case,
2235 so release them to decrease peak memory consumption. */
2236 for (i = 0; i < n_iv_uses (data); i++)
2237 {
2238 use = iv_use (data, i);
2239 BITMAP_FREE (use->related_cands);
2240 }
2241 }
2242 else
2243 {
2244 /* Add important candidates to the related_cands bitmaps. */
2245 for (i = 0; i < n_iv_uses (data); i++)
2246 bitmap_ior_into (iv_use (data, i)->related_cands,
2247 data->important_candidates);
2248 }
2249 }
2250
2251 /* Finds the candidates for the induction variables. */
2252
2253 static void
2254 find_iv_candidates (struct ivopts_data *data)
2255 {
2256 /* Add commonly used ivs. */
2257 add_standard_iv_candidates (data);
2258
2259 /* Add old induction variables. */
2260 add_old_ivs_candidates (data);
2261
2262 /* Add induction variables derived from uses. */
2263 add_derived_ivs_candidates (data);
2264
2265 /* Record the important candidates. */
2266 record_important_candidates (data);
2267 }
2268
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array indexed
   directly by the candidate id; otherwise we allocate a small hash map for
   every use.  */
2272
2273 static void
2274 alloc_use_cost_map (struct ivopts_data *data)
2275 {
2276 unsigned i, size, s, j;
2277
2278 for (i = 0; i < n_iv_uses (data); i++)
2279 {
2280 struct iv_use *use = iv_use (data, i);
2281 bitmap_iterator bi;
2282
2283 if (data->consider_all_candidates)
2284 size = n_iv_cands (data);
2285 else
2286 {
2287 s = 0;
2288 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2289 {
2290 s++;
2291 }
2292
	  /* Round up to a power of two, so that computing the modulus is fast.  */
2294 for (size = 1; size < s; size <<= 1)
2295 continue;
2296 }
2297
2298 use->n_map_members = size;
2299 use->cost_map = XCNEWVEC (struct cost_pair, size);
2300 }
2301 }
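
/* For example, a use with five related candidates gets a cost map of size
   eight, so that set_use_iv_cost and get_use_iv_cost below can reduce a
   candidate id to a starting slot with "id & (n_map_members - 1)" and probe
   the following slots linearly, instead of dividing.  */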
2302
/* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
   depends on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
2306
2307 static void
2308 set_use_iv_cost (struct ivopts_data *data,
2309 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2310 bitmap depends_on, tree value)
2311 {
2312 unsigned i, s;
2313
2314 if (cost == INFTY)
2315 {
2316 BITMAP_FREE (depends_on);
2317 return;
2318 }
2319
2320 if (data->consider_all_candidates)
2321 {
2322 use->cost_map[cand->id].cand = cand;
2323 use->cost_map[cand->id].cost = cost;
2324 use->cost_map[cand->id].depends_on = depends_on;
2325 use->cost_map[cand->id].value = value;
2326 return;
2327 }
2328
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s = cand->id & (use->n_map_members - 1);
2331 for (i = s; i < use->n_map_members; i++)
2332 if (!use->cost_map[i].cand)
2333 goto found;
2334 for (i = 0; i < s; i++)
2335 if (!use->cost_map[i].cand)
2336 goto found;
2337
2338 gcc_unreachable ();
2339
2340 found:
2341 use->cost_map[i].cand = cand;
2342 use->cost_map[i].cost = cost;
2343 use->cost_map[i].depends_on = depends_on;
2344 use->cost_map[i].value = value;
2345 }
2346
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2348
2349 static struct cost_pair *
2350 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2351 struct iv_cand *cand)
2352 {
2353 unsigned i, s;
2354 struct cost_pair *ret;
2355
2356 if (!cand)
2357 return NULL;
2358
2359 if (data->consider_all_candidates)
2360 {
2361 ret = use->cost_map + cand->id;
2362 if (!ret->cand)
2363 return NULL;
2364
2365 return ret;
2366 }
2367
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s = cand->id & (use->n_map_members - 1);
2370 for (i = s; i < use->n_map_members; i++)
2371 if (use->cost_map[i].cand == cand)
2372 return use->cost_map + i;
2373
2374 for (i = 0; i < s; i++)
2375 if (use->cost_map[i].cand == cand)
2376 return use->cost_map + i;
2377
2378 return NULL;
2379 }
2380
/* Returns an estimate of the cost of computing SEQ.  */
2382
2383 static unsigned
2384 seq_cost (rtx seq)
2385 {
2386 unsigned cost = 0;
2387 rtx set;
2388
2389 for (; seq; seq = NEXT_INSN (seq))
2390 {
2391 set = single_set (seq);
2392 if (set)
2393 cost += rtx_cost (set, SET);
2394 else
2395 cost++;
2396 }
2397
2398 return cost;
2399 }
2400
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

2402 static rtx
2403 produce_memory_decl_rtl (tree obj, int *regno)
2404 {
2405 rtx x;
2406
2407 gcc_assert (obj);
2408 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2409 {
2410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2411 x = gen_rtx_SYMBOL_REF (Pmode, name);
2412 }
2413 else
2414 x = gen_raw_REG (Pmode, (*regno)++);
2415
2416 return gen_rtx_MEM (DECL_MODE (obj), x);
2417 }
2418
/* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */
2421
2422 static tree
2423 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2424 {
2425 tree obj = NULL_TREE;
2426 rtx x = NULL_RTX;
2427 int *regno = data;
2428
2429 switch (TREE_CODE (*expr_p))
2430 {
2431 case ADDR_EXPR:
2432 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2433 handled_component_p (*expr_p);
2434 expr_p = &TREE_OPERAND (*expr_p, 0))
2435 continue;
2436 obj = *expr_p;
2437 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2438 x = produce_memory_decl_rtl (obj, regno);
2439 break;
2440
2441 case SSA_NAME:
2442 *ws = 0;
2443 obj = SSA_NAME_VAR (*expr_p);
2444 if (!DECL_RTL_SET_P (obj))
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2446 break;
2447
2448 case VAR_DECL:
2449 case PARM_DECL:
2450 case RESULT_DECL:
2451 *ws = 0;
2452 obj = *expr_p;
2453
2454 if (DECL_RTL_SET_P (obj))
2455 break;
2456
2457 if (DECL_MODE (obj) == BLKmode)
2458 x = produce_memory_decl_rtl (obj, regno);
2459 else
2460 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2461
2462 break;
2463
2464 default:
2465 break;
2466 }
2467
2468 if (x)
2469 {
2470 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2471 SET_DECL_RTL (obj, x);
2472 }
2473
2474 return NULL_TREE;
2475 }
2476
2477 /* Determines cost of the computation of EXPR. */
2478
2479 static unsigned
2480 computation_cost (tree expr)
2481 {
2482 rtx seq, rslt;
2483 tree type = TREE_TYPE (expr);
2484 unsigned cost;
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno = LAST_VIRTUAL_REGISTER + 1;
2487
2488 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2489 start_sequence ();
2490 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2491 seq = get_insns ();
2492 end_sequence ();
2493
2494 cost = seq_cost (seq);
2495 if (MEM_P (rslt))
2496 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2497
2498 return cost;
2499 }
2500
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2502
2503 static tree
2504 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2505 {
2506 if (stmt_after_increment (loop, cand, stmt))
2507 return cand->var_after;
2508 else
2509 return cand->var_before;
2510 }
2511
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2514
2515 int
2516 tree_int_cst_sign_bit (tree t)
2517 {
2518 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2519 unsigned HOST_WIDE_INT w;
2520
2521 if (bitno < HOST_BITS_PER_WIDE_INT)
2522 w = TREE_INT_CST_LOW (t);
2523 else
2524 {
2525 w = TREE_INT_CST_HIGH (t);
2526 bitno -= HOST_BITS_PER_WIDE_INT;
2527 }
2528
2529 return (w >> bitno) & 1;
2530 }
2531
2532 /* If we can prove that TOP = cst * BOT for some constant cst,
2533 store cst to MUL and return true. Otherwise return false.
2534 The returned value is always sign-extended, regardless of the
2535 signedness of TOP and BOT. */
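
/* For example, for TOP = 6 * x and BOT = x this stores 6 to MUL, and for
   TOP = 8 and BOT = 2 it stores 4; for TOP = 14 and BOT = 4 it fails,
   since 14 = 3 * 4 + 2.  */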
2536
2537 static bool
2538 constant_multiple_of (tree top, tree bot, double_int *mul)
2539 {
2540 tree mby;
2541 enum tree_code code;
2542 double_int res, p0, p1;
2543 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2544
2545 STRIP_NOPS (top);
2546 STRIP_NOPS (bot);
2547
2548 if (operand_equal_p (top, bot, 0))
2549 {
2550 *mul = double_int_one;
2551 return true;
2552 }
2553
2554 code = TREE_CODE (top);
2555 switch (code)
2556 {
2557 case MULT_EXPR:
2558 mby = TREE_OPERAND (top, 1);
2559 if (TREE_CODE (mby) != INTEGER_CST)
2560 return false;
2561
2562 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2563 return false;
2564
2565 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2566 precision);
2567 return true;
2568
2569 case PLUS_EXPR:
2570 case MINUS_EXPR:
2571 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2572 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2573 return false;
2574
2575 if (code == MINUS_EXPR)
2576 p1 = double_int_neg (p1);
2577 *mul = double_int_sext (double_int_add (p0, p1), precision);
2578 return true;
2579
2580 case INTEGER_CST:
2581 if (TREE_CODE (bot) != INTEGER_CST)
2582 return false;
2583
2584 p0 = double_int_sext (tree_to_double_int (top), precision);
2585 p1 = double_int_sext (tree_to_double_int (bot), precision);
2586 if (double_int_zero_p (p1))
2587 return false;
2588 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2589 precision);
2590 return double_int_zero_p (res);
2591
2592 default:
2593 return false;
2594 }
2595 }
2596
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision, which is at least as great as the precision of TYPE,
   stores BA to A and BB to B, and returns the type of BA.  Otherwise,
   returns the type of A and B.  */
2601
2602 static tree
2603 determine_common_wider_type (tree *a, tree *b)
2604 {
2605 tree wider_type = NULL;
2606 tree suba, subb;
2607 tree atype = TREE_TYPE (*a);
2608
2609 if ((TREE_CODE (*a) == NOP_EXPR
2610 || TREE_CODE (*a) == CONVERT_EXPR))
2611 {
2612 suba = TREE_OPERAND (*a, 0);
2613 wider_type = TREE_TYPE (suba);
2614 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2615 return atype;
2616 }
2617 else
2618 return atype;
2619
2620 if ((TREE_CODE (*b) == NOP_EXPR
2621 || TREE_CODE (*b) == CONVERT_EXPR))
2622 {
2623 subb = TREE_OPERAND (*b, 0);
2624 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2625 return atype;
2626 }
2627 else
2628 return atype;
2629
2630 *a = suba;
2631 *b = subb;
2632 return wider_type;
2633 }
2634
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in a
   decomposed form into AFF.  Returns false if USE cannot be expressed
   using CAND.  */
2638
2639 static bool
2640 get_computation_aff (struct loop *loop,
2641 struct iv_use *use, struct iv_cand *cand, tree at,
2642 struct affine_tree_combination *aff)
2643 {
2644 tree ubase = use->iv->base;
2645 tree ustep = use->iv->step;
2646 tree cbase = cand->iv->base;
2647 tree cstep = cand->iv->step, cstep_common;
2648 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2649 tree common_type, var;
2650 tree uutype;
2651 aff_tree cbase_aff, var_aff;
2652 double_int rat;
2653
2654 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2655 {
      /* The candidate type is not precise enough to express the values
	 of the use.  */
2657 return false;
2658 }
2659
2660 var = var_at_stmt (loop, cand, at);
2661 uutype = unsigned_type_for (utype);
2662
  /* If the conversion is not a noop, perform it.  */
2664 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2665 {
2666 cstep = fold_convert (uutype, cstep);
2667 cbase = fold_convert (uutype, cbase);
2668 var = fold_convert (uutype, var);
2669 }
2670
2671 if (!constant_multiple_of (ustep, cstep, &rat))
2672 return false;
2673
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and casting the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
2679 common_type = determine_common_wider_type (&ubase, &cbase);
2680
2681 /* use = ubase - ratio * cbase + ratio * var. */
2682 tree_to_aff_combination (ubase, common_type, aff);
2683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2684 tree_to_aff_combination (var, uutype, &var_aff);
2685
2686 /* We need to shift the value if we are after the increment. */
2687 if (stmt_after_increment (loop, cand, at))
2688 {
2689 aff_tree cstep_aff;
2690
2691 if (common_type != uutype)
2692 cstep_common = fold_convert (common_type, cstep);
2693 else
2694 cstep_common = cstep;
2695
2696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2697 aff_combination_add (&cbase_aff, &cstep_aff);
2698 }
2699
2700 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2701 aff_combination_add (aff, &cbase_aff);
2702 if (common_type != uutype)
2703 aff_combination_convert (aff, uutype);
2704
2705 aff_combination_scale (&var_aff, rat);
2706 aff_combination_add (aff, &var_aff);
2707
2708 return true;
2709 }
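
/* As an example of the decomposition above: for a use with base A + 4 and
   step 4 and a candidate with base 0 and step 1, rat is 4 and the resulting
   combination is (A + 4) - 4 * 0 + 4 * var, i.e. A + 4 + 4 * var.  */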
2710
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The computation is unshared.  */
2713
2714 static tree
2715 get_computation_at (struct loop *loop,
2716 struct iv_use *use, struct iv_cand *cand, tree at)
2717 {
2718 aff_tree aff;
2719 tree type = TREE_TYPE (use->iv->base);
2720
2721 if (!get_computation_aff (loop, use, cand, at, &aff))
2722 return NULL_TREE;
2723 unshare_aff_combination (&aff);
2724 return fold_convert (type, aff_combination_to_tree (&aff));
2725 }
2726
/* Determines the expression by which USE is expressed from induction
   variable CAND in LOOP.  The computation is unshared.  */
2729
2730 static tree
2731 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2732 {
2733 return get_computation_at (loop, use, cand, use->stmt);
2734 }
2735
2736 /* Returns cost of addition in MODE. */
2737
2738 static unsigned
2739 add_cost (enum machine_mode mode)
2740 {
2741 static unsigned costs[NUM_MACHINE_MODES];
2742 rtx seq;
2743 unsigned cost;
2744
2745 if (costs[mode])
2746 return costs[mode];
2747
2748 start_sequence ();
2749 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2750 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2751 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2752 NULL_RTX);
2753 seq = get_insns ();
2754 end_sequence ();
2755
2756 cost = seq_cost (seq);
2757 if (!cost)
2758 cost = 1;
2759
2760 costs[mode] = cost;
2761
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, "Addition in %s costs %d\n",
2764 GET_MODE_NAME (mode), cost);
2765 return cost;
2766 }
2767
2768 /* Entry in a hashtable of already known costs for multiplication. */
2769 struct mbc_entry
2770 {
2771 HOST_WIDE_INT cst; /* The constant to multiply by. */
2772 enum machine_mode mode; /* In mode. */
2773 unsigned cost; /* The cost. */
2774 };
2775
/* Computes the hash value for ENTRY.  */
2777
2778 static hashval_t
2779 mbc_entry_hash (const void *entry)
2780 {
2781 const struct mbc_entry *e = entry;
2782
2783 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2784 }
2785
2786 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2787
2788 static int
2789 mbc_entry_eq (const void *entry1, const void *entry2)
2790 {
2791 const struct mbc_entry *e1 = entry1;
2792 const struct mbc_entry *e2 = entry2;
2793
2794 return (e1->mode == e2->mode
2795 && e1->cst == e2->cst);
2796 }
2797
2798 /* Returns cost of multiplication by constant CST in MODE. */
2799
2800 unsigned
2801 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2802 {
2803 static htab_t costs;
2804 struct mbc_entry **cached, act;
2805 rtx seq;
2806 unsigned cost;
2807
2808 if (!costs)
2809 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2810
2811 act.mode = mode;
2812 act.cst = cst;
2813 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2814 if (*cached)
2815 return (*cached)->cost;
2816
2817 *cached = XNEW (struct mbc_entry);
2818 (*cached)->mode = mode;
2819 (*cached)->cst = cst;
2820
2821 start_sequence ();
2822 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2823 gen_int_mode (cst, mode), NULL_RTX, 0);
2824 seq = get_insns ();
2825 end_sequence ();
2826
2827 cost = seq_cost (seq);
2828
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2830 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2831 (int) cst, GET_MODE_NAME (mode), cost);
2832
2833 (*cached)->cost = cost;
2834
2835 return cost;
2836 }
2837
/* Returns true if multiplying by RATIO is allowed in an address.  The
   validity is tested for a memory reference accessing memory of mode
   MODE.  */
2840
2841 bool
2842 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2843 {
2844 #define MAX_RATIO 128
2845 static sbitmap valid_mult[MAX_MACHINE_MODE];
2846
2847 if (!valid_mult[mode])
2848 {
2849 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2850 rtx addr;
2851 HOST_WIDE_INT i;
2852
2853 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2854 sbitmap_zero (valid_mult[mode]);
2855 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2856 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2857 {
2858 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2859 if (memory_address_p (mode, addr))
2860 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2861 }
2862
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2864 {
2865 fprintf (dump_file, " allowed multipliers:");
2866 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2867 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2868 fprintf (dump_file, " %d", (int) i);
2869 fprintf (dump_file, "\n");
2870 fprintf (dump_file, "\n");
2871 }
2872 }
2873
2874 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2875 return false;
2876
2877 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2878 }
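
/* For instance, on i386 the scaled index addressing modes allow ratios 1,
   2, 4 and 8, so only those bits of valid_mult end up set; targets without
   scaled addressing accept hardly any multiplier.  */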
2879
/* Returns the cost of an address in the shape symbol + var + OFFSET +
   RATIO * index.  If SYMBOL_PRESENT is false, the symbol is omitted.  If
   VAR_PRESENT is false, the variable is omitted.  The cost is computed for
   a memory reference that accesses a memory location of mode MEM_MODE.
2884
2885 TODO -- there must be some better way. This all is quite crude. */
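
/* The first loop over 16 values below enumerates all combinations of the
   four address parts in the low bits of i; e.g. i == 5 (binary 0101) has
   sym_p and off_p set, standing for an address of the shape
   symbol + offset, with no variable and no index ratio.  */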
2886
2887 static unsigned
2888 get_address_cost (bool symbol_present, bool var_present,
2889 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2890 enum machine_mode mem_mode)
2891 {
2892 static bool initialized[MAX_MACHINE_MODE];
2893 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2894 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2895 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2896 unsigned cost, acost;
2897 bool offset_p, ratio_p;
2898 HOST_WIDE_INT s_offset;
2899 unsigned HOST_WIDE_INT mask;
2900 unsigned bits;
2901
2902 if (!initialized[mem_mode])
2903 {
2904 HOST_WIDE_INT i;
2905 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2906 int old_cse_not_expected;
2907 unsigned sym_p, var_p, off_p, rat_p, add_c;
2908 rtx seq, addr, base;
2909 rtx reg0, reg1;
2910
2911 initialized[mem_mode] = true;
2912
2913 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2914
2915 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2916 for (i = start; i <= 1 << 20; i <<= 1)
2917 {
2918 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2919 if (!memory_address_p (mem_mode, addr))
2920 break;
2921 }
2922 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2923 off[mem_mode] = max_offset[mem_mode];
2924
2925 for (i = start; i <= 1 << 20; i <<= 1)
2926 {
2927 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2928 if (!memory_address_p (mem_mode, addr))
2929 break;
2930 }
2931 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2932
2933 if (dump_file && (dump_flags & TDF_DETAILS))
2934 {
2935 fprintf (dump_file, "get_address_cost:\n");
2936 fprintf (dump_file, " min offset %s %d\n",
2937 GET_MODE_NAME (mem_mode),
2938 (int) min_offset[mem_mode]);
2939 fprintf (dump_file, " max offset %s %d\n",
2940 GET_MODE_NAME (mem_mode),
2941 (int) max_offset[mem_mode]);
2942 }
2943
2944 rat[mem_mode] = 1;
2945 for (i = 2; i <= MAX_RATIO; i++)
2946 if (multiplier_allowed_in_address_p (i, mem_mode))
2947 {
2948 rat[mem_mode] = i;
2949 break;
2950 }
2951
2952 /* Compute the cost of various addressing modes. */
2953 acost = 0;
2954 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2955 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2956
2957 for (i = 0; i < 16; i++)
2958 {
2959 sym_p = i & 1;
2960 var_p = (i >> 1) & 1;
2961 off_p = (i >> 2) & 1;
2962 rat_p = (i >> 3) & 1;
2963
2964 addr = reg0;
2965 if (rat_p)
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2967 gen_int_mode (rat[mem_mode], Pmode));
2968
2969 if (var_p)
2970 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2971
2972 if (sym_p)
2973 {
2974 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2975 if (off_p)
2976 base = gen_rtx_fmt_e (CONST, Pmode,
2977 gen_rtx_fmt_ee (PLUS, Pmode,
2978 base,
2979 gen_int_mode (off[mem_mode],
2980 Pmode)));
2981 }
2982 else if (off_p)
2983 base = gen_int_mode (off[mem_mode], Pmode);
2984 else
2985 base = NULL_RTX;
2986
2987 if (base)
2988 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2989
2990 start_sequence ();
2991 /* To avoid splitting addressing modes, pretend that no cse will
2992 follow. */
2993 old_cse_not_expected = cse_not_expected;
2994 cse_not_expected = true;
2995 addr = memory_address (mem_mode, addr);
2996 cse_not_expected = old_cse_not_expected;
2997 seq = get_insns ();
2998 end_sequence ();
2999
3000 acost = seq_cost (seq);
3001 acost += address_cost (addr, mem_mode);
3002
3003 if (!acost)
3004 acost = 1;
3005 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3006 }
3007
      /* On some targets, it is quite expensive to load a symbol into a
	 register, which makes addresses that contain symbols look much more
	 expensive.  However, the symbol will have to be loaded in any case
	 before the loop (and quite likely we have it in a register already),
	 so it does not make much sense to penalize them too heavily.  Hence
	 we make some final tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with a small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with the cost of the
	 addition, and if this is the case, use it.  */
3020 add_c = add_cost (Pmode);
3021 for (i = 0; i < 8; i++)
3022 {
3023 var_p = i & 1;
3024 off_p = (i >> 1) & 1;
3025 rat_p = (i >> 2) & 1;
3026
3027 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3028 if (var_p)
3029 acost += add_c;
3030
3031 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3032 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3033 }
3034
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3036 {
3037 fprintf (dump_file, "Address costs:\n");
3038
3039 for (i = 0; i < 16; i++)
3040 {
3041 sym_p = i & 1;
3042 var_p = (i >> 1) & 1;
3043 off_p = (i >> 2) & 1;
3044 rat_p = (i >> 3) & 1;
3045
3046 fprintf (dump_file, " ");
3047 if (sym_p)
3048 fprintf (dump_file, "sym + ");
3049 if (var_p)
3050 fprintf (dump_file, "var + ");
3051 if (off_p)
3052 fprintf (dump_file, "cst + ");
3053 if (rat_p)
3054 fprintf (dump_file, "rat * ");
3055
3056 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3057 fprintf (dump_file, "index costs %d\n", acost);
3058 }
3059 fprintf (dump_file, "\n");
3060 }
3061 }
3062
3063 bits = GET_MODE_BITSIZE (Pmode);
3064 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3065 offset &= mask;
3066 if ((offset >> (bits - 1) & 1))
3067 offset |= ~mask;
3068 s_offset = offset;
3069
3070 cost = 0;
3071 offset_p = (s_offset != 0
3072 && min_offset[mem_mode] <= s_offset
3073 && s_offset <= max_offset[mem_mode]);
3074 ratio_p = (ratio != 1
3075 && multiplier_allowed_in_address_p (ratio, mem_mode));
3076
3077 if (ratio != 1 && !ratio_p)
3078 cost += multiply_by_cost (ratio, Pmode);
3079
3080 if (s_offset && !offset_p && !symbol_present)
3081 cost += add_cost (Pmode);
3082
3083 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3084 return cost + acost;
3085 }
3086
3087 /* Estimates cost of forcing expression EXPR into a variable. */
3088
3089 unsigned
3090 force_expr_to_var_cost (tree expr)
3091 {
3092 static bool costs_initialized = false;
3093 static unsigned integer_cost;
3094 static unsigned symbol_cost;
3095 static unsigned address_cost;
3096 tree op0, op1;
3097 unsigned cost0, cost1, cost;
3098 enum machine_mode mode;
3099
3100 if (!costs_initialized)
3101 {
3102 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3103 rtx x = gen_rtx_MEM (DECL_MODE (var),
3104 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3105 tree addr;
3106 tree type = build_pointer_type (integer_type_node);
3107
3108 integer_cost = computation_cost (build_int_cst (integer_type_node,
3109 2000));
3110
3111 SET_DECL_RTL (var, x);
3112 TREE_STATIC (var) = 1;
3113 addr = build1 (ADDR_EXPR, type, var);
3114 symbol_cost = computation_cost (addr) + 1;
3115
3116 address_cost
3117 = computation_cost (build2 (PLUS_EXPR, type,
3118 addr,
3119 build_int_cst (type, 2000))) + 1;
3120 if (dump_file && (dump_flags & TDF_DETAILS))
3121 {
3122 fprintf (dump_file, "force_expr_to_var_cost:\n");
3123 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3124 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3125 fprintf (dump_file, " address %d\n", (int) address_cost);
3126 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3127 fprintf (dump_file, "\n");
3128 }
3129
3130 costs_initialized = true;
3131 }
3132
3133 STRIP_NOPS (expr);
3134
3135 if (SSA_VAR_P (expr))
3136 return 0;
3137
3138 if (TREE_INVARIANT (expr))
3139 {
3140 if (TREE_CODE (expr) == INTEGER_CST)
3141 return integer_cost;
3142
3143 if (TREE_CODE (expr) == ADDR_EXPR)
3144 {
3145 tree obj = TREE_OPERAND (expr, 0);
3146
3147 if (TREE_CODE (obj) == VAR_DECL
3148 || TREE_CODE (obj) == PARM_DECL
3149 || TREE_CODE (obj) == RESULT_DECL)
3150 return symbol_cost;
3151 }
3152
3153 return address_cost;
3154 }
3155
3156 switch (TREE_CODE (expr))
3157 {
3158 case PLUS_EXPR:
3159 case MINUS_EXPR:
3160 case MULT_EXPR:
3161 op0 = TREE_OPERAND (expr, 0);
3162 op1 = TREE_OPERAND (expr, 1);
3163 STRIP_NOPS (op0);
3164 STRIP_NOPS (op1);
3165
3166 if (is_gimple_val (op0))
3167 cost0 = 0;
3168 else
3169 cost0 = force_expr_to_var_cost (op0);
3170
3171 if (is_gimple_val (op1))
3172 cost1 = 0;
3173 else
3174 cost1 = force_expr_to_var_cost (op1);
3175
3176 break;
3177
3178 default:
3179 /* Just an arbitrary value, FIXME. */
3180 return target_spill_cost;
3181 }
3182
3183 mode = TYPE_MODE (TREE_TYPE (expr));
3184 switch (TREE_CODE (expr))
3185 {
3186 case PLUS_EXPR:
3187 case MINUS_EXPR:
3188 cost = add_cost (mode);
3189 break;
3190
3191 case MULT_EXPR:
3192 if (cst_and_fits_in_hwi (op0))
3193 cost = multiply_by_cost (int_cst_value (op0), mode);
3194 else if (cst_and_fits_in_hwi (op1))
3195 cost = multiply_by_cost (int_cst_value (op1), mode);
3196 else
3197 return target_spill_cost;
3198 break;
3199
3200 default:
3201 gcc_unreachable ();
3202 }
3203
3204 cost += cost0;
3205 cost += cost1;
3206
3207 /* Bound the cost by target_spill_cost. The parts of complicated
3208 computations often are either loop invariant or at least can
3209 be shared between several iv uses, so letting this grow without
3210 limits would not give reasonable results. */
3211 return cost < target_spill_cost ? cost : target_spill_cost;
3212 }
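
/* For example, for x * 4 the result is the cost of the multiplication by 4
   in the corresponding mode (bounded by target_spill_cost), while x * y,
   with neither operand constant, costs target_spill_cost.  */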
3213
3214 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3215 invariants the computation depends on. */
3216
3217 static unsigned
3218 force_var_cost (struct ivopts_data *data,
3219 tree expr, bitmap *depends_on)
3220 {
3221 if (depends_on)
3222 {
3223 fd_ivopts_data = data;
3224 walk_tree (&expr, find_depends, depends_on, NULL);
3225 }
3226
3227 return force_expr_to_var_cost (expr);
3228 }
3229
3230 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3231 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3232 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3233 invariants the computation depends on. */
3234
3235 static unsigned
3236 split_address_cost (struct ivopts_data *data,
3237 tree addr, bool *symbol_present, bool *var_present,
3238 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3239 {
3240 tree core;
3241 HOST_WIDE_INT bitsize;
3242 HOST_WIDE_INT bitpos;
3243 tree toffset;
3244 enum machine_mode mode;
3245 int unsignedp, volatilep;
3246
3247 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3248 &unsignedp, &volatilep, false);
3249
3250 if (toffset != 0
3251 || bitpos % BITS_PER_UNIT != 0
3252 || TREE_CODE (core) != VAR_DECL)
3253 {
3254 *symbol_present = false;
3255 *var_present = true;
3256 fd_ivopts_data = data;
3257 walk_tree (&addr, find_depends, depends_on, NULL);
3258 return target_spill_cost;
3259 }
3260
3261 *offset += bitpos / BITS_PER_UNIT;
3262 if (TREE_STATIC (core)
3263 || DECL_EXTERNAL (core))
3264 {
3265 *symbol_present = true;
3266 *var_present = false;
3267 return 0;
3268 }
3269
3270 *symbol_present = false;
3271 *var_present = true;
3272 return 0;
3273 }
3274
3275 /* Estimates cost of expressing difference of addresses E1 - E2 as
3276 var + symbol + offset. The value of offset is added to OFFSET,
3277 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3278 part is missing. DEPENDS_ON is a set of the invariants the computation
3279 depends on. */
3280
3281 static unsigned
3282 ptr_difference_cost (struct ivopts_data *data,
3283 tree e1, tree e2, bool *symbol_present, bool *var_present,
3284 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3285 {
3286 HOST_WIDE_INT diff = 0;
3287 unsigned cost;
3288
3289 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3290
3291 if (ptr_difference_const (e1, e2, &diff))
3292 {
3293 *offset += diff;
3294 *symbol_present = false;
3295 *var_present = false;
3296 return 0;
3297 }
3298
3299 if (e2 == integer_zero_node)
3300 return split_address_cost (data, TREE_OPERAND (e1, 0),
3301 symbol_present, var_present, offset, depends_on);
3302
3303 *symbol_present = false;
3304 *var_present = true;
3305
3306 cost = force_var_cost (data, e1, depends_on);
3307 cost += force_var_cost (data, e2, depends_on);
3308 cost += add_cost (Pmode);
3309
3310 return cost;
3311 }
3312
3313 /* Estimates cost of expressing difference E1 - E2 as
3314 var + symbol + offset. The value of offset is added to OFFSET,
3315 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3316 part is missing. DEPENDS_ON is a set of the invariants the computation
3317 depends on. */
3318
3319 static unsigned
3320 difference_cost (struct ivopts_data *data,
3321 tree e1, tree e2, bool *symbol_present, bool *var_present,
3322 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3323 {
3324 unsigned cost;
3325 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3326 unsigned HOST_WIDE_INT off1, off2;
3327
3328 e1 = strip_offset (e1, &off1);
3329 e2 = strip_offset (e2, &off2);
3330 *offset += off1 - off2;
3331
3332 STRIP_NOPS (e1);
3333 STRIP_NOPS (e2);
3334
3335 if (TREE_CODE (e1) == ADDR_EXPR)
3336 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3337 depends_on);
3338 *symbol_present = false;
3339
3340 if (operand_equal_p (e1, e2, 0))
3341 {
3342 *var_present = false;
3343 return 0;
3344 }
3345 *var_present = true;
3346 if (integer_zerop (e2))
3347 return force_var_cost (data, e1, depends_on);
3348
3349 if (integer_zerop (e1))
3350 {
3351 cost = force_var_cost (data, e2, depends_on);
3352 cost += multiply_by_cost (-1, mode);
3353
3354 return cost;
3355 }
3356
3357 cost = force_var_cost (data, e1, depends_on);
3358 cost += force_var_cost (data, e2, depends_on);
3359 cost += add_cost (mode);
3360
3361 return cost;
3362 }
3363
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.  */
3369
3370 static unsigned
3371 get_computation_cost_at (struct ivopts_data *data,
3372 struct iv_use *use, struct iv_cand *cand,
3373 bool address_p, bitmap *depends_on, tree at)
3374 {
3375 tree ubase = use->iv->base, ustep = use->iv->step;
3376 tree cbase, cstep;
3377 tree utype = TREE_TYPE (ubase), ctype;
3378 unsigned HOST_WIDE_INT cstepi, offset = 0;
3379 HOST_WIDE_INT ratio, aratio;
3380 bool var_present, symbol_present;
3381 unsigned cost = 0, n_sums;
3382 double_int rat;
3383
3384 *depends_on = NULL;
3385
3386 /* Only consider real candidates. */
3387 if (!cand->iv)
3388 return INFTY;
3389
3390 cbase = cand->iv->base;
3391 cstep = cand->iv->step;
3392 ctype = TREE_TYPE (cbase);
3393
3394 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3395 {
      /* The candidate type is not precise enough to express the values
	 of the use.  */
3397 return INFTY;
3398 }
3399
3400 if (address_p)
3401 {
      /* Do not try to express the address of an object with a computation
	 based on the address of a different object.  This may confuse
	 rtl-level alias analysis (which does not expect this to happen,
	 as it is invalid in C), and would be unlikely to be useful
	 anyway.  */
3407 if (use->iv->base_object
3408 && cand->iv->base_object
3409 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3410 return INFTY;
3411 }
3412
3413 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3414 {
3415 /* TODO -- add direct handling of this case. */
3416 goto fallback;
3417 }
3418
  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses the value of the variable before the increment anyway,
     so this is not that unrealistic.  */
3425 if (cst_and_fits_in_hwi (cstep))
3426 cstepi = int_cst_value (cstep);
3427 else
3428 cstepi = 0;
3429
3430 if (!constant_multiple_of (ustep, cstep, &rat))
3431 return INFTY;
3432
3433 if (double_int_fits_in_shwi_p (rat))
3434 ratio = double_int_to_shwi (rat);
3435 else
3436 return INFTY;
3437
3438 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3439 or ratio == 1, it is better to handle this like
3440
3441 ubase - ratio * cbase + ratio * var
3442
     (this also holds in the case ratio == -1, TODO).  */
3444
3445 if (cst_and_fits_in_hwi (cbase))
3446 {
3447 offset = - ratio * int_cst_value (cbase);
3448 cost += difference_cost (data,
3449 ubase, integer_zero_node,
3450 &symbol_present, &var_present, &offset,
3451 depends_on);
3452 }
3453 else if (ratio == 1)
3454 {
3455 cost += difference_cost (data,
3456 ubase, cbase,
3457 &symbol_present, &var_present, &offset,
3458 depends_on);
3459 }
3460 else
3461 {
3462 cost += force_var_cost (data, cbase, depends_on);
3463 cost += add_cost (TYPE_MODE (ctype));
3464 cost += difference_cost (data,
3465 ubase, integer_zero_node,
3466 &symbol_present, &var_present, &offset,
3467 depends_on);
3468 }
3469
3470 /* If we are after the increment, the value of the candidate is higher by
3471 one iteration. */
3472 if (stmt_after_increment (data->current_loop, cand, at))
3473 offset -= ratio * cstepi;
3474
  /* Now the computation is in the shape symbol + var1 + const + ratio * var2
     (the symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
3478 if (address_p)
3479 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3480 TYPE_MODE (TREE_TYPE (*use->op_p)));
3481
3482 /* Otherwise estimate the costs for computing the expression. */
3483 aratio = ratio > 0 ? ratio : -ratio;
3484 if (!symbol_present && !var_present && !offset)
3485 {
3486 if (ratio != 1)
3487 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3488
3489 return cost;
3490 }
3491
3492 if (aratio != 1)
3493 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3494
3495 n_sums = 1;
3496 if (var_present
3497 /* Symbol + offset should be compile-time computable. */
3498 && (symbol_present || offset))
3499 n_sums++;
3500
3501 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3502
3503 fallback:
3504 {
3505 /* Just get the expression, expand it and measure the cost. */
3506 tree comp = get_computation_at (data->current_loop, use, cand, at);
3507
3508 if (!comp)
3509 return INFTY;
3510
3511 if (address_p)
3512 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3513
3514 return computation_cost (comp);
3515 }
3516 }
3517
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */
3523
3524 static unsigned
3525 get_computation_cost (struct ivopts_data *data,
3526 struct iv_use *use, struct iv_cand *cand,
3527 bool address_p, bitmap *depends_on)
3528 {
3529 return get_computation_cost_at (data,
3530 use, cand, address_p, depends_on, use->stmt);
3531 }
3532
3533 /* Determines cost of basing replacement of USE on CAND in a generic
3534 expression. */
3535
3536 static bool
3537 determine_use_iv_cost_generic (struct ivopts_data *data,
3538 struct iv_use *use, struct iv_cand *cand)
3539 {
3540 bitmap depends_on;
3541 unsigned cost;
3542
  /* The simple case first -- if we need to express the value of the
     preserved original biv, the cost is 0.  This also prevents us from
     counting the cost of the increment twice -- once at this use and once
     in the cost of the candidate.  */
3547 if (cand->pos == IP_ORIGINAL
3548 && cand->incremented_at == use->stmt)
3549 {
3550 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3551 return true;
3552 }
3553
3554 cost = get_computation_cost (data, use, cand, false, &depends_on);
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556
3557 return cost != INFTY;
3558 }
3559
3560 /* Determines cost of basing replacement of USE on CAND in an address. */
3561
3562 static bool
3563 determine_use_iv_cost_address (struct ivopts_data *data,
3564 struct iv_use *use, struct iv_cand *cand)
3565 {
3566 bitmap depends_on;
3567 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3568
3569 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3570
3571 return cost != INFTY;
3572 }
3573
3574 /* Computes value of candidate CAND at position AT in iteration NITER, and
3575 stores it to VAL. */
3576
3577 static void
3578 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3579 aff_tree *val)
3580 {
3581 aff_tree step, delta, nit;
3582 struct iv *iv = cand->iv;
3583 tree type = TREE_TYPE (iv->base);
3584
3585 tree_to_aff_combination (iv->step, type, &step);
3586 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3587 aff_combination_convert (&nit, type);
3588 aff_combination_mult (&nit, &step, &delta);
3589 if (stmt_after_increment (loop, cand, at))
3590 aff_combination_add (&delta, &step);
3591
3592 tree_to_aff_combination (iv->base, type, val);
3593 aff_combination_add (val, &delta);
3594 }
3595
/* Returns the period of induction variable IV.  */
3597
3598 static tree
3599 iv_period (struct iv *iv)
3600 {
3601 tree step = iv->step, period, type;
3602 tree pow2div;
3603
3604 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3605
  /* The period of the iv is the range of its type divided by the maximum
     power of two that divides the step; since the type range is a power of
     two, it suffices to count the low zero bits of the step.  The value
     built below is this period decreased by one, so that it fits in the
     type.  */
3609 pow2div = num_ending_zeros (step);
3610 type = unsigned_type_for (TREE_TYPE (step));
3611
3612 period = build_low_bits_mask (type,
3613 (TYPE_PRECISION (type)
3614 - tree_low_cst (pow2div, 1)));
3615
3616 return period;
3617 }
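
/* E.g. for an iv with step 4 in an 8-bit unsigned type, num_ending_zeros
   is 2 and the value built is the 6-bit mask 63 -- the iv takes only 64
   distinct values before wrapping.  */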
3618
3619 /* Returns the comparison operator used when eliminating the iv USE. */
3620
3621 static enum tree_code
3622 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3623 {
3624 struct loop *loop = data->current_loop;
3625 basic_block ex_bb;
3626 edge exit;
3627
3628 ex_bb = bb_for_stmt (use->stmt);
3629 exit = EDGE_SUCC (ex_bb, 0);
3630 if (flow_bb_inside_loop_p (loop, exit->dest))
3631 exit = EDGE_SUCC (ex_bb, 1);
3632
3633 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3634 }
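
/* E.g. if the loop is left when the condition is false (the exit is on the
   false edge), the eliminated test keeps iterating while the candidate is
   not equal to the bound, so NE_EXPR is used; if the true edge exits, the
   test becomes an equality with the bound.  */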
3635
/* Checks whether it is possible to express the condition in USE by a
   comparison of candidate CAND.  If so, the value to compare with is
   stored to BOUND.  */
3638
3639 static bool
3640 may_eliminate_iv (struct ivopts_data *data,
3641 struct iv_use *use, struct iv_cand *cand, tree *bound)
3642 {
3643 basic_block ex_bb;
3644 edge exit;
3645 tree nit, nit_type;
3646 tree wider_type, period, per_type;
3647 struct loop *loop = data->current_loop;
3648 aff_tree bnd;
3649
3650 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3651 return false;
3652
  /* For now this works only for exits that dominate the loop latch.
     TODO -- extend it for other conditions inside the loop body.  */
3655 ex_bb = bb_for_stmt (use->stmt);
3656 if (use->stmt != last_stmt (ex_bb)
3657 || TREE_CODE (use->stmt) != COND_EXPR)
3658 return false;
3659 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3660 return false;
3661
3662 exit = EDGE_SUCC (ex_bb, 0);
3663 if (flow_bb_inside_loop_p (loop, exit->dest))
3664 exit = EDGE_SUCC (ex_bb, 1);
3665 if (flow_bb_inside_loop_p (loop, exit->dest))
3666 return false;
3667
3668 nit = niter_for_exit (data, exit);
3669 if (!nit)
3670 return false;
3671
3672 nit_type = TREE_TYPE (nit);
3673
3674 /* Determine whether we may use the variable to test whether niter iterations
3675 elapsed. This is the case iff the period of the induction variable is
3676 greater than the number of iterations. */
3677 period = iv_period (cand->iv);
3678 if (!period)
3679 return false;
3680 per_type = TREE_TYPE (period);
3681
3683 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3684 wider_type = per_type;
3685 else
3686 wider_type = nit_type;
3687
3688 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3689 fold_convert (wider_type, period),
3690 fold_convert (wider_type, nit))))
3691 return false;
3692
3693 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3694 *bound = aff_combination_to_tree (&bnd);
3695 return true;
3696 }
3697
3698 /* Determines cost of basing replacement of USE on CAND in a condition. */
3699
3700 static bool
3701 determine_use_iv_cost_condition (struct ivopts_data *data,
3702 struct iv_use *use, struct iv_cand *cand)
3703 {
3704 tree bound = NULL_TREE;
3705 struct iv *cmp_iv;
3706 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3707 unsigned elim_cost, express_cost, cost;
3708 bool ok;
3709
3710 /* Only consider real candidates. */
3711 if (!cand->iv)
3712 {
3713 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3714 return false;
3715 }
3716
3717 /* Try iv elimination. */
3718 if (may_eliminate_iv (data, use, cand, &bound))
3719 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3720 else
3721 elim_cost = INFTY;
3722
3723 /* Try expressing the original giv. If it is compared with an invariant,
3724 note that we cannot get rid of it. */
3725 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3726 gcc_assert (ok);
3727
3728 express_cost = get_computation_cost (data, use, cand, false,
3729 &depends_on_express);
3730 fd_ivopts_data = data;
3731 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3732
3733 /* Choose the better approach. */
3734 if (elim_cost < express_cost)
3735 {
3736 cost = elim_cost;
3737 depends_on = depends_on_elim;
3738 depends_on_elim = NULL;
3739 }
3740 else
3741 {
3742 cost = express_cost;
3743 depends_on = depends_on_express;
3744 depends_on_express = NULL;
3745 bound = NULL_TREE;
3746 }
3747
3748 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3749
3750 if (depends_on_elim)
3751 BITMAP_FREE (depends_on_elim);
3752 if (depends_on_express)
3753 BITMAP_FREE (depends_on_express);
3754
3755 return cost != INFTY;
3756 }
3757
3758 /* Determines cost of basing replacement of USE on CAND. Returns false
3759 if USE cannot be based on CAND. */
3760
3761 static bool
3762 determine_use_iv_cost (struct ivopts_data *data,
3763 struct iv_use *use, struct iv_cand *cand)
3764 {
3765 switch (use->type)
3766 {
3767 case USE_NONLINEAR_EXPR:
3768 return determine_use_iv_cost_generic (data, use, cand);
3769
3770 case USE_ADDRESS:
3771 return determine_use_iv_cost_address (data, use, cand);
3772
3773 case USE_COMPARE:
3774 return determine_use_iv_cost_condition (data, use, cand);
3775
3776 default:
3777 gcc_unreachable ();
3778 }
3779 }
3780
/* Determines the costs of basing the uses on the iv candidates.  */
3782
3783 static void
3784 determine_use_iv_costs (struct ivopts_data *data)
3785 {
3786 unsigned i, j;
3787 struct iv_use *use;
3788 struct iv_cand *cand;
3789 bitmap to_clear = BITMAP_ALLOC (NULL);
3790
3791 alloc_use_cost_map (data);
3792
3793 for (i = 0; i < n_iv_uses (data); i++)
3794 {
3795 use = iv_use (data, i);
3796
3797 if (data->consider_all_candidates)
3798 {
3799 for (j = 0; j < n_iv_cands (data); j++)
3800 {
3801 cand = iv_cand (data, j);
3802 determine_use_iv_cost (data, use, cand);
3803 }
3804 }
3805 else
3806 {
3807 bitmap_iterator bi;
3808
3809 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3810 {
3811 cand = iv_cand (data, j);
3812 if (!determine_use_iv_cost (data, use, cand))
3813 bitmap_set_bit (to_clear, j);
3814 }
3815
	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
3818 bitmap_and_compl_into (use->related_cands, to_clear);
3819 bitmap_clear (to_clear);
3820 }
3821 }
3822
3823 BITMAP_FREE (to_clear);
3824
3825 if (dump_file && (dump_flags & TDF_DETAILS))
3826 {
3827 fprintf (dump_file, "Use-candidate costs:\n");
3828
3829 for (i = 0; i < n_iv_uses (data); i++)
3830 {
3831 use = iv_use (data, i);
3832
3833 fprintf (dump_file, "Use %d:\n", i);
3834 fprintf (dump_file, " cand\tcost\tdepends on\n");
3835 for (j = 0; j < use->n_map_members; j++)
3836 {
3837 if (!use->cost_map[j].cand
3838 || use->cost_map[j].cost == INFTY)
3839 continue;
3840
3841 fprintf (dump_file, " %d\t%d\t",
3842 use->cost_map[j].cand->id,
3843 use->cost_map[j].cost);
3844 if (use->cost_map[j].depends_on)
3845 bitmap_print (dump_file,
3846 use->cost_map[j].depends_on, "","");
3847 fprintf (dump_file, "\n");
3848 }
3849
3850 fprintf (dump_file, "\n");
3851 }
3852 fprintf (dump_file, "\n");
3853 }
3854 }
3855
3856 /* Determines cost of the candidate CAND. */
3857
3858 static void
3859 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3860 {
3861 unsigned cost_base, cost_step;
3862 tree base;
3863
3864 if (!cand->iv)
3865 {
3866 cand->cost = 0;
3867 return;
3868 }
3869
  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we give it only a very small weight.  */
3873
3874 base = cand->iv->base;
3875 cost_base = force_var_cost (data, base, NULL);
3876 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3877
3878 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3879
3880 /* Prefer the original iv unless we may gain something by replacing it;
3881 this is not really relevant for artificial ivs created by other
3882 passes. */
3883 if (cand->pos == IP_ORIGINAL
3884 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3885 cand->cost--;
3886
  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
3889 if (cand->pos == IP_END
3890 && empty_block_p (ip_end_pos (data->current_loop)))
3891 cand->cost++;
3892 }
3893
3894 /* Determines costs of computation of the candidates. */
3895
3896 static void
3897 determine_iv_costs (struct ivopts_data *data)
3898 {
3899 unsigned i;
3900
3901 if (dump_file && (dump_flags & TDF_DETAILS))
3902 {
3903 fprintf (dump_file, "Candidate costs:\n");
3904 fprintf (dump_file, " cand\tcost\n");
3905 }
3906
3907 for (i = 0; i < n_iv_cands (data); i++)
3908 {
3909 struct iv_cand *cand = iv_cand (data, i);
3910
3911 determine_iv_cost (data, cand);
3912
3913 if (dump_file && (dump_flags & TDF_DETAILS))
3914 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3915 }
3916
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, "\n");
3919 }
3920
/* Calculates the cost of having SIZE induction variables.  */
3922
3923 static unsigned
3924 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3925 {
3926 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3927 }
3928
3929 /* For each size of the induction variable set determine the penalty. */
3930
3931 static void
3932 determine_set_costs (struct ivopts_data *data)
3933 {
3934 unsigned j, n;
3935 tree phi, op;
3936 struct loop *loop = data->current_loop;
3937 bitmap_iterator bi;
3938
  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), call it A.

     We estimate the number of registers used by the loop, call it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
	encourage creating a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST.
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.

     A worked example follows this comment.  */
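
  /* A small worked example with made-up numbers: let A = 10, R = 3 and
     U = 4.  Three ivs satisfy U + I + R = 10 <= A and cost 3 * SMALL_COST;
     five ivs give A - R = 7 < U + I = 9 <= A and cost 5 * PRES_COST; eight
     ivs make U + I = 12 > A, adding SPILL_COST * (12 - 10) / 12 per use on
     top of 8 * PRES_COST.  */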
3958
3959 if (dump_file && (dump_flags & TDF_DETAILS))
3960 {
3961 fprintf (dump_file, "Global costs:\n");
3962 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3963 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3964 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3965 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3966 }
3967
3968 n = 0;
3969 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3970 {
3971 op = PHI_RESULT (phi);
3972
3973 if (!is_gimple_reg (op))
3974 continue;
3975
3976 if (get_iv (data, op))
3977 continue;
3978
3979 n++;
3980 }
3981
3982 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3983 {
3984 struct version_info *info = ver_info (data, j);
3985
3986 if (info->inv_id && info->has_nonlin_use)
3987 n++;
3988 }
3989
3990 data->regs_used = n;
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, " regs_used %d\n", n);
3993
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3995 {
3996 fprintf (dump_file, " cost for size:\n");
3997 fprintf (dump_file, " ivs\tcost\n");
3998 for (j = 0; j <= 2 * target_avail_regs; j++)
3999 fprintf (dump_file, " %d\t%d\n", j,
4000 ivopts_global_cost_for_size (data, j));
4001 fprintf (dump_file, "\n");
4002 }
4003 }
4004
4005 /* Returns true if A is a cheaper cost pair than B. */
4006
4007 static bool
4008 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4009 {
4010 if (!a)
4011 return false;
4012
4013 if (!b)
4014 return true;
4015
4016 if (a->cost < b->cost)
4017 return true;
4018
4019 if (a->cost > b->cost)
4020 return false;
4021
4022 /* In case the costs are the same, prefer the cheaper candidate. */
4023 if (a->cand->cost < b->cand->cost)
4024 return true;
4025
4026 return false;
4027 }
4028
4029 /* Computes the cost field of IVS structure. */
4030
4031 static void
4032 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4033 {
4034 unsigned cost = 0;
4035
4036 cost += ivs->cand_use_cost;
4037 cost += ivs->cand_cost;
4038 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4039
4040 ivs->cost = cost;
4041 }
4042
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a record of the change from expressing USE by OLD_CP to expressing
   it by NEW_CP, and chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the cost pair through which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */
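/* For instance, given a hypothetical delta (use0: A -> B) -> (use1: C -> D),
   committing with FORWARD true makes use0 expressed by cost pair B and use1
   by D, while committing it with FORWARD false restores A and C.  */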

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

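  /* Reversing again restores both the order of the list and the old_cp /
     new_cp fields, so DELTA is left exactly as the caller passed it in.  */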
  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate used by each use in IVS to CAND.  Return the
   cost of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored in N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

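  /* Commit the delta only to measure the cost of the would-be set, then
     roll it back; IVS is unchanged on return and the caller decides whether
     to apply DELTA for real.  */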
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other than
   EXCEPT_CAND from it.  Return the cost of the new set, and store the
   differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if that fails, try the
     use-specific ones.  Rationale -- in loops with many variables the best
     choice often is to use just one generic biv.  If we added many ivs
     specific to the uses here, the optimization algorithm later would be
     likely to get stuck in a local minimum, thus causing us to create too
     many ivs.  Going from a few ivs to more seems more likely to be
     successful -- starting from a few ivs, replacing an expensive use by
     a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
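/* An illustrative run (costs invented for this sketch): starting from an
   initial solution {i_1} of cost 20, a try_improve_iv_set step may find that
   extending the set by candidate i_4 and pruning i_1 yields cost 16; the
   loop below then continues from {i_4} until no single extension or pruning
   lowers the cost any further.  */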

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

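  /* The candidate's position determines where the increment of the new iv
     is placed: IP_NORMAL puts it just before the exit test, IP_END at the
     end of the latch block, and IP_ORIGINAL reuses the original biv and its
     increment.  */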
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to introduce
     a new computation (that might also need casting the variable to
     unsigned and back).  */
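  /* E.g. for a hypothetical original increment i_4 = i_2 + 1, expressing
     i_4 by the candidate based on the original biv itself means the
     statement can stay exactly as it is.  */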
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
    }
}

/* Replaces the ssa name at index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extract the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored, and its precise semantics
   depend on the type of the reference, but there is always one _DECL node
   hidden somewhere that is used to determine the set of virtual operands
   for the reference.  The code below deciphers this jungle and extracts
   that single useful piece of information.  */
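/* In outline: for a dereference of an ssa name pointer we prefer the name
   memory tag of the pointer and fall back to the symbol memory tag of its
   base variable; for a plain _DECL base we use its symbol memory tag if it
   has one, and the _DECL itself otherwise.  */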

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
	 tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the operands is an iv) using
   candidate CAND.  */
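/* For instance (hypothetical names): when the candidate allows eliminating
   the iv from the exit test, a condition like "i < 100" may be replaced by
   a comparison of the candidate against a precomputed bound, e.g.
   "p != base + 400"; if elimination is not possible, the iv operand is
   simply re-expressed in terms of the candidate.  */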

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}