gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
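
/* For illustration only -- a hypothetical example, not output produced
   verbatim by this pass. Given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction can replace the multiplication hidden in the address
   computation a + i * sizeof (*a) by a pointer that is advanced by
   sizeof (*a) in each iteration, giving roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   after which the original counter i may be eliminated if it has no
   other uses. */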
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94
95 /* The infinite cost. */
96 #define INFTY 10000000
97
98 /* The expected number of loop iterations. TODO -- use profiling instead of
99 this. */
100 #define AVG_LOOP_NITER(LOOP) 5
101
102
103 /* Representation of the induction variable. */
104 struct iv
105 {
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to which the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The id of the use, if have_use_for is true. */
113 };
114
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
117 {
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 };
125
126 /* Types of uses. */
127 enum use_type
128 {
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
132 };
133
134 /* The candidate - cost pair. */
135 struct cost_pair
136 {
137 struct iv_cand *cand; /* The candidate. */
138 unsigned cost; /* The cost. */
139 bitmap depends_on; /* The list of invariants that have to be
140 preserved. */
141 tree value; /* For final value elimination, the expression for
142 the final value of the iv. For iv elimination,
143 the new bound to compare with. */
144 };
145
146 /* A use of an induction variable. */
147 struct iv_use
148 {
149 unsigned id; /* The id of the use. */
150 enum use_type type; /* Type of the use. */
151 struct iv *iv; /* The induction variable it is based on. */
152 tree stmt; /* Statement in which it occurs. */
153 tree *op_p; /* The place where it occurs. */
154 bitmap related_cands; /* The set of "related" iv candidates, plus the common
155 important ones. */
156
157 unsigned n_map_members; /* Number of candidates in the cost_map list. */
158 struct cost_pair *cost_map;
159 /* The costs with respect to the iv candidates. */
160
161 struct iv_cand *selected;
162 /* The selected candidate. */
163 };
164
165 /* The position where the iv is computed. */
166 enum iv_position
167 {
168 IP_NORMAL, /* At the end, just before the exit condition. */
169 IP_END, /* At the end of the latch block. */
170 IP_ORIGINAL /* The original biv. */
171 };
172
173 /* The induction variable candidate. */
174 struct iv_cand
175 {
176 unsigned id; /* The number of the candidate. */
177 bool important; /* Whether this is an "important" candidate, i.e. such
178 that it should be considered by all uses. */
179 enum iv_position pos; /* Where it is computed. */
180 tree incremented_at; /* For original biv, the statement where it is
181 incremented. */
182 tree var_before; /* The variable used for it before increment. */
183 tree var_after; /* The variable used for it after increment. */
184 struct iv *iv; /* The value of the candidate. NULL for
185 "pseudocandidate" used to indicate the possibility
186 to replace the final value of an iv by direct
187 computation of the value. */
188 unsigned cost; /* Cost of the candidate. */
189 bitmap depends_on; /* The list of invariants that are used in step of the
190 biv. */
191 };
192
193 /* The data used by the induction variable optimizations. */
194
195 typedef struct iv_use *iv_use_p;
196 DEF_VEC_P(iv_use_p);
197 DEF_VEC_ALLOC_P(iv_use_p,heap);
198
199 typedef struct iv_cand *iv_cand_p;
200 DEF_VEC_P(iv_cand_p);
201 DEF_VEC_ALLOC_P(iv_cand_p,heap);
202
203 struct ivopts_data
204 {
205 /* The currently optimized loop. */
206 struct loop *current_loop;
207
208 /* Number of registers used in it. */
209 unsigned regs_used;
210
211 /* Numbers of iterations for all exits of the current loop. */
212 struct pointer_map_t *niters;
213
214 /* The size of version_info array allocated. */
215 unsigned version_info_size;
216
217 /* The array of information for the ssa names. */
218 struct version_info *version_info;
219
220 /* The bitmap of indices in version_info whose value was changed. */
221 bitmap relevant;
222
223 /* The maximum invariant id. */
224 unsigned max_inv_id;
225
226 /* The uses of induction variables. */
227 VEC(iv_use_p,heap) *iv_uses;
228
229 /* The candidates. */
230 VEC(iv_cand_p,heap) *iv_candidates;
231
232 /* A bitmap of important candidates. */
233 bitmap important_candidates;
234
235 /* Whether to consider just related and important candidates when replacing a
236 use. */
237 bool consider_all_candidates;
238 };
239
240 /* An assignment of iv candidates to uses. */
241
242 struct iv_ca
243 {
244 /* The number of uses covered by the assignment. */
245 unsigned upto;
246
247 /* Number of uses that cannot be expressed by the candidates in the set. */
248 unsigned bad_uses;
249
250 /* Candidate assigned to a use, together with the related costs. */
251 struct cost_pair **cand_for_use;
252
253 /* Number of times each candidate is used. */
254 unsigned *n_cand_uses;
255
256 /* The candidates used. */
257 bitmap cands;
258
259 /* The number of candidates in the set. */
260 unsigned n_cands;
261
262 /* Total number of registers needed. */
263 unsigned n_regs;
264
265 /* Total cost of expressing uses. */
266 unsigned cand_use_cost;
267
268 /* Total cost of candidates. */
269 unsigned cand_cost;
270
271 /* Number of times each invariant is used. */
272 unsigned *n_invariant_uses;
273
274 /* Total cost of the assignment. */
275 unsigned cost;
276 };
277
278 /* Difference of two iv candidate assignments. */
279
280 struct iv_ca_delta
281 {
282 /* Changed use. */
283 struct iv_use *use;
284
285 /* An old assignment (for rollback purposes). */
286 struct cost_pair *old_cp;
287
288 /* A new assignment. */
289 struct cost_pair *new_cp;
290
291 /* Next change in the list. */
292 struct iv_ca_delta *next_change;
293 };
294
295 /* Bound on number of candidates below which all candidates are considered. */
296
297 #define CONSIDER_ALL_CANDIDATES_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
299
300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
301 optimizing such a loop would help, and it would take ages). */
302
303 #define MAX_CONSIDERED_USES \
304 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
305
306 /* If there are at most this number of ivs in the set, always try removing
307 unnecessary ivs from the set. */
308
309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
310 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
311
312 /* The list of trees for which the decl_rtl field must be reset is stored
313 here. */
314
315 static VEC(tree,heap) *decl_rtl_to_reset;
316
317 /* Number of uses recorded in DATA. */
318
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
321 {
322 return VEC_length (iv_use_p, data->iv_uses);
323 }
324
325 /* Ith use recorded in DATA. */
326
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
329 {
330 return VEC_index (iv_use_p, data->iv_uses, i);
331 }
332
333 /* Number of candidates recorded in DATA. */
334
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
337 {
338 return VEC_length (iv_cand_p, data->iv_candidates);
339 }
340
341 /* Ith candidate recorded in DATA. */
342
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
345 {
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
347 }
348
349 /* The single loop exit if it dominates the latch, NULL otherwise. */
350
351 edge
352 single_dom_exit (struct loop *loop)
353 {
354 edge exit = single_exit (loop);
355
356 if (!exit)
357 return NULL;
358
359 if (!just_once_each_iteration_p (loop, exit->src))
360 return NULL;
361
362 return exit;
363 }
364
365 /* Dumps information about the induction variable IV to FILE. */
366
367 extern void dump_iv (FILE *, struct iv *);
368 void
369 dump_iv (FILE *file, struct iv *iv)
370 {
371 if (iv->ssa_name)
372 {
373 fprintf (file, "ssa name ");
374 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
375 fprintf (file, "\n");
376 }
377
378 fprintf (file, " type ");
379 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
380 fprintf (file, "\n");
381
382 if (iv->step)
383 {
384 fprintf (file, " base ");
385 print_generic_expr (file, iv->base, TDF_SLIM);
386 fprintf (file, "\n");
387
388 fprintf (file, " step ");
389 print_generic_expr (file, iv->step, TDF_SLIM);
390 fprintf (file, "\n");
391 }
392 else
393 {
394 fprintf (file, " invariant ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397 }
398
399 if (iv->base_object)
400 {
401 fprintf (file, " base object ");
402 print_generic_expr (file, iv->base_object, TDF_SLIM);
403 fprintf (file, "\n");
404 }
405
406 if (iv->biv_p)
407 fprintf (file, " is a biv\n");
408 }
409
410 /* Dumps information about the USE to FILE. */
411
412 extern void dump_use (FILE *, struct iv_use *);
413 void
414 dump_use (FILE *file, struct iv_use *use)
415 {
416 fprintf (file, "use %d\n", use->id);
417
418 switch (use->type)
419 {
420 case USE_NONLINEAR_EXPR:
421 fprintf (file, " generic\n");
422 break;
423
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
427
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
431
432 default:
433 gcc_unreachable ();
434 }
435
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
439
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
444
445 dump_iv (file, use->iv);
446
447 if (use->related_cands)
448 {
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
451 }
452 }
453
454 /* Dumps information about the uses to FILE. */
455
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
459 {
460 unsigned i;
461 struct iv_use *use;
462
463 for (i = 0; i < n_iv_uses (data); i++)
464 {
465 use = iv_use (data, i);
466
467 dump_use (file, use);
468 fprintf (file, "\n");
469 }
470 }
471
472 /* Dumps information about induction variable candidate CAND to FILE. */
473
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
477 {
478 struct iv *iv = cand->iv;
479
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
482
483 if (cand->depends_on)
484 {
485 fprintf (file, " depends on ");
486 dump_bitmap (file, cand->depends_on);
487 }
488
489 if (!iv)
490 {
491 fprintf (file, " final value replacement\n");
492 return;
493 }
494
495 switch (cand->pos)
496 {
497 case IP_NORMAL:
498 fprintf (file, " incremented before exit test\n");
499 break;
500
501 case IP_END:
502 fprintf (file, " incremented at end\n");
503 break;
504
505 case IP_ORIGINAL:
506 fprintf (file, " original biv\n");
507 break;
508 }
509
510 dump_iv (file, iv);
511 }
512
513 /* Returns the info for ssa version VER. */
514
515 static inline struct version_info *
516 ver_info (struct ivopts_data *data, unsigned ver)
517 {
518 return data->version_info + ver;
519 }
520
521 /* Returns the info for ssa name NAME. */
522
523 static inline struct version_info *
524 name_info (struct ivopts_data *data, tree name)
525 {
526 return ver_info (data, SSA_NAME_VERSION (name));
527 }
528
529 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
530 emitted in LOOP. */
531
532 static bool
533 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
534 {
535 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
536
537 gcc_assert (bb);
538
539 if (sbb == loop->latch)
540 return true;
541
542 if (sbb != bb)
543 return false;
544
545 return stmt == last_stmt (bb);
546 }
547
548 /* Returns true if STMT is after the place where the original induction
549 variable CAND is incremented. */
550
551 static bool
552 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 {
554 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
555 basic_block stmt_bb = bb_for_stmt (stmt);
556 block_stmt_iterator bsi;
557
558 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
559 return false;
560
561 if (stmt_bb != cand_bb)
562 return true;
563
564 /* Scan the block from the end, since the original ivs are usually
565 incremented at the end of the loop body. */
566 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 {
568 if (bsi_stmt (bsi) == cand->incremented_at)
569 return false;
570 if (bsi_stmt (bsi) == stmt)
571 return true;
572 }
573 }
574
575 /* Returns true if STMT is after the place where the induction variable
576 CAND is incremented in LOOP. */
577
578 static bool
579 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
580 {
581 switch (cand->pos)
582 {
583 case IP_END:
584 return false;
585
586 case IP_NORMAL:
587 return stmt_after_ip_normal_pos (loop, stmt);
588
589 case IP_ORIGINAL:
590 return stmt_after_ip_original_pos (cand, stmt);
591
592 default:
593 gcc_unreachable ();
594 }
595 }
596
597 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
598
599 static bool
600 abnormal_ssa_name_p (tree exp)
601 {
602 if (!exp)
603 return false;
604
605 if (TREE_CODE (exp) != SSA_NAME)
606 return false;
607
608 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
609 }
610
611 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
612 abnormal phi node. Callback for for_each_index. */
613
614 static bool
615 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
616 void *data ATTRIBUTE_UNUSED)
617 {
618 if (TREE_CODE (base) == ARRAY_REF)
619 {
620 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
621 return false;
622 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
623 return false;
624 }
625
626 return !abnormal_ssa_name_p (*index);
627 }
628
629 /* Returns true if EXPR contains an ssa name that occurs in an
630 abnormal phi node. */
631
632 bool
633 contains_abnormal_ssa_name_p (tree expr)
634 {
635 enum tree_code code;
636 enum tree_code_class class;
637
638 if (!expr)
639 return false;
640
641 code = TREE_CODE (expr);
642 class = TREE_CODE_CLASS (code);
643
644 if (code == SSA_NAME)
645 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
646
647 if (code == INTEGER_CST
648 || is_gimple_min_invariant (expr))
649 return false;
650
651 if (code == ADDR_EXPR)
652 return !for_each_index (&TREE_OPERAND (expr, 0),
653 idx_contains_abnormal_ssa_name_p,
654 NULL);
655
656 switch (class)
657 {
658 case tcc_binary:
659 case tcc_comparison:
660 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
661 return true;
662
663 /* Fallthru. */
664 case tcc_unary:
665 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
666 return true;
667
668 break;
669
670 default:
671 gcc_unreachable ();
672 }
673
674 return false;
675 }
676
677 /* Returns tree describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
679
680 static tree
681 niter_for_exit (struct ivopts_data *data, edge exit)
682 {
683 struct tree_niter_desc desc;
684 tree niter;
685 void **slot;
686
687 if (!data->niters)
688 {
689 data->niters = pointer_map_create ();
690 slot = NULL;
691 }
692 else
693 slot = pointer_map_contains (data->niters, exit);
694
695 if (!slot)
696 {
697 /* Try to determine number of iterations. We must know it
698 unconditionally (i.e., without possibility of # of iterations
699 being zero). Also, we cannot safely work with ssa names that
700 appear in phi nodes on abnormal edges, so that we do not create
701 overlapping life ranges for them (PR 27283). */
702 if (number_of_iterations_exit (data->current_loop,
703 exit, &desc, true)
704 && integer_zerop (desc.may_be_zero)
705 && !contains_abnormal_ssa_name_p (desc.niter))
706 niter = desc.niter;
707 else
708 niter = NULL_TREE;
709
710 *pointer_map_insert (data->niters, exit) = niter;
711 }
712 else
713 niter = *slot;
714
715 return niter;
716 }
717
718 /* Returns tree describing number of iterations determined from
719 single dominating exit of DATA->current_loop, or NULL if something
720 goes wrong. */
721
722 static tree
723 niter_for_single_dom_exit (struct ivopts_data *data)
724 {
725 edge exit = single_dom_exit (data->current_loop);
726
727 if (!exit)
728 return NULL;
729
730 return niter_for_exit (data, exit);
731 }
732
733 /* Initializes data structures used by the iv optimization pass, stored
734 in DATA. */
735
736 static void
737 tree_ssa_iv_optimize_init (struct ivopts_data *data)
738 {
739 data->version_info_size = 2 * num_ssa_names;
740 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
741 data->relevant = BITMAP_ALLOC (NULL);
742 data->important_candidates = BITMAP_ALLOC (NULL);
743 data->max_inv_id = 0;
744 data->niters = NULL;
745 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
746 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
747 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
748 }
749
750 /* Returns a memory object to which EXPR points. In case we are able to
751 determine that it does not point to any such object, NULL is returned. */
752
753 static tree
754 determine_base_object (tree expr)
755 {
756 enum tree_code code = TREE_CODE (expr);
757 tree base, obj, op0, op1;
758
759 /* If this is a pointer casted to any type, we need to determine
760 the base object for the pointer; so handle conversions before
761 throwing away non-pointer expressions. */
762 if (TREE_CODE (expr) == NOP_EXPR
763 || TREE_CODE (expr) == CONVERT_EXPR)
764 return determine_base_object (TREE_OPERAND (expr, 0));
765
766 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
767 return NULL_TREE;
768
769 switch (code)
770 {
771 case INTEGER_CST:
772 return NULL_TREE;
773
774 case ADDR_EXPR:
775 obj = TREE_OPERAND (expr, 0);
776 base = get_base_address (obj);
777
778 if (!base)
779 return expr;
780
781 if (TREE_CODE (base) == INDIRECT_REF)
782 return determine_base_object (TREE_OPERAND (base, 0));
783
784 return fold_convert (ptr_type_node,
785 build_fold_addr_expr (base));
786
787 case PLUS_EXPR:
788 case MINUS_EXPR:
789 op0 = determine_base_object (TREE_OPERAND (expr, 0));
790 op1 = determine_base_object (TREE_OPERAND (expr, 1));
791
792 if (!op1)
793 return op0;
794
795 if (!op0)
796 return (code == PLUS_EXPR
797 ? op1
798 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
799
800 return fold_build2 (code, ptr_type_node, op0, op1);
801
802 default:
803 return fold_convert (ptr_type_node, expr);
804 }
805 }
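
/* For example (illustrative): for EXPR == &a[i] the base object is the
   address of the array A; for EXPR == p + 4 with pointer P it is P itself;
   and for an integer constant it is NULL_TREE. */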
806
807 /* Allocates an induction variable with the given initial value BASE and
808 step STEP. */
809
810 static struct iv *
811 alloc_iv (tree base, tree step)
812 {
813 struct iv *iv = XCNEW (struct iv);
814 gcc_assert (step != NULL_TREE);
815
816 iv->base = base;
817 iv->base_object = determine_base_object (base);
818 iv->step = step;
819 iv->biv_p = false;
820 iv->have_use_for = false;
821 iv->use_id = 0;
822 iv->ssa_name = NULL_TREE;
823
824 return iv;
825 }
826
827 /* Sets BASE and STEP for induction variable IV. */
828
829 static void
830 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
831 {
832 struct version_info *info = name_info (data, iv);
833
834 gcc_assert (!info->iv);
835
836 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
837 info->iv = alloc_iv (base, step);
838 info->iv->ssa_name = iv;
839 }
840
841 /* Finds induction variable declaration for VAR. */
842
843 static struct iv *
844 get_iv (struct ivopts_data *data, tree var)
845 {
846 basic_block bb;
847 tree type = TREE_TYPE (var);
848
849 if (!POINTER_TYPE_P (type)
850 && !INTEGRAL_TYPE_P (type))
851 return NULL;
852
853 if (!name_info (data, var)->iv)
854 {
855 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
856
857 if (!bb
858 || !flow_bb_inside_loop_p (data->current_loop, bb))
859 set_iv (data, var, var, build_int_cst (type, 0));
860 }
861
862 return name_info (data, var)->iv;
863 }
864
865 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
866 not define a simple affine biv with nonzero step. */
867
868 static tree
869 determine_biv_step (tree phi)
870 {
871 struct loop *loop = bb_for_stmt (phi)->loop_father;
872 tree name = PHI_RESULT (phi);
873 affine_iv iv;
874
875 if (!is_gimple_reg (name))
876 return NULL_TREE;
877
878 if (!simple_iv (loop, phi, name, &iv, true))
879 return NULL_TREE;
880
881 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
882 }
883
884 /* Finds basic ivs. */
885
886 static bool
887 find_bivs (struct ivopts_data *data)
888 {
889 tree phi, step, type, base;
890 bool found = false;
891 struct loop *loop = data->current_loop;
892
893 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
894 {
895 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
896 continue;
897
898 step = determine_biv_step (phi);
899 if (!step)
900 continue;
901
902 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
903 base = expand_simple_operations (base);
904 if (contains_abnormal_ssa_name_p (base)
905 || contains_abnormal_ssa_name_p (step))
906 continue;
907
908 type = TREE_TYPE (PHI_RESULT (phi));
909 base = fold_convert (type, base);
910 if (step)
911 step = fold_convert (type, step);
912
913 set_iv (data, PHI_RESULT (phi), base, step);
914 found = true;
915 }
916
917 return found;
918 }
919
920 /* Marks basic ivs. */
921
922 static void
923 mark_bivs (struct ivopts_data *data)
924 {
925 tree phi, var;
926 struct iv *iv, *incr_iv;
927 struct loop *loop = data->current_loop;
928 basic_block incr_bb;
929
930 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
931 {
932 iv = get_iv (data, PHI_RESULT (phi));
933 if (!iv)
934 continue;
935
936 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
937 incr_iv = get_iv (data, var);
938 if (!incr_iv)
939 continue;
940
941 /* If the increment is in a subloop, ignore it. */
942 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
943 if (incr_bb->loop_father != data->current_loop
944 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
945 continue;
946
947 iv->biv_p = true;
948 incr_iv->biv_p = true;
949 }
950 }
951
952 /* Checks whether STMT defines a linear induction variable and stores its
953 parameters to IV. */
954
955 static bool
956 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
957 {
958 tree lhs;
959 struct loop *loop = data->current_loop;
960
961 iv->base = NULL_TREE;
962 iv->step = NULL_TREE;
963
964 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
965 return false;
966
967 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
968 if (TREE_CODE (lhs) != SSA_NAME)
969 return false;
970
971 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
972 return false;
973 iv->base = expand_simple_operations (iv->base);
974
975 if (contains_abnormal_ssa_name_p (iv->base)
976 || contains_abnormal_ssa_name_p (iv->step))
977 return false;
978
979 return true;
980 }
981
982 /* Finds general ivs in statement STMT. */
983
984 static void
985 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
986 {
987 affine_iv iv;
988
989 if (!find_givs_in_stmt_scev (data, stmt, &iv))
990 return;
991
992 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
993 }
994
995 /* Finds general ivs in basic block BB. */
996
997 static void
998 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
999 {
1000 block_stmt_iterator bsi;
1001
1002 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1003 find_givs_in_stmt (data, bsi_stmt (bsi));
1004 }
1005
1006 /* Finds general ivs. */
1007
1008 static void
1009 find_givs (struct ivopts_data *data)
1010 {
1011 struct loop *loop = data->current_loop;
1012 basic_block *body = get_loop_body_in_dom_order (loop);
1013 unsigned i;
1014
1015 for (i = 0; i < loop->num_nodes; i++)
1016 find_givs_in_bb (data, body[i]);
1017 free (body);
1018 }
1019
1020 /* For each ssa name defined in LOOP, determines whether it is an induction
1021 variable and if so, its initial value and step. */
1022
1023 static bool
1024 find_induction_variables (struct ivopts_data *data)
1025 {
1026 unsigned i;
1027 bitmap_iterator bi;
1028
1029 if (!find_bivs (data))
1030 return false;
1031
1032 find_givs (data);
1033 mark_bivs (data);
1034
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 {
1037 tree niter = niter_for_single_dom_exit (data);
1038
1039 if (niter)
1040 {
1041 fprintf (dump_file, " number of iterations ");
1042 print_generic_expr (dump_file, niter, TDF_SLIM);
1043 fprintf (dump_file, "\n\n");
1044 }
1045
1046 fprintf (dump_file, "Induction variables:\n\n");
1047
1048 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1049 {
1050 if (ver_info (data, i)->iv)
1051 dump_iv (dump_file, ver_info (data, i)->iv);
1052 }
1053 }
1054
1055 return true;
1056 }
1057
1058 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1059
1060 static struct iv_use *
1061 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1062 tree stmt, enum use_type use_type)
1063 {
1064 struct iv_use *use = XCNEW (struct iv_use);
1065
1066 use->id = n_iv_uses (data);
1067 use->type = use_type;
1068 use->iv = iv;
1069 use->stmt = stmt;
1070 use->op_p = use_p;
1071 use->related_cands = BITMAP_ALLOC (NULL);
1072
1073 /* Reset the ssa name so that it is not shown in the dumps, in case it
1074 was not reset by the caller. */
1075 iv->ssa_name = NULL_TREE;
1076
1077 if (dump_file && (dump_flags & TDF_DETAILS))
1078 dump_use (dump_file, use);
1079
1080 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1081
1082 return use;
1083 }
1084
1085 /* Checks whether OP is a loop-level invariant and if so, records it.
1086 NONLINEAR_USE is true if the invariant is used in a way we do not
1087 handle specially. */
1088
1089 static void
1090 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1091 {
1092 basic_block bb;
1093 struct version_info *info;
1094
1095 if (TREE_CODE (op) != SSA_NAME
1096 || !is_gimple_reg (op))
1097 return;
1098
1099 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1100 if (bb
1101 && flow_bb_inside_loop_p (data->current_loop, bb))
1102 return;
1103
1104 info = name_info (data, op);
1105 info->name = op;
1106 info->has_nonlin_use |= nonlinear_use;
1107 if (!info->inv_id)
1108 info->inv_id = ++data->max_inv_id;
1109 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1110 }
1111
1112 /* Checks whether the use OP is interesting and if so, records it. */
1113
1114 static struct iv_use *
1115 find_interesting_uses_op (struct ivopts_data *data, tree op)
1116 {
1117 struct iv *iv;
1118 struct iv *civ;
1119 tree stmt;
1120 struct iv_use *use;
1121
1122 if (TREE_CODE (op) != SSA_NAME)
1123 return NULL;
1124
1125 iv = get_iv (data, op);
1126 if (!iv)
1127 return NULL;
1128
1129 if (iv->have_use_for)
1130 {
1131 use = iv_use (data, iv->use_id);
1132
1133 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1134 return use;
1135 }
1136
1137 if (integer_zerop (iv->step))
1138 {
1139 record_invariant (data, op, true);
1140 return NULL;
1141 }
1142 iv->have_use_for = true;
1143
1144 civ = XNEW (struct iv);
1145 *civ = *iv;
1146
1147 stmt = SSA_NAME_DEF_STMT (op);
1148 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1149 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1150
1151 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1152 iv->use_id = use->id;
1153
1154 return use;
1155 }
1156
1157 /* Given a condition *COND_P, checks whether it is a compare of an induction
1158 variable and an invariant. If this is the case, CONTROL_VAR is set
1159 to location of the iv, BOUND to the location of the invariant,
1160 IV_VAR and IV_BOUND are set to the corresponding induction variable
1161 descriptions, and true is returned. If this is not the case,
1162 CONTROL_VAR and BOUND are set to the arguments of the condition and
1163 false is returned. */
1164
1165 static bool
1166 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1167 tree **control_var, tree **bound,
1168 struct iv **iv_var, struct iv **iv_bound)
1169 {
1170 /* The nodes returned when COND has just one operand. Note that you should
1171 not modify anything in BOUND or IV_BOUND because of this. */
1172 static struct iv const_iv;
1173 static tree zero;
1174 tree cond = *cond_p;
1175 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1176 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1177 bool ret = false;
1178
1179 zero = integer_zero_node;
1180 const_iv.step = integer_zero_node;
1181
1182 if (TREE_CODE (cond) == SSA_NAME)
1183 {
1184 op0 = cond_p;
1185 iv0 = get_iv (data, cond);
1186 ret = (iv0 && !integer_zerop (iv0->step));
1187 goto end;
1188 }
1189
1190 if (!COMPARISON_CLASS_P (cond))
1191 {
1192 op0 = cond_p;
1193 goto end;
1194 }
1195
1196 op0 = &TREE_OPERAND (cond, 0);
1197 op1 = &TREE_OPERAND (cond, 1);
1198 if (TREE_CODE (*op0) == SSA_NAME)
1199 iv0 = get_iv (data, *op0);
1200 if (TREE_CODE (*op1) == SSA_NAME)
1201 iv1 = get_iv (data, *op1);
1202
1203 /* Exactly one of the compared values must be an iv, and the other one must
1204 be an invariant. */
1205 if (!iv0 || !iv1)
1206 goto end;
1207
1208 if (integer_zerop (iv0->step))
1209 {
1210 /* Control variable may be on the other side. */
1211 tmp_op = op0; op0 = op1; op1 = tmp_op;
1212 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1213 }
1214 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1215
1216 end:
1217 if (control_var)
1218 *control_var = op0;
1219 if (iv_var)
1220 *iv_var = iv0;
1221 if (bound)
1222 *bound = op1;
1223 if (iv_bound)
1224 *iv_bound = iv1;
1225
1226 return ret;
1227 }
1228
1229 /* Checks whether the condition *COND_P in STMT is interesting
1230 and if so, records it. */
1231
1232 static void
1233 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1234 {
1235 tree *var_p, *bound_p;
1236 struct iv *var_iv, *civ;
1237
1238 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1239 {
1240 find_interesting_uses_op (data, *var_p);
1241 find_interesting_uses_op (data, *bound_p);
1242 return;
1243 }
1244
1245 civ = XNEW (struct iv);
1246 *civ = *var_iv;
1247 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1248 }
1249
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251 i.e. if all its operands are defined outside of the LOOP. */
1252
1253 bool
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1255 {
1256 basic_block def_bb;
1257 unsigned i, len;
1258
1259 if (is_gimple_min_invariant (expr))
1260 return true;
1261
1262 if (TREE_CODE (expr) == SSA_NAME)
1263 {
1264 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1265 if (def_bb
1266 && flow_bb_inside_loop_p (loop, def_bb))
1267 return false;
1268
1269 return true;
1270 }
1271
1272 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1273 return false;
1274
1275 len = TREE_OPERAND_LENGTH (expr);
1276 for (i = 0; i < len; i++)
1277 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278 return false;
1279
1280 return true;
1281 }
1282
1283 /* Accumulates the steps of indices into DATA and replaces their values with the
1284 initial ones. Returns false when the value of the index cannot be determined.
1285 Callback for for_each_index. */
1286
1287 struct ifs_ivopts_data
1288 {
1289 struct ivopts_data *ivopts_data;
1290 tree stmt;
1291 tree step;
1292 };
1293
1294 static bool
1295 idx_find_step (tree base, tree *idx, void *data)
1296 {
1297 struct ifs_ivopts_data *dta = data;
1298 struct iv *iv;
1299 tree step, iv_base, iv_step, lbound, off;
1300 struct loop *loop = dta->ivopts_data->current_loop;
1301
1302 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1303 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1304 return false;
1305
1306 /* If base is a component ref, require that the offset of the reference
1307 be invariant. */
1308 if (TREE_CODE (base) == COMPONENT_REF)
1309 {
1310 off = component_ref_field_offset (base);
1311 return expr_invariant_in_loop_p (loop, off);
1312 }
1313
1314 /* If base is an array, first check whether we will be able to move the
1315 reference out of the loop (in order to take its address in strength
1316 reduction). In order for this to work we need both lower bound
1317 and step to be loop invariants. */
1318 if (TREE_CODE (base) == ARRAY_REF)
1319 {
1320 step = array_ref_element_size (base);
1321 lbound = array_ref_low_bound (base);
1322
1323 if (!expr_invariant_in_loop_p (loop, step)
1324 || !expr_invariant_in_loop_p (loop, lbound))
1325 return false;
1326 }
1327
1328 if (TREE_CODE (*idx) != SSA_NAME)
1329 return true;
1330
1331 iv = get_iv (dta->ivopts_data, *idx);
1332 if (!iv)
1333 return false;
1334
1335 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1336 *&x[0], which is not folded and does not trigger the
1337 ARRAY_REF path below. */
1338 *idx = iv->base;
1339
1340 if (integer_zerop (iv->step))
1341 return true;
1342
1343 if (TREE_CODE (base) == ARRAY_REF)
1344 {
1345 step = array_ref_element_size (base);
1346
1347 /* We only handle addresses whose step is an integer constant. */
1348 if (TREE_CODE (step) != INTEGER_CST)
1349 return false;
1350 }
1351 else
1352 /* The step for pointer arithmetic is already 1 byte. */
1353 step = build_int_cst (sizetype, 1);
1354
1355 iv_base = iv->base;
1356 iv_step = iv->step;
1357 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1358 sizetype, &iv_base, &iv_step, dta->stmt,
1359 false))
1360 {
1361 /* The index might wrap. */
1362 return false;
1363 }
1364
1365 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1366 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1367
1368 return true;
1369 }
1370
1371 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1372 object is passed to it in DATA. */
1373
1374 static bool
1375 idx_record_use (tree base, tree *idx,
1376 void *data)
1377 {
1378 find_interesting_uses_op (data, *idx);
1379 if (TREE_CODE (base) == ARRAY_REF)
1380 {
1381 find_interesting_uses_op (data, array_ref_element_size (base));
1382 find_interesting_uses_op (data, array_ref_low_bound (base));
1383 }
1384 return true;
1385 }
1386
1387 /* Returns true if memory reference REF may be unaligned. */
1388
1389 static bool
1390 may_be_unaligned_p (tree ref)
1391 {
1392 tree base;
1393 tree base_type;
1394 HOST_WIDE_INT bitsize;
1395 HOST_WIDE_INT bitpos;
1396 tree toffset;
1397 enum machine_mode mode;
1398 int unsignedp, volatilep;
1399 unsigned base_align;
1400
1401 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402 thus they are not misaligned. */
1403 if (TREE_CODE (ref) == TARGET_MEM_REF)
1404 return false;
1405
1406 /* The test below is basically a copy of what expr.c:normal_inner_ref
1407 does to check whether the object must be loaded by parts when
1408 STRICT_ALIGNMENT is true. */
1409 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410 &unsignedp, &volatilep, true);
1411 base_type = TREE_TYPE (base);
1412 base_align = TYPE_ALIGN (base_type);
1413
1414 if (mode != BLKmode
1415 && (base_align < GET_MODE_ALIGNMENT (mode)
1416 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417 || bitpos % BITS_PER_UNIT != 0))
1418 return true;
1419
1420 return false;
1421 }
1422
1423 /* Return true if EXPR may be non-addressable. */
1424
1425 static bool
1426 may_be_nonaddressable_p (tree expr)
1427 {
1428 switch (TREE_CODE (expr))
1429 {
1430 case COMPONENT_REF:
1431 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1433
1434 case ARRAY_REF:
1435 case ARRAY_RANGE_REF:
1436 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1437
1438 case VIEW_CONVERT_EXPR:
1439 /* This kind of view-conversion may wrap non-addressable objects
1440 and make them look addressable. After some processing the
1441 non-addressability may be uncovered again, causing ADDR_EXPRs
1442 of inappropriate objects to be built. */
1443 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1445
1446 default:
1447 break;
1448 }
1449
1450 return false;
1451 }
1452
1453 /* Finds addresses in *OP_P inside STMT. */
1454
1455 static void
1456 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1457 {
1458 tree base = *op_p, step = build_int_cst (sizetype, 0);
1459 struct iv *civ;
1460 struct ifs_ivopts_data ifs_ivopts_data;
1461
1462 /* Do not play with volatile memory references. A bit too conservative,
1463 perhaps, but safe. */
1464 if (stmt_ann (stmt)->has_volatile_ops)
1465 goto fail;
1466
1467 /* Ignore bitfields for now. Not really something terribly complicated
1468 to handle. TODO. */
1469 if (TREE_CODE (base) == BIT_FIELD_REF)
1470 goto fail;
1471
1472 if (may_be_nonaddressable_p (base))
1473 goto fail;
1474
1475 if (STRICT_ALIGNMENT
1476 && may_be_unaligned_p (base))
1477 goto fail;
1478
1479 base = unshare_expr (base);
1480
1481 if (TREE_CODE (base) == TARGET_MEM_REF)
1482 {
1483 tree type = build_pointer_type (TREE_TYPE (base));
1484 tree astep;
1485
1486 if (TMR_BASE (base)
1487 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1488 {
1489 civ = get_iv (data, TMR_BASE (base));
1490 if (!civ)
1491 goto fail;
1492
1493 TMR_BASE (base) = civ->base;
1494 step = civ->step;
1495 }
1496 if (TMR_INDEX (base)
1497 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1498 {
1499 civ = get_iv (data, TMR_INDEX (base));
1500 if (!civ)
1501 goto fail;
1502
1503 TMR_INDEX (base) = civ->base;
1504 astep = civ->step;
1505
1506 if (astep)
1507 {
1508 if (TMR_STEP (base))
1509 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1510
1511 step = fold_build2 (PLUS_EXPR, type, step, astep);
1512 }
1513 }
1514
1515 if (integer_zerop (step))
1516 goto fail;
1517 base = tree_mem_ref_addr (type, base);
1518 }
1519 else
1520 {
1521 ifs_ivopts_data.ivopts_data = data;
1522 ifs_ivopts_data.stmt = stmt;
1523 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1524 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1525 || integer_zerop (ifs_ivopts_data.step))
1526 goto fail;
1527 step = ifs_ivopts_data.step;
1528
1529 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1530 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1531
1532 base = build_fold_addr_expr (base);
1533
1534 /* Substituting bases of IVs into the base expression might
1535 have caused folding opportunities. */
1536 if (TREE_CODE (base) == ADDR_EXPR)
1537 {
1538 tree *ref = &TREE_OPERAND (base, 0);
1539 while (handled_component_p (*ref))
1540 ref = &TREE_OPERAND (*ref, 0);
1541 if (TREE_CODE (*ref) == INDIRECT_REF)
1542 *ref = fold_indirect_ref (*ref);
1543 }
1544 }
1545
1546 civ = alloc_iv (base, step);
1547 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548 return;
1549
1550 fail:
1551 for_each_index (op_p, idx_record_use, data);
1552 }
1553
1554 /* Finds and records invariants used in STMT. */
1555
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1558 {
1559 ssa_op_iter iter;
1560 use_operand_p use_p;
1561 tree op;
1562
1563 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1564 {
1565 op = USE_FROM_PTR (use_p);
1566 record_invariant (data, op, false);
1567 }
1568 }
1569
1570 /* Finds interesting uses of induction variables in the statement STMT. */
1571
1572 static void
1573 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1574 {
1575 struct iv *iv;
1576 tree op, lhs, rhs;
1577 ssa_op_iter iter;
1578 use_operand_p use_p;
1579
1580 find_invariants_stmt (data, stmt);
1581
1582 if (TREE_CODE (stmt) == COND_EXPR)
1583 {
1584 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1585 return;
1586 }
1587
1588 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1589 {
1590 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1591 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1592
1593 if (TREE_CODE (lhs) == SSA_NAME)
1594 {
1595 /* If the statement defines an induction variable, the uses are not
1596 interesting by themselves. */
1597
1598 iv = get_iv (data, lhs);
1599
1600 if (iv && !integer_zerop (iv->step))
1601 return;
1602 }
1603
1604 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1605 {
1606 case tcc_comparison:
1607 find_interesting_uses_cond (data, stmt,
1608 &GIMPLE_STMT_OPERAND (stmt, 1));
1609 return;
1610
1611 case tcc_reference:
1612 find_interesting_uses_address (data, stmt,
1613 &GIMPLE_STMT_OPERAND (stmt, 1));
1614 if (REFERENCE_CLASS_P (lhs))
1615 find_interesting_uses_address (data, stmt,
1616 &GIMPLE_STMT_OPERAND (stmt, 0));
1617 return;
1618
1619 default: ;
1620 }
1621
1622 if (REFERENCE_CLASS_P (lhs)
1623 && is_gimple_val (rhs))
1624 {
1625 find_interesting_uses_address (data, stmt,
1626 &GIMPLE_STMT_OPERAND (stmt, 0));
1627 find_interesting_uses_op (data, rhs);
1628 return;
1629 }
1630
1631 /* TODO -- we should also handle address uses of type
1632
1633 memory = call (whatever);
1634
1635 and
1636
1637 call (memory). */
1638 }
1639
1640 if (TREE_CODE (stmt) == PHI_NODE
1641 && bb_for_stmt (stmt) == data->current_loop->header)
1642 {
1643 lhs = PHI_RESULT (stmt);
1644 iv = get_iv (data, lhs);
1645
1646 if (iv && !integer_zerop (iv->step))
1647 return;
1648 }
1649
1650 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1651 {
1652 op = USE_FROM_PTR (use_p);
1653
1654 if (TREE_CODE (op) != SSA_NAME)
1655 continue;
1656
1657 iv = get_iv (data, op);
1658 if (!iv)
1659 continue;
1660
1661 find_interesting_uses_op (data, op);
1662 }
1663 }
1664
1665 /* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1667
1668 static void
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1670 {
1671 tree phi, def;
1672
1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1674 {
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 if (is_gimple_reg (def))
1677 find_interesting_uses_op (data, def);
1678 }
1679 }
1680
1681 /* Finds uses of the induction variables that are interesting. */
1682
1683 static void
1684 find_interesting_uses (struct ivopts_data *data)
1685 {
1686 basic_block bb;
1687 block_stmt_iterator bsi;
1688 tree phi;
1689 basic_block *body = get_loop_body (data->current_loop);
1690 unsigned i;
1691 struct version_info *info;
1692 edge e;
1693
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1695 fprintf (dump_file, "Uses:\n\n");
1696
1697 for (i = 0; i < data->current_loop->num_nodes; i++)
1698 {
1699 edge_iterator ei;
1700 bb = body[i];
1701
1702 FOR_EACH_EDGE (e, ei, bb->succs)
1703 if (e->dest != EXIT_BLOCK_PTR
1704 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705 find_interesting_uses_outside (data, e);
1706
1707 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708 find_interesting_uses_stmt (data, phi);
1709 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1711 }
1712
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 {
1715 bitmap_iterator bi;
1716
1717 fprintf (dump_file, "\n");
1718
1719 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720 {
1721 info = ver_info (data, i);
1722 if (info->inv_id)
1723 {
1724 fprintf (dump_file, " ");
1725 print_generic_expr (dump_file, info->name, TDF_SLIM);
1726 fprintf (dump_file, " is invariant (%d)%s\n",
1727 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1728 }
1729 }
1730
1731 fprintf (dump_file, "\n");
1732 }
1733
1734 free (body);
1735 }
1736
1737 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1738 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1739 we are at the top-level of the processed address. */
1740
1741 static tree
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743 unsigned HOST_WIDE_INT *offset)
1744 {
1745 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746 enum tree_code code;
1747 tree type, orig_type = TREE_TYPE (expr);
1748 unsigned HOST_WIDE_INT off0, off1, st;
1749 tree orig_expr = expr;
1750
1751 STRIP_NOPS (expr);
1752
1753 type = TREE_TYPE (expr);
1754 code = TREE_CODE (expr);
1755 *offset = 0;
1756
1757 switch (code)
1758 {
1759 case INTEGER_CST:
1760 if (!cst_and_fits_in_hwi (expr)
1761 || integer_zerop (expr))
1762 return orig_expr;
1763
1764 *offset = int_cst_value (expr);
1765 return build_int_cst (orig_type, 0);
1766
1767 case PLUS_EXPR:
1768 case MINUS_EXPR:
1769 op0 = TREE_OPERAND (expr, 0);
1770 op1 = TREE_OPERAND (expr, 1);
1771
1772 op0 = strip_offset_1 (op0, false, false, &off0);
1773 op1 = strip_offset_1 (op1, false, false, &off1);
1774
1775 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776 if (op0 == TREE_OPERAND (expr, 0)
1777 && op1 == TREE_OPERAND (expr, 1))
1778 return orig_expr;
1779
1780 if (integer_zerop (op1))
1781 expr = op0;
1782 else if (integer_zerop (op0))
1783 {
1784 if (code == PLUS_EXPR)
1785 expr = op1;
1786 else
1787 expr = fold_build1 (NEGATE_EXPR, type, op1);
1788 }
1789 else
1790 expr = fold_build2 (code, type, op0, op1);
1791
1792 return fold_convert (orig_type, expr);
1793
1794 case ARRAY_REF:
1795 if (!inside_addr)
1796 return orig_expr;
1797
1798 step = array_ref_element_size (expr);
1799 if (!cst_and_fits_in_hwi (step))
1800 break;
1801
1802 st = int_cst_value (step);
1803 op1 = TREE_OPERAND (expr, 1);
1804 op1 = strip_offset_1 (op1, false, false, &off1);
1805 *offset = off1 * st;
1806
1807 if (top_compref
1808 && integer_zerop (op1))
1809 {
1810 /* Strip the component reference completely. */
1811 op0 = TREE_OPERAND (expr, 0);
1812 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1813 *offset += off0;
1814 return op0;
1815 }
1816 break;
1817
1818 case COMPONENT_REF:
1819 if (!inside_addr)
1820 return orig_expr;
1821
1822 tmp = component_ref_field_offset (expr);
1823 if (top_compref
1824 && cst_and_fits_in_hwi (tmp))
1825 {
1826 /* Strip the component reference completely. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829 *offset = off0 + int_cst_value (tmp);
1830 return op0;
1831 }
1832 break;
1833
1834 case ADDR_EXPR:
1835 op0 = TREE_OPERAND (expr, 0);
1836 op0 = strip_offset_1 (op0, true, true, &off0);
1837 *offset += off0;
1838
1839 if (op0 == TREE_OPERAND (expr, 0))
1840 return orig_expr;
1841
1842 expr = build_fold_addr_expr (op0);
1843 return fold_convert (orig_type, expr);
1844
1845 case INDIRECT_REF:
1846 inside_addr = false;
1847 break;
1848
1849 default:
1850 return orig_expr;
1851 }
1852
1853 /* Default handling of expressions for which we want to recurse into
1854 the first operand. */
1855 op0 = TREE_OPERAND (expr, 0);
1856 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1857 *offset += off0;
1858
1859 if (op0 == TREE_OPERAND (expr, 0)
1860 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1861 return orig_expr;
1862
1863 expr = copy_node (expr);
1864 TREE_OPERAND (expr, 0) = op0;
1865 if (op1)
1866 TREE_OPERAND (expr, 1) = op1;
1867
1868 /* Inside an address, we might strip the top-level component references,
1869 thus changing the type of the expression. Handling of ADDR_EXPR
1870 will fix that. */
1871 expr = fold_convert (orig_type, expr);
1872
1873 return expr;
1874 }
1875
1876 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1877
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 {
1881 return strip_offset_1 (expr, false, false, offset);
1882 }
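
/* For example (illustrative): for EXPR == &a[2].f, where the element size
   of A and the offset of field F are compile-time constants, the result is
   the address of A itself, with the combined constant byte offset stored
   in *OFFSET. */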
1883
1884 /* Returns a variant of TYPE that can be used as a base for different uses.
1885 We return an unsigned type with the same precision, which avoids problems
1886 with overflows. */
1887
1888 static tree
1889 generic_type_for (tree type)
1890 {
1891 if (POINTER_TYPE_P (type))
1892 return unsigned_type_for (type);
1893
1894 if (TYPE_UNSIGNED (type))
1895 return type;
1896
1897 return unsigned_type_for (type);
1898 }
1899
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap in which we should store them. */
1902
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 {
1907 bitmap *depends_on = data;
1908 struct version_info *info;
1909
1910 if (TREE_CODE (*expr_p) != SSA_NAME)
1911 return NULL_TREE;
1912 info = name_info (fd_ivopts_data, *expr_p);
1913
1914 if (!info->inv_id || info->has_nonlin_use)
1915 return NULL_TREE;
1916
1917 if (!*depends_on)
1918 *depends_on = BITMAP_ALLOC (NULL);
1919 bitmap_set_bit (*depends_on, info->inv_id);
1920
1921 return NULL_TREE;
1922 }
1923
1924 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1925 position to POS. If USE is not NULL, the candidate is set as related to
1926 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1927 replacement of the final value of the iv by a direct computation. */
1928
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931 tree base, tree step, bool important, enum iv_position pos,
1932 struct iv_use *use, tree incremented_at)
1933 {
1934 unsigned i;
1935 struct iv_cand *cand = NULL;
1936 tree type, orig_type;
1937
1938 if (base)
1939 {
1940 orig_type = TREE_TYPE (base);
1941 type = generic_type_for (orig_type);
1942 if (type != orig_type)
1943 {
1944 base = fold_convert (type, base);
1945 step = fold_convert (type, step);
1946 }
1947 }
1948
1949 for (i = 0; i < n_iv_cands (data); i++)
1950 {
1951 cand = iv_cand (data, i);
1952
1953 if (cand->pos != pos)
1954 continue;
1955
1956 if (cand->incremented_at != incremented_at)
1957 continue;
1958
1959 if (!cand->iv)
1960 {
1961 if (!base && !step)
1962 break;
1963
1964 continue;
1965 }
1966
1967 if (!base && !step)
1968 continue;
1969
1970 if (operand_equal_p (base, cand->iv->base, 0)
1971 && operand_equal_p (step, cand->iv->step, 0))
1972 break;
1973 }
1974
1975 if (i == n_iv_cands (data))
1976 {
1977 cand = XCNEW (struct iv_cand);
1978 cand->id = i;
1979
1980 if (!base && !step)
1981 cand->iv = NULL;
1982 else
1983 cand->iv = alloc_iv (base, step);
1984
1985 cand->pos = pos;
1986 if (pos != IP_ORIGINAL && cand->iv)
1987 {
1988 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1989 cand->var_after = cand->var_before;
1990 }
1991 cand->important = important;
1992 cand->incremented_at = incremented_at;
1993 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1994
1995 if (step
1996 && TREE_CODE (step) != INTEGER_CST)
1997 {
1998 fd_ivopts_data = data;
1999 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2000 }
2001
2002 if (dump_file && (dump_flags & TDF_DETAILS))
2003 dump_cand (dump_file, cand);
2004 }
2005
2006 if (important && !cand->important)
2007 {
2008 cand->important = true;
2009 if (dump_file && (dump_flags & TDF_DETAILS))
2010 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2011 }
2012
2013 if (use)
2014 {
2015 bitmap_set_bit (use->related_cands, i);
2016 if (dump_file && (dump_flags & TDF_DETAILS))
2017 fprintf (dump_file, "Candidate %d is related to use %d\n",
2018 cand->id, use->id);
2019 }
2020
2021 return cand;
2022 }
2023
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2025 is allowed.
2026
2027 The purpose is to avoid splitting latch edge with a biv increment, thus
2028 creating a jump, possibly confusing other optimization passes and leaving
2029 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2030 is not available (so we do not have a better alternative), or if the latch
2031 edge is already nonempty. */
2032
2033 static bool
2034 allow_ip_end_pos_p (struct loop *loop)
2035 {
2036 if (!ip_normal_pos (loop))
2037 return true;
2038
2039 if (!empty_block_p (ip_end_pos (loop)))
2040 return true;
2041
2042 return false;
2043 }
2044
2045 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
2046 position to POS. If USE is not NULL, the candidate is set as related to
2047 it. The candidate computation is scheduled on all available positions. */
2048
2049 static void
2050 add_candidate (struct ivopts_data *data,
2051 tree base, tree step, bool important, struct iv_use *use)
2052 {
2053 if (ip_normal_pos (data->current_loop))
2054 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055 if (ip_end_pos (data->current_loop)
2056 && allow_ip_end_pos_p (data->current_loop))
2057 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2058 }
2059
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061 type with SIZE bits. */
2062
2063 static void
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2065 unsigned int size)
2066 {
2067 tree type = lang_hooks.types.type_for_size (size, true);
2068 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2069 true, NULL);
2070 }
2071
2072 /* Adds standard iv candidates. */
2073
2074 static void
2075 add_standard_iv_candidates (struct ivopts_data *data)
2076 {
2077 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2078
2079 /* The same for a double-integer type if it is still fast enough. */
2080 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2082 }
2083
2084
2085 /* Adds candidates based on the old induction variable IV. */
2086
2087 static void
2088 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2089 {
2090 tree phi, def;
2091 struct iv_cand *cand;
2092
2093 add_candidate (data, iv->base, iv->step, true, NULL);
2094
2095 /* The same, but with initial value zero. */
2096 add_candidate (data,
2097 build_int_cst (TREE_TYPE (iv->base), 0),
2098 iv->step, true, NULL);
2099
2100 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2101 if (TREE_CODE (phi) == PHI_NODE)
2102 {
2103 /* Additionally record the possibility of leaving the original iv
2104 untouched. */
2105 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2106 cand = add_candidate_1 (data,
2107 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2108 SSA_NAME_DEF_STMT (def));
2109 cand->var_before = iv->ssa_name;
2110 cand->var_after = def;
2111 }
2112 }
2113
2114 /* Adds candidates based on the old induction variables. */
2115
2116 static void
2117 add_old_ivs_candidates (struct ivopts_data *data)
2118 {
2119 unsigned i;
2120 struct iv *iv;
2121 bitmap_iterator bi;
2122
2123 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2124 {
2125 iv = ver_info (data, i)->iv;
2126 if (iv && iv->biv_p && !integer_zerop (iv->step))
2127 add_old_iv_candidates (data, iv);
2128 }
2129 }
2130
2131 /* Adds candidates based on the value of the induction variable IV and USE. */
2132
2133 static void
2134 add_iv_value_candidates (struct ivopts_data *data,
2135 struct iv *iv, struct iv_use *use)
2136 {
2137 unsigned HOST_WIDE_INT offset;
2138 tree base;
2139
2140 add_candidate (data, iv->base, iv->step, false, use);
2141
2142 /* The same, but with initial value zero. Make such a variable important,
2143 since it is generic enough so that possibly many uses may be based
2144 on it. */
2145 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146 iv->step, true, use);
2147
2148 /* Third, try removing the constant offset. */
2149 base = strip_offset (iv->base, &offset);
2150 if (offset)
2151 add_candidate (data, base, iv->step, false, use);
2152 }
2153
2154 /* Adds candidates based on the uses. */
2155
2156 static void
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2158 {
2159 unsigned i;
2160
2161 for (i = 0; i < n_iv_uses (data); i++)
2162 {
2163 struct iv_use *use = iv_use (data, i);
2164
2165 if (!use)
2166 continue;
2167
2168 switch (use->type)
2169 {
2170 case USE_NONLINEAR_EXPR:
2171 case USE_COMPARE:
2172 case USE_ADDRESS:
2173 /* Just add the ivs based on the value of the iv used here. */
2174 add_iv_value_candidates (data, use->iv, use);
2175 break;
2176
2177 default:
2178 gcc_unreachable ();
2179 }
2180 }
2181 }
2182
2183 /* Record important candidates and add them to related_cands bitmaps
2184 if needed. */
2185
2186 static void
2187 record_important_candidates (struct ivopts_data *data)
2188 {
2189 unsigned i;
2190 struct iv_use *use;
2191
2192 for (i = 0; i < n_iv_cands (data); i++)
2193 {
2194 struct iv_cand *cand = iv_cand (data, i);
2195
2196 if (cand->important)
2197 bitmap_set_bit (data->important_candidates, i);
2198 }
2199
2200 data->consider_all_candidates = (n_iv_cands (data)
2201 <= CONSIDER_ALL_CANDIDATES_BOUND);
2202
2203 if (data->consider_all_candidates)
2204 {
2205 /* We will not need "related_cands" bitmaps in this case,
2206 so release them to decrease peak memory consumption. */
2207 for (i = 0; i < n_iv_uses (data); i++)
2208 {
2209 use = iv_use (data, i);
2210 BITMAP_FREE (use->related_cands);
2211 }
2212 }
2213 else
2214 {
2215 /* Add important candidates to the related_cands bitmaps. */
2216 for (i = 0; i < n_iv_uses (data); i++)
2217 bitmap_ior_into (iv_use (data, i)->related_cands,
2218 data->important_candidates);
2219 }
2220 }
2221
2222 /* Finds the candidates for the induction variables. */
2223
2224 static void
2225 find_iv_candidates (struct ivopts_data *data)
2226 {
2227 /* Add commonly used ivs. */
2228 add_standard_iv_candidates (data);
2229
2230 /* Add old induction variables. */
2231 add_old_ivs_candidates (data);
2232
2233 /* Add induction variables derived from uses. */
2234 add_derived_ivs_candidates (data);
2235
2236 /* Record the important candidates. */
2237 record_important_candidates (data);
2238 }
2239
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242 we allocate a small open-addressed hash table for every use. */
2243
2244 static void
2245 alloc_use_cost_map (struct ivopts_data *data)
2246 {
2247 unsigned i, size, s, j;
2248
2249 for (i = 0; i < n_iv_uses (data); i++)
2250 {
2251 struct iv_use *use = iv_use (data, i);
2252 bitmap_iterator bi;
2253
2254 if (data->consider_all_candidates)
2255 size = n_iv_cands (data);
2256 else
2257 {
2258 s = 0;
2259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2260 {
2261 s++;
2262 }
2263
2264 /* Round up to the nearest power of two, so that computing the modulus by it is fast. */
2265 for (size = 1; size < s; size <<= 1)
2266 continue;
2267 }
2268
2269 use->n_map_members = size;
2270 use->cost_map = XCNEWVEC (struct cost_pair, size);
2271 }
2272 }
2273
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2275 on invariants DEPENDS_ON and that the value used in expressing it
2276 is VALUE. */
2277
2278 static void
2279 set_use_iv_cost (struct ivopts_data *data,
2280 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281 bitmap depends_on, tree value)
2282 {
2283 unsigned i, s;
2284
2285 if (cost == INFTY)
2286 {
2287 BITMAP_FREE (depends_on);
2288 return;
2289 }
2290
2291 if (data->consider_all_candidates)
2292 {
2293 use->cost_map[cand->id].cand = cand;
2294 use->cost_map[cand->id].cost = cost;
2295 use->cost_map[cand->id].depends_on = depends_on;
2296 use->cost_map[cand->id].value = value;
2297 return;
2298 }
2299
2300 /* n_map_members is a power of two, so this computes modulo. */
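/* E.g., with n_map_members == 8 and cand->id == 13, probing starts at
slot 13 & 7 == 5 and wraps around to slot 0 if slots 5..7 are taken. */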
2301 s = cand->id & (use->n_map_members - 1);
2302 for (i = s; i < use->n_map_members; i++)
2303 if (!use->cost_map[i].cand)
2304 goto found;
2305 for (i = 0; i < s; i++)
2306 if (!use->cost_map[i].cand)
2307 goto found;
2308
2309 gcc_unreachable ();
2310
2311 found:
2312 use->cost_map[i].cand = cand;
2313 use->cost_map[i].cost = cost;
2314 use->cost_map[i].depends_on = depends_on;
2315 use->cost_map[i].value = value;
2316 }
2317
2318 /* Gets cost of (USE, CANDIDATE) pair. */
2319
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322 struct iv_cand *cand)
2323 {
2324 unsigned i, s;
2325 struct cost_pair *ret;
2326
2327 if (!cand)
2328 return NULL;
2329
2330 if (data->consider_all_candidates)
2331 {
2332 ret = use->cost_map + cand->id;
2333 if (!ret->cand)
2334 return NULL;
2335
2336 return ret;
2337 }
2338
2339 /* n_map_members is a power of two, so this computes modulo. */
2340 s = cand->id & (use->n_map_members - 1);
2341 for (i = s; i < use->n_map_members; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2344
2345 for (i = 0; i < s; i++)
2346 if (use->cost_map[i].cand == cand)
2347 return use->cost_map + i;
2348
2349 return NULL;
2350 }
2351
2352 /* Returns an estimate of the cost of computing SEQ. */
2353
2354 static unsigned
2355 seq_cost (rtx seq)
2356 {
2357 unsigned cost = 0;
2358 rtx set;
2359
2360 for (; seq; seq = NEXT_INSN (seq))
2361 {
2362 set = single_set (seq);
2363 if (set)
2364 cost += rtx_cost (set, SET);
2365 else
2366 cost++;
2367 }
2368
2369 return cost;
2370 }
2371
2372 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2373 static rtx
2374 produce_memory_decl_rtl (tree obj, int *regno)
2375 {
2376 rtx x;
2377
2378 gcc_assert (obj);
2379 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2380 {
2381 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382 x = gen_rtx_SYMBOL_REF (Pmode, name);
2383 }
2384 else
2385 x = gen_raw_REG (Pmode, (*regno)++);
2386
2387 return gen_rtx_MEM (DECL_MODE (obj), x);
2388 }
2389
2390 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2391 walk_tree. DATA contains the actual fake register number. */
2392
2393 static tree
2394 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2395 {
2396 tree obj = NULL_TREE;
2397 rtx x = NULL_RTX;
2398 int *regno = data;
2399
2400 switch (TREE_CODE (*expr_p))
2401 {
2402 case ADDR_EXPR:
2403 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2404 handled_component_p (*expr_p);
2405 expr_p = &TREE_OPERAND (*expr_p, 0))
2406 continue;
2407 obj = *expr_p;
2408 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2409 x = produce_memory_decl_rtl (obj, regno);
2410 break;
2411
2412 case SSA_NAME:
2413 *ws = 0;
2414 obj = SSA_NAME_VAR (*expr_p);
2415 if (!DECL_RTL_SET_P (obj))
2416 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2417 break;
2418
2419 case VAR_DECL:
2420 case PARM_DECL:
2421 case RESULT_DECL:
2422 *ws = 0;
2423 obj = *expr_p;
2424
2425 if (DECL_RTL_SET_P (obj))
2426 break;
2427
2428 if (DECL_MODE (obj) == BLKmode)
2429 x = produce_memory_decl_rtl (obj, regno);
2430 else
2431 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2432
2433 break;
2434
2435 default:
2436 break;
2437 }
2438
2439 if (x)
2440 {
2441 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2442 SET_DECL_RTL (obj, x);
2443 }
2444
2445 return NULL_TREE;
2446 }
2447
2448 /* Determines cost of the computation of EXPR. */
2449
2450 static unsigned
2451 computation_cost (tree expr)
2452 {
2453 rtx seq, rslt;
2454 tree type = TREE_TYPE (expr);
2455 unsigned cost;
2456 /* Avoid using hard regs in ways which may be unsupported. */
2457 int regno = LAST_VIRTUAL_REGISTER + 1;
2458
2459 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2460 start_sequence ();
2461 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2462 seq = get_insns ();
2463 end_sequence ();
2464
2465 cost = seq_cost (seq);
2466 if (MEM_P (rslt))
2467 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2468
2469 return cost;
2470 }
2471
2472 /* Returns variable containing the value of candidate CAND at statement AT. */
2473
2474 static tree
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2476 {
2477 if (stmt_after_increment (loop, cand, stmt))
2478 return cand->var_after;
2479 else
2480 return cand->var_before;
2481 }
2482
2483 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2484 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
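/* For instance, assuming a 64-bit HOST_WIDE_INT, a 32-bit T takes the bit
from TREE_INT_CST_LOW, while a 128-bit T takes bit 63 of TREE_INT_CST_HIGH. */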
2485
2486 int
2487 tree_int_cst_sign_bit (tree t)
2488 {
2489 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490 unsigned HOST_WIDE_INT w;
2491
2492 if (bitno < HOST_BITS_PER_WIDE_INT)
2493 w = TREE_INT_CST_LOW (t);
2494 else
2495 {
2496 w = TREE_INT_CST_HIGH (t);
2497 bitno -= HOST_BITS_PER_WIDE_INT;
2498 }
2499
2500 return (w >> bitno) & 1;
2501 }
2502
2503 /* If we can prove that TOP = cst * BOT for some constant cst,
2504 store cst to MUL and return true. Otherwise return false.
2505 The returned value is always sign-extended, regardless of the
2506 signedness of TOP and BOT. */
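/* For example, TOP = i * 6 - i * 2 with BOT = i yields *MUL = 4, and
TOP = 12 with BOT = -4 yields *MUL = -3; TOP = i + 1 with BOT = i fails,
since 1 is not a constant multiple of i. */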
2507
2508 static bool
2509 constant_multiple_of (tree top, tree bot, double_int *mul)
2510 {
2511 tree mby;
2512 enum tree_code code;
2513 double_int res, p0, p1;
2514 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2515
2516 STRIP_NOPS (top);
2517 STRIP_NOPS (bot);
2518
2519 if (operand_equal_p (top, bot, 0))
2520 {
2521 *mul = double_int_one;
2522 return true;
2523 }
2524
2525 code = TREE_CODE (top);
2526 switch (code)
2527 {
2528 case MULT_EXPR:
2529 mby = TREE_OPERAND (top, 1);
2530 if (TREE_CODE (mby) != INTEGER_CST)
2531 return false;
2532
2533 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2534 return false;
2535
2536 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2537 precision);
2538 return true;
2539
2540 case PLUS_EXPR:
2541 case MINUS_EXPR:
2542 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2543 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2544 return false;
2545
2546 if (code == MINUS_EXPR)
2547 p1 = double_int_neg (p1);
2548 *mul = double_int_sext (double_int_add (p0, p1), precision);
2549 return true;
2550
2551 case INTEGER_CST:
2552 if (TREE_CODE (bot) != INTEGER_CST)
2553 return false;
2554
2555 p0 = double_int_sext (tree_to_double_int (top), precision);
2556 p1 = double_int_sext (tree_to_double_int (bot), precision);
2557 if (double_int_zero_p (p1))
2558 return false;
2559 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2560 precision);
2561 return double_int_zero_p (res);
2562
2563 default:
2564 return false;
2565 }
2566 }
2567
2568 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2569 same precision, which is at least as wide as the precision of TYPE, stores
2570 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2571 type of A and B. */
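/* E.g., if A is (short) SA and B is (short) SB for some 32-bit operands
SA and SB, *A becomes SA, *B becomes SB, and the 32-bit type is
returned. */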
2572
2573 static tree
2574 determine_common_wider_type (tree *a, tree *b)
2575 {
2576 tree wider_type = NULL;
2577 tree suba, subb;
2578 tree atype = TREE_TYPE (*a);
2579
2580 if ((TREE_CODE (*a) == NOP_EXPR
2581 || TREE_CODE (*a) == CONVERT_EXPR))
2582 {
2583 suba = TREE_OPERAND (*a, 0);
2584 wider_type = TREE_TYPE (suba);
2585 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2586 return atype;
2587 }
2588 else
2589 return atype;
2590
2591 if ((TREE_CODE (*b) == NOP_EXPR
2592 || TREE_CODE (*b) == CONVERT_EXPR))
2593 {
2594 subb = TREE_OPERAND (*b, 0);
2595 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2596 return atype;
2597 }
2598 else
2599 return atype;
2600
2601 *a = suba;
2602 *b = subb;
2603 return wider_type;
2604 }
2605
2606 /* Determines the expression by which USE is expressed from induction variable
2607 CAND at statement AT in LOOP. The expression is stored in a decomposed
2608 form into AFF. Returns false if USE cannot be expressed using CAND. */
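/* For instance, if USE has base &a[0] and step 4 while CAND has base 0 and
step 1, the ratio is 4 and the result is the combination &a[0] + 4 * var. */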
2609
2610 static bool
2611 get_computation_aff (struct loop *loop,
2612 struct iv_use *use, struct iv_cand *cand, tree at,
2613 struct affine_tree_combination *aff)
2614 {
2615 tree ubase = use->iv->base;
2616 tree ustep = use->iv->step;
2617 tree cbase = cand->iv->base;
2618 tree cstep = cand->iv->step, cstep_common;
2619 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2620 tree common_type, var;
2621 tree uutype;
2622 aff_tree cbase_aff, var_aff;
2623 double_int rat;
2624
2625 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2626 {
2627 /* We do not have enough precision to express the values of the use. */
2628 return false;
2629 }
2630
2631 var = var_at_stmt (loop, cand, at);
2632 uutype = unsigned_type_for (utype);
2633
2634 /* If the conversion is not a noop, perform it. */
2635 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2636 {
2637 cstep = fold_convert (uutype, cstep);
2638 cbase = fold_convert (uutype, cbase);
2639 var = fold_convert (uutype, var);
2640 }
2641
2642 if (!constant_multiple_of (ustep, cstep, &rat))
2643 return false;
2644
2645 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2646 type, we achieve better folding by computing their difference in this
2647 wider type, and cast the result to UUTYPE. We do not need to worry about
2648 overflows, as all the arithmetic will in the end be performed in UUTYPE
2649 anyway. */
2650 common_type = determine_common_wider_type (&ubase, &cbase);
2651
2652 /* use = ubase - ratio * cbase + ratio * var. */
2653 tree_to_aff_combination (ubase, common_type, aff);
2654 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2655 tree_to_aff_combination (var, uutype, &var_aff);
2656
2657 /* We need to shift the value if we are after the increment. */
2658 if (stmt_after_increment (loop, cand, at))
2659 {
2660 aff_tree cstep_aff;
2661
2662 if (common_type != uutype)
2663 cstep_common = fold_convert (common_type, cstep);
2664 else
2665 cstep_common = cstep;
2666
2667 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2668 aff_combination_add (&cbase_aff, &cstep_aff);
2669 }
2670
2671 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2672 aff_combination_add (aff, &cbase_aff);
2673 if (common_type != uutype)
2674 aff_combination_convert (aff, uutype);
2675
2676 aff_combination_scale (&var_aff, rat);
2677 aff_combination_add (aff, &var_aff);
2678
2679 return true;
2680 }
2681
2682 /* Determines the expression by which USE is expressed from induction variable
2683 CAND at statement AT in LOOP. The computation is unshared. */
2684
2685 static tree
2686 get_computation_at (struct loop *loop,
2687 struct iv_use *use, struct iv_cand *cand, tree at)
2688 {
2689 aff_tree aff;
2690 tree type = TREE_TYPE (use->iv->base);
2691
2692 if (!get_computation_aff (loop, use, cand, at, &aff))
2693 return NULL_TREE;
2694 unshare_aff_combination (&aff);
2695 return fold_convert (type, aff_combination_to_tree (&aff));
2696 }
2697
2698 /* Determines the expression by which USE is expressed from induction variable
2699 CAND in LOOP. The computation is unshared. */
2700
2701 static tree
2702 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2703 {
2704 return get_computation_at (loop, use, cand, use->stmt);
2705 }
2706
2707 /* Returns cost of addition in MODE. */
2708
2709 static unsigned
2710 add_cost (enum machine_mode mode)
2711 {
2712 static unsigned costs[NUM_MACHINE_MODES];
2713 rtx seq;
2714 unsigned cost;
2715
2716 if (costs[mode])
2717 return costs[mode];
2718
2719 start_sequence ();
2720 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2721 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2722 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2723 NULL_RTX);
2724 seq = get_insns ();
2725 end_sequence ();
2726
2727 cost = seq_cost (seq);
2728 if (!cost)
2729 cost = 1;
2730
2731 costs[mode] = cost;
2732
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "Addition in %s costs %d\n",
2735 GET_MODE_NAME (mode), cost);
2736 return cost;
2737 }
2738
2739 /* Entry in a hashtable of already known costs for multiplication. */
2740 struct mbc_entry
2741 {
2742 HOST_WIDE_INT cst; /* The constant to multiply by. */
2743 enum machine_mode mode; /* The mode of the multiplication. */
2744 unsigned cost; /* The cost. */
2745 };
2746
2747 /* Computes the hash value for ENTRY. */
2748
2749 static hashval_t
2750 mbc_entry_hash (const void *entry)
2751 {
2752 const struct mbc_entry *e = entry;
2753
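/* Mix the mode and the low bits of the constant; the multipliers 57 and
877 appear to be arbitrary mixing values. */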
2754 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2755 }
2756
2757 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2758
2759 static int
2760 mbc_entry_eq (const void *entry1, const void *entry2)
2761 {
2762 const struct mbc_entry *e1 = entry1;
2763 const struct mbc_entry *e2 = entry2;
2764
2765 return (e1->mode == e2->mode
2766 && e1->cst == e2->cst);
2767 }
2768
2769 /* Returns cost of multiplication by constant CST in MODE. */
2770
2771 unsigned
2772 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2773 {
2774 static htab_t costs;
2775 struct mbc_entry **cached, act;
2776 rtx seq;
2777 unsigned cost;
2778
2779 if (!costs)
2780 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2781
2782 act.mode = mode;
2783 act.cst = cst;
2784 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2785 if (*cached)
2786 return (*cached)->cost;
2787
2788 *cached = XNEW (struct mbc_entry);
2789 (*cached)->mode = mode;
2790 (*cached)->cst = cst;
2791
2792 start_sequence ();
2793 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2794 gen_int_mode (cst, mode), NULL_RTX, 0);
2795 seq = get_insns ();
2796 end_sequence ();
2797
2798 cost = seq_cost (seq);
2799
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2802 (int) cst, GET_MODE_NAME (mode), cost);
2803
2804 (*cached)->cost = cost;
2805
2806 return cost;
2807 }
2808
2809 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2810 validity for a memory reference accessing memory of mode MODE. */
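/* E.g., on targets with scaled indexed addressing such as i386, only the
ratios 1, 2, 4 and 8 are typically representable in an address. */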
2811
2812 bool
2813 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2814 {
2815 #define MAX_RATIO 128
2816 static sbitmap valid_mult[MAX_MACHINE_MODE];
2817
2818 if (!valid_mult[mode])
2819 {
2820 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2821 rtx addr;
2822 HOST_WIDE_INT i;
2823
2824 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2825 sbitmap_zero (valid_mult[mode]);
2826 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2827 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2828 {
2829 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2830 if (memory_address_p (mode, addr))
2831 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2832 }
2833
2834 if (dump_file && (dump_flags & TDF_DETAILS))
2835 {
2836 fprintf (dump_file, " allowed multipliers:");
2837 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2838 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2839 fprintf (dump_file, " %d", (int) i);
2840 fprintf (dump_file, "\n");
2841 fprintf (dump_file, "\n");
2842 }
2843 }
2844
2845 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2846 return false;
2847
2848 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2849 }
2850
2851 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2852 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2853 variable is omitted. Compute the cost for a memory reference that accesses
2854 a memory location of mode MEM_MODE.
2855
2856 TODO -- there must be some better way. This is all quite crude. */
2857
2858 static unsigned
2859 get_address_cost (bool symbol_present, bool var_present,
2860 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2861 enum machine_mode mem_mode)
2862 {
2863 static bool initialized[MAX_MACHINE_MODE];
2864 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2865 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2866 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2867 unsigned cost, acost;
2868 bool offset_p, ratio_p;
2869 HOST_WIDE_INT s_offset;
2870 unsigned HOST_WIDE_INT mask;
2871 unsigned bits;
2872
2873 if (!initialized[mem_mode])
2874 {
2875 HOST_WIDE_INT i;
2876 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2877 int old_cse_not_expected;
2878 unsigned sym_p, var_p, off_p, rat_p, add_c;
2879 rtx seq, addr, base;
2880 rtx reg0, reg1;
2881
2882 initialized[mem_mode] = true;
2883
2884 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2885
2886 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2887 for (i = start; i <= 1 << 20; i <<= 1)
2888 {
2889 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2890 if (!memory_address_p (mem_mode, addr))
2891 break;
2892 }
2893 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2894 off[mem_mode] = max_offset[mem_mode];
2895
2896 for (i = start; i <= 1 << 20; i <<= 1)
2897 {
2898 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2899 if (!memory_address_p (mem_mode, addr))
2900 break;
2901 }
2902 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2903
2904 if (dump_file && (dump_flags & TDF_DETAILS))
2905 {
2906 fprintf (dump_file, "get_address_cost:\n");
2907 fprintf (dump_file, " min offset %s %d\n",
2908 GET_MODE_NAME (mem_mode),
2909 (int) min_offset[mem_mode]);
2910 fprintf (dump_file, " max offset %s %d\n",
2911 GET_MODE_NAME (mem_mode),
2912 (int) max_offset[mem_mode]);
2913 }
2914
2915 rat[mem_mode] = 1;
2916 for (i = 2; i <= MAX_RATIO; i++)
2917 if (multiplier_allowed_in_address_p (i, mem_mode))
2918 {
2919 rat[mem_mode] = i;
2920 break;
2921 }
2922
2923 /* Compute the cost of various addressing modes. */
2924 acost = 0;
2925 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2926 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2927
2928 for (i = 0; i < 16; i++)
2929 {
2930 sym_p = i & 1;
2931 var_p = (i >> 1) & 1;
2932 off_p = (i >> 2) & 1;
2933 rat_p = (i >> 3) & 1;
2934
2935 addr = reg0;
2936 if (rat_p)
2937 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2938 gen_int_mode (rat[mem_mode], Pmode));
2939
2940 if (var_p)
2941 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2942
2943 if (sym_p)
2944 {
2945 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2946 if (off_p)
2947 base = gen_rtx_fmt_e (CONST, Pmode,
2948 gen_rtx_fmt_ee (PLUS, Pmode,
2949 base,
2950 gen_int_mode (off[mem_mode],
2951 Pmode)));
2952 }
2953 else if (off_p)
2954 base = gen_int_mode (off[mem_mode], Pmode);
2955 else
2956 base = NULL_RTX;
2957
2958 if (base)
2959 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2960
2961 start_sequence ();
2962 /* To avoid splitting addressing modes, pretend that no cse will
2963 follow. */
2964 old_cse_not_expected = cse_not_expected;
2965 cse_not_expected = true;
2966 addr = memory_address (mem_mode, addr);
2967 cse_not_expected = old_cse_not_expected;
2968 seq = get_insns ();
2969 end_sequence ();
2970
2971 acost = seq_cost (seq);
2972 acost += address_cost (addr, mem_mode);
2973
2974 if (!acost)
2975 acost = 1;
2976 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2977 }
2978
2979 /* On some targets, it is quite expensive to load a symbol into a register,
2980 which makes addresses that contain symbols look much more expensive.
2981 However, the symbol will have to be loaded in any case before the
2982 loop (and quite likely we have it in a register already), so it does not
2983 make much sense to penalize them too heavily. So make some final
2984 tweaks for the SYMBOL_PRESENT modes:
2985
2986 If VAR_PRESENT is false, and the mode obtained by changing symbol to
2987 var is cheaper, use this mode with a small penalty.
2988 If VAR_PRESENT is true, try whether the mode with
2989 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
2990 if this is the case, use it. */
2991 add_c = add_cost (Pmode);
2992 for (i = 0; i < 8; i++)
2993 {
2994 var_p = i & 1;
2995 off_p = (i >> 1) & 1;
2996 rat_p = (i >> 2) & 1;
2997
2998 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
2999 if (var_p)
3000 acost += add_c;
3001
3002 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3003 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3004 }
3005
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 {
3008 fprintf (dump_file, "Address costs:\n");
3009
3010 for (i = 0; i < 16; i++)
3011 {
3012 sym_p = i & 1;
3013 var_p = (i >> 1) & 1;
3014 off_p = (i >> 2) & 1;
3015 rat_p = (i >> 3) & 1;
3016
3017 fprintf (dump_file, " ");
3018 if (sym_p)
3019 fprintf (dump_file, "sym + ");
3020 if (var_p)
3021 fprintf (dump_file, "var + ");
3022 if (off_p)
3023 fprintf (dump_file, "cst + ");
3024 if (rat_p)
3025 fprintf (dump_file, "rat * ");
3026
3027 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3028 fprintf (dump_file, "index costs %d\n", acost);
3029 }
3030 fprintf (dump_file, "\n");
3031 }
3032 }
3033
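/* Sign-extend OFFSET from the width of Pmode; e.g., with a 32-bit Pmode,
an OFFSET of 0xfffffff0 yields an S_OFFSET of -16. */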
3034 bits = GET_MODE_BITSIZE (Pmode);
3035 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3036 offset &= mask;
3037 if ((offset >> (bits - 1) & 1))
3038 offset |= ~mask;
3039 s_offset = offset;
3040
3041 cost = 0;
3042 offset_p = (s_offset != 0
3043 && min_offset[mem_mode] <= s_offset
3044 && s_offset <= max_offset[mem_mode]);
3045 ratio_p = (ratio != 1
3046 && multiplier_allowed_in_address_p (ratio, mem_mode));
3047
3048 if (ratio != 1 && !ratio_p)
3049 cost += multiply_by_cost (ratio, Pmode);
3050
3051 if (s_offset && !offset_p && !symbol_present)
3052 cost += add_cost (Pmode);
3053
3054 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3055 return cost + acost;
3056 }
3057
3058 /* Estimates cost of forcing expression EXPR into a variable. */
3059
3060 unsigned
3061 force_expr_to_var_cost (tree expr)
3062 {
3063 static bool costs_initialized = false;
3064 static unsigned integer_cost;
3065 static unsigned symbol_cost;
3066 static unsigned address_cost;
3067 tree op0, op1;
3068 unsigned cost0, cost1, cost;
3069 enum machine_mode mode;
3070
3071 if (!costs_initialized)
3072 {
3073 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3074 rtx x = gen_rtx_MEM (DECL_MODE (var),
3075 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3076 tree addr;
3077 tree type = build_pointer_type (integer_type_node);
3078
3079 integer_cost = computation_cost (build_int_cst (integer_type_node,
3080 2000));
3081
3082 SET_DECL_RTL (var, x);
3083 TREE_STATIC (var) = 1;
3084 addr = build1 (ADDR_EXPR, type, var);
3085 symbol_cost = computation_cost (addr) + 1;
3086
3087 address_cost
3088 = computation_cost (build2 (PLUS_EXPR, type,
3089 addr,
3090 build_int_cst (type, 2000))) + 1;
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3092 {
3093 fprintf (dump_file, "force_expr_to_var_cost:\n");
3094 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3095 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3096 fprintf (dump_file, " address %d\n", (int) address_cost);
3097 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3098 fprintf (dump_file, "\n");
3099 }
3100
3101 costs_initialized = true;
3102 }
3103
3104 STRIP_NOPS (expr);
3105
3106 if (SSA_VAR_P (expr))
3107 return 0;
3108
3109 if (TREE_INVARIANT (expr))
3110 {
3111 if (TREE_CODE (expr) == INTEGER_CST)
3112 return integer_cost;
3113
3114 if (TREE_CODE (expr) == ADDR_EXPR)
3115 {
3116 tree obj = TREE_OPERAND (expr, 0);
3117
3118 if (TREE_CODE (obj) == VAR_DECL
3119 || TREE_CODE (obj) == PARM_DECL
3120 || TREE_CODE (obj) == RESULT_DECL)
3121 return symbol_cost;
3122 }
3123
3124 return address_cost;
3125 }
3126
3127 switch (TREE_CODE (expr))
3128 {
3129 case PLUS_EXPR:
3130 case MINUS_EXPR:
3131 case MULT_EXPR:
3132 op0 = TREE_OPERAND (expr, 0);
3133 op1 = TREE_OPERAND (expr, 1);
3134 STRIP_NOPS (op0);
3135 STRIP_NOPS (op1);
3136
3137 if (is_gimple_val (op0))
3138 cost0 = 0;
3139 else
3140 cost0 = force_expr_to_var_cost (op0);
3141
3142 if (is_gimple_val (op1))
3143 cost1 = 0;
3144 else
3145 cost1 = force_expr_to_var_cost (op1);
3146
3147 break;
3148
3149 default:
3150 /* Just an arbitrary value, FIXME. */
3151 return target_spill_cost;
3152 }
3153
3154 mode = TYPE_MODE (TREE_TYPE (expr));
3155 switch (TREE_CODE (expr))
3156 {
3157 case PLUS_EXPR:
3158 case MINUS_EXPR:
3159 cost = add_cost (mode);
3160 break;
3161
3162 case MULT_EXPR:
3163 if (cst_and_fits_in_hwi (op0))
3164 cost = multiply_by_cost (int_cst_value (op0), mode);
3165 else if (cst_and_fits_in_hwi (op1))
3166 cost = multiply_by_cost (int_cst_value (op1), mode);
3167 else
3168 return target_spill_cost;
3169 break;
3170
3171 default:
3172 gcc_unreachable ();
3173 }
3174
3175 cost += cost0;
3176 cost += cost1;
3177
3178 /* Bound the cost by target_spill_cost. The parts of complicated
3179 computations often are either loop invariant or at least can
3180 be shared between several iv uses, so letting this grow without
3181 limits would not give reasonable results. */
3182 return cost < target_spill_cost ? cost : target_spill_cost;
3183 }
3184
3185 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3186 invariants the computation depends on. */
3187
3188 static unsigned
3189 force_var_cost (struct ivopts_data *data,
3190 tree expr, bitmap *depends_on)
3191 {
3192 if (depends_on)
3193 {
3194 fd_ivopts_data = data;
3195 walk_tree (&expr, find_depends, depends_on, NULL);
3196 }
3197
3198 return force_expr_to_var_cost (expr);
3199 }
3200
3201 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3202 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3203 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3204 invariants the computation depends on. */
3205
3206 static unsigned
3207 split_address_cost (struct ivopts_data *data,
3208 tree addr, bool *symbol_present, bool *var_present,
3209 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3210 {
3211 tree core;
3212 HOST_WIDE_INT bitsize;
3213 HOST_WIDE_INT bitpos;
3214 tree toffset;
3215 enum machine_mode mode;
3216 int unsignedp, volatilep;
3217
3218 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3219 &unsignedp, &volatilep, false);
3220
3221 if (toffset != 0
3222 || bitpos % BITS_PER_UNIT != 0
3223 || TREE_CODE (core) != VAR_DECL)
3224 {
3225 *symbol_present = false;
3226 *var_present = true;
3227 fd_ivopts_data = data;
3228 walk_tree (&addr, find_depends, depends_on, NULL);
3229 return target_spill_cost;
3230 }
3231
3232 *offset += bitpos / BITS_PER_UNIT;
3233 if (TREE_STATIC (core)
3234 || DECL_EXTERNAL (core))
3235 {
3236 *symbol_present = true;
3237 *var_present = false;
3238 return 0;
3239 }
3240
3241 *symbol_present = false;
3242 *var_present = true;
3243 return 0;
3244 }
3245
3246 /* Estimates cost of expressing difference of addresses E1 - E2 as
3247 var + symbol + offset. The value of offset is added to OFFSET,
3248 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3249 part is missing. DEPENDS_ON is a set of the invariants the computation
3250 depends on. */
3251
3252 static unsigned
3253 ptr_difference_cost (struct ivopts_data *data,
3254 tree e1, tree e2, bool *symbol_present, bool *var_present,
3255 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3256 {
3257 HOST_WIDE_INT diff = 0;
3258 unsigned cost;
3259
3260 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3261
3262 if (ptr_difference_const (e1, e2, &diff))
3263 {
3264 *offset += diff;
3265 *symbol_present = false;
3266 *var_present = false;
3267 return 0;
3268 }
3269
3270 if (e2 == integer_zero_node)
3271 return split_address_cost (data, TREE_OPERAND (e1, 0),
3272 symbol_present, var_present, offset, depends_on);
3273
3274 *symbol_present = false;
3275 *var_present = true;
3276
3277 cost = force_var_cost (data, e1, depends_on);
3278 cost += force_var_cost (data, e2, depends_on);
3279 cost += add_cost (Pmode);
3280
3281 return cost;
3282 }
3283
3284 /* Estimates cost of expressing difference E1 - E2 as
3285 var + symbol + offset. The value of offset is added to OFFSET,
3286 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3287 part is missing. DEPENDS_ON is a set of the invariants the computation
3288 depends on. */
3289
3290 static unsigned
3291 difference_cost (struct ivopts_data *data,
3292 tree e1, tree e2, bool *symbol_present, bool *var_present,
3293 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3294 {
3295 unsigned cost;
3296 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3297 unsigned HOST_WIDE_INT off1, off2;
3298
3299 e1 = strip_offset (e1, &off1);
3300 e2 = strip_offset (e2, &off2);
3301 *offset += off1 - off2;
3302
3303 STRIP_NOPS (e1);
3304 STRIP_NOPS (e2);
3305
3306 if (TREE_CODE (e1) == ADDR_EXPR)
3307 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3308 depends_on);
3309 *symbol_present = false;
3310
3311 if (operand_equal_p (e1, e2, 0))
3312 {
3313 *var_present = false;
3314 return 0;
3315 }
3316 *var_present = true;
3317 if (integer_zerop (e2))
3318 return force_var_cost (data, e1, depends_on);
3319
3320 if (integer_zerop (e1))
3321 {
3322 cost = force_var_cost (data, e2, depends_on);
3323 cost += multiply_by_cost (-1, mode);
3324
3325 return cost;
3326 }
3327
3328 cost = force_var_cost (data, e1, depends_on);
3329 cost += force_var_cost (data, e2, depends_on);
3330 cost += add_cost (mode);
3331
3332 return cost;
3333 }
3334
3335 /* Determines the cost of the computation by which USE is expressed
3336 from induction variable CAND. If ADDRESS_P is true, we just need
3337 to create an address from it, otherwise we want to get it into a
3338 register. A set of invariants we depend on is stored in
3339 DEPENDS_ON. AT is the statement at which the value is computed. */
3340
3341 static unsigned
3342 get_computation_cost_at (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand,
3344 bool address_p, bitmap *depends_on, tree at)
3345 {
3346 tree ubase = use->iv->base, ustep = use->iv->step;
3347 tree cbase, cstep;
3348 tree utype = TREE_TYPE (ubase), ctype;
3349 unsigned HOST_WIDE_INT cstepi, offset = 0;
3350 HOST_WIDE_INT ratio, aratio;
3351 bool var_present, symbol_present;
3352 unsigned cost = 0, n_sums;
3353 double_int rat;
3354
3355 *depends_on = NULL;
3356
3357 /* Only consider real candidates. */
3358 if (!cand->iv)
3359 return INFTY;
3360
3361 cbase = cand->iv->base;
3362 cstep = cand->iv->step;
3363 ctype = TREE_TYPE (cbase);
3364
3365 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3366 {
3367 /* We do not have enough precision to express the values of the use. */
3368 return INFTY;
3369 }
3370
3371 if (address_p)
3372 {
3373 /* Do not try to express the address of an object with a computation based
3374 on the address of a different object. This may cause problems in rtl
3375 level alias analysis (which does not expect this to happen,
3376 as it is invalid in C), and would be unlikely to be useful
3377 anyway. */
3378 if (use->iv->base_object
3379 && cand->iv->base_object
3380 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3381 return INFTY;
3382 }
3383
3384 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3385 {
3386 /* TODO -- add direct handling of this case. */
3387 goto fallback;
3388 }
3389
3390 /* CSTEPI is removed from the offset in case the statement is after the
3391 increment. If the step is not constant, we use zero instead.
3392 This is a bit imprecise (there is the extra addition), but
3393 redundancy elimination is likely to transform the code so that
3394 it uses the value of the variable before the increment anyway,
3395 so this is not too unrealistic. */
3396 if (cst_and_fits_in_hwi (cstep))
3397 cstepi = int_cst_value (cstep);
3398 else
3399 cstepi = 0;
3400
3401 if (!constant_multiple_of (ustep, cstep, &rat))
3402 return INFTY;
3403
3404 if (double_int_fits_in_shwi_p (rat))
3405 ratio = double_int_to_shwi (rat);
3406 else
3407 return INFTY;
3408
3409 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3410 or ratio == 1, it is better to handle this like
3411
3412 ubase - ratio * cbase + ratio * var
3413
3414 (this also holds in the case ratio == -1, TODO). */
3415
3416 if (cst_and_fits_in_hwi (cbase))
3417 {
3418 offset = - ratio * int_cst_value (cbase);
3419 cost += difference_cost (data,
3420 ubase, integer_zero_node,
3421 &symbol_present, &var_present, &offset,
3422 depends_on);
3423 }
3424 else if (ratio == 1)
3425 {
3426 cost += difference_cost (data,
3427 ubase, cbase,
3428 &symbol_present, &var_present, &offset,
3429 depends_on);
3430 }
3431 else
3432 {
3433 cost += force_var_cost (data, cbase, depends_on);
3434 cost += add_cost (TYPE_MODE (ctype));
3435 cost += difference_cost (data,
3436 ubase, integer_zero_node,
3437 &symbol_present, &var_present, &offset,
3438 depends_on);
3439 }
3440
3441 /* If we are after the increment, the value of the candidate is higher by
3442 one iteration. */
3443 if (stmt_after_increment (data->current_loop, cand, at))
3444 offset -= ratio * cstepi;
3445
3446 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3447 (symbol/var/const parts may be omitted). If we are looking for an address,
3448 find the cost of addressing this. */
3449 if (address_p)
3450 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3451 TYPE_MODE (TREE_TYPE (*use->op_p)));
3452
3453 /* Otherwise estimate the costs for computing the expression. */
3454 aratio = ratio > 0 ? ratio : -ratio;
3455 if (!symbol_present && !var_present && !offset)
3456 {
3457 if (ratio != 1)
3458 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3459
3460 return cost;
3461 }
3462
3463 if (aratio != 1)
3464 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3465
3466 n_sums = 1;
3467 if (var_present
3468 /* Symbol + offset should be compile-time computable. */
3469 && (symbol_present || offset))
3470 n_sums++;
3471
3472 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3473
3474 fallback:
3475 {
3476 /* Just get the expression, expand it and measure the cost. */
3477 tree comp = get_computation_at (data->current_loop, use, cand, at);
3478
3479 if (!comp)
3480 return INFTY;
3481
3482 if (address_p)
3483 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3484
3485 return computation_cost (comp);
3486 }
3487 }
3488
3489 /* Determines the cost of the computation by that USE is expressed
3490 from induction variable CAND. If ADDRESS_P is true, we just need
3491 to create an address from it, otherwise we want to get it into
3492 register. A set of invariants we depend on is stored in
3493 DEPENDS_ON. */
3494
3495 static unsigned
3496 get_computation_cost (struct ivopts_data *data,
3497 struct iv_use *use, struct iv_cand *cand,
3498 bool address_p, bitmap *depends_on)
3499 {
3500 return get_computation_cost_at (data,
3501 use, cand, address_p, depends_on, use->stmt);
3502 }
3503
3504 /* Determines cost of basing replacement of USE on CAND in a generic
3505 expression. */
3506
3507 static bool
3508 determine_use_iv_cost_generic (struct ivopts_data *data,
3509 struct iv_use *use, struct iv_cand *cand)
3510 {
3511 bitmap depends_on;
3512 unsigned cost;
3513
3514 /* The simple case first -- if we need to express the value of the preserved
3515 original biv, the cost is 0. This also prevents us from counting the
3516 cost of increment twice -- once at this use and once in the cost of
3517 the candidate. */
3518 if (cand->pos == IP_ORIGINAL
3519 && cand->incremented_at == use->stmt)
3520 {
3521 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3522 return true;
3523 }
3524
3525 cost = get_computation_cost (data, use, cand, false, &depends_on);
3526 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3527
3528 return cost != INFTY;
3529 }
3530
3531 /* Determines cost of basing replacement of USE on CAND in an address. */
3532
3533 static bool
3534 determine_use_iv_cost_address (struct ivopts_data *data,
3535 struct iv_use *use, struct iv_cand *cand)
3536 {
3537 bitmap depends_on;
3538 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3539
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3541
3542 return cost != INFTY;
3543 }
3544
3545 /* Computes value of candidate CAND at position AT in iteration NITER, and
3546 stores it to VAL. */
3547
3548 static void
3549 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3550 aff_tree *val)
3551 {
3552 aff_tree step, delta, nit;
3553 struct iv *iv = cand->iv;
3554 tree type = TREE_TYPE (iv->base);
3555
3556 tree_to_aff_combination (iv->step, type, &step);
3557 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3558 aff_combination_convert (&nit, type);
3559 aff_combination_mult (&nit, &step, &delta);
3560 if (stmt_after_increment (loop, cand, at))
3561 aff_combination_add (&delta, &step);
3562
3563 tree_to_aff_combination (iv->base, type, val);
3564 aff_combination_add (val, &delta);
3565 }
3566
3567 /* Returns the period of induction variable IV. */
3568
3569 static tree
3570 iv_period (struct iv *iv)
3571 {
3572 tree step = iv->step, period, type;
3573 tree pow2div;
3574
3575 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3576
3577 /* The period of the iv is the type range divided by gcd (step, type
3578 range). Since the type range is a power of two, it suffices to
3579 determine the maximum power of two that divides step. */
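/* E.g., for an 8-bit unsigned iv with step 4, pow2div is 2 and the period
is the 6-bit mask 63: after 64 increments the iv returns to its initial
value, so at most 63 iterations can be distinguished. */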
3580 pow2div = num_ending_zeros (step);
3581 type = unsigned_type_for (TREE_TYPE (step));
3582
3583 period = build_low_bits_mask (type,
3584 (TYPE_PRECISION (type)
3585 - tree_low_cst (pow2div, 1)));
3586
3587 return period;
3588 }
3589
3590 /* Returns the comparison operator used when eliminating the iv USE. */
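/* If the exit is taken on the true edge, the eliminated test has the form
var == bound (EQ_EXPR); otherwise it has the form var != bound. */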
3591
3592 static enum tree_code
3593 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3594 {
3595 struct loop *loop = data->current_loop;
3596 basic_block ex_bb;
3597 edge exit;
3598
3599 ex_bb = bb_for_stmt (use->stmt);
3600 exit = EDGE_SUCC (ex_bb, 0);
3601 if (flow_bb_inside_loop_p (loop, exit->dest))
3602 exit = EDGE_SUCC (ex_bb, 1);
3603
3604 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3605 }
3606
3607 /* Check whether it is possible to express the condition in USE by a comparison
3608 of candidate CAND. If so, store the value the candidate is compared with to BOUND. */
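/* E.g., for a candidate with base 0 and step 1 and an exit reached after
niter iterations, BOUND becomes the candidate value in iteration niter
(niter before the increment, niter + 1 after it), so the exit test can
be rewritten as a comparison of the candidate variable with BOUND. */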
3609
3610 static bool
3611 may_eliminate_iv (struct ivopts_data *data,
3612 struct iv_use *use, struct iv_cand *cand, tree *bound)
3613 {
3614 basic_block ex_bb;
3615 edge exit;
3616 tree nit, nit_type;
3617 tree wider_type, period, per_type;
3618 struct loop *loop = data->current_loop;
3619 aff_tree bnd;
3620
3621 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3622 return false;
3623
3624 /* For now, this works only for exits that dominate the loop latch. TODO -- extend
3625 for other conditions inside loop body. */
3626 ex_bb = bb_for_stmt (use->stmt);
3627 if (use->stmt != last_stmt (ex_bb)
3628 || TREE_CODE (use->stmt) != COND_EXPR)
3629 return false;
3630 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3631 return false;
3632
3633 exit = EDGE_SUCC (ex_bb, 0);
3634 if (flow_bb_inside_loop_p (loop, exit->dest))
3635 exit = EDGE_SUCC (ex_bb, 1);
3636 if (flow_bb_inside_loop_p (loop, exit->dest))
3637 return false;
3638
3639 nit = niter_for_exit (data, exit);
3640 if (!nit)
3641 return false;
3642
3643 nit_type = TREE_TYPE (nit);
3644
3645 /* Determine whether we may use the variable to test whether niter iterations
3646 have elapsed. This is the case iff the period of the induction variable is
3647 at least the number of iterations. */
3648 period = iv_period (cand->iv);
3649 if (!period)
3650 return false;
3651 per_type = TREE_TYPE (period);
3652
3654 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3655 wider_type = per_type;
3656 else
3657 wider_type = nit_type;
3658
3659 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3660 fold_convert (wider_type, period),
3661 fold_convert (wider_type, nit))))
3662 return false;
3663
3664 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3665 *bound = aff_combination_to_tree (&bnd);
3666 return true;
3667 }
3668
3669 /* Determines cost of basing replacement of USE on CAND in a condition. */
3670
3671 static bool
3672 determine_use_iv_cost_condition (struct ivopts_data *data,
3673 struct iv_use *use, struct iv_cand *cand)
3674 {
3675 tree bound = NULL_TREE;
3676 struct iv *cmp_iv;
3677 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3678 unsigned elim_cost, express_cost, cost;
3679 bool ok;
3680
3681 /* Only consider real candidates. */
3682 if (!cand->iv)
3683 {
3684 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3685 return false;
3686 }
3687
3688 /* Try iv elimination. */
3689 if (may_eliminate_iv (data, use, cand, &bound))
3690 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3691 else
3692 elim_cost = INFTY;
3693
3694 /* Try expressing the original giv. If it is compared with an invariant,
3695 note that we cannot get rid of it. */
3696 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3697 gcc_assert (ok);
3698
3699 express_cost = get_computation_cost (data, use, cand, false,
3700 &depends_on_express);
3701 fd_ivopts_data = data;
3702 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3703
3704 /* Choose the better approach. */
3705 if (elim_cost < express_cost)
3706 {
3707 cost = elim_cost;
3708 depends_on = depends_on_elim;
3709 depends_on_elim = NULL;
3710 }
3711 else
3712 {
3713 cost = express_cost;
3714 depends_on = depends_on_express;
3715 depends_on_express = NULL;
3716 bound = NULL_TREE;
3717 }
3718
3719 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3720
3721 if (depends_on_elim)
3722 BITMAP_FREE (depends_on_elim);
3723 if (depends_on_express)
3724 BITMAP_FREE (depends_on_express);
3725
3726 return cost != INFTY;
3727 }
3728
3729 /* Determines cost of basing replacement of USE on CAND. Returns false
3730 if USE cannot be based on CAND. */
3731
3732 static bool
3733 determine_use_iv_cost (struct ivopts_data *data,
3734 struct iv_use *use, struct iv_cand *cand)
3735 {
3736 switch (use->type)
3737 {
3738 case USE_NONLINEAR_EXPR:
3739 return determine_use_iv_cost_generic (data, use, cand);
3740
3741 case USE_ADDRESS:
3742 return determine_use_iv_cost_address (data, use, cand);
3743
3744 case USE_COMPARE:
3745 return determine_use_iv_cost_condition (data, use, cand);
3746
3747 default:
3748 gcc_unreachable ();
3749 }
3750 }
3751
3752 /* Determines costs of basing the use of the iv on an iv candidate. */
3753
3754 static void
3755 determine_use_iv_costs (struct ivopts_data *data)
3756 {
3757 unsigned i, j;
3758 struct iv_use *use;
3759 struct iv_cand *cand;
3760 bitmap to_clear = BITMAP_ALLOC (NULL);
3761
3762 alloc_use_cost_map (data);
3763
3764 for (i = 0; i < n_iv_uses (data); i++)
3765 {
3766 use = iv_use (data, i);
3767
3768 if (data->consider_all_candidates)
3769 {
3770 for (j = 0; j < n_iv_cands (data); j++)
3771 {
3772 cand = iv_cand (data, j);
3773 determine_use_iv_cost (data, use, cand);
3774 }
3775 }
3776 else
3777 {
3778 bitmap_iterator bi;
3779
3780 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3781 {
3782 cand = iv_cand (data, j);
3783 if (!determine_use_iv_cost (data, use, cand))
3784 bitmap_set_bit (to_clear, j);
3785 }
3786
3787 /* Remove the candidates for which the cost is infinite from
3788 the list of related candidates. */
3789 bitmap_and_compl_into (use->related_cands, to_clear);
3790 bitmap_clear (to_clear);
3791 }
3792 }
3793
3794 BITMAP_FREE (to_clear);
3795
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 {
3798 fprintf (dump_file, "Use-candidate costs:\n");
3799
3800 for (i = 0; i < n_iv_uses (data); i++)
3801 {
3802 use = iv_use (data, i);
3803
3804 fprintf (dump_file, "Use %d:\n", i);
3805 fprintf (dump_file, " cand\tcost\tdepends on\n");
3806 for (j = 0; j < use->n_map_members; j++)
3807 {
3808 if (!use->cost_map[j].cand
3809 || use->cost_map[j].cost == INFTY)
3810 continue;
3811
3812 fprintf (dump_file, " %d\t%d\t",
3813 use->cost_map[j].cand->id,
3814 use->cost_map[j].cost);
3815 if (use->cost_map[j].depends_on)
3816 bitmap_print (dump_file,
3817 use->cost_map[j].depends_on, "","");
3818 fprintf (dump_file, "\n");
3819 }
3820
3821 fprintf (dump_file, "\n");
3822 }
3823 fprintf (dump_file, "\n");
3824 }
3825 }
3826
3827 /* Determines cost of the candidate CAND. */
3828
3829 static void
3830 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3831 {
3832 unsigned cost_base, cost_step;
3833 tree base;
3834
3835 if (!cand->iv)
3836 {
3837 cand->cost = 0;
3838 return;
3839 }
3840
3841 /* There are two costs associated with the candidate -- its increment
3842 and its initialization. The second is almost negligible for any loop
3843 that rolls enough, so we give it only a small weight. */
3844
3845 base = cand->iv->base;
3846 cost_base = force_var_cost (data, base, NULL);
3847 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3848
3849 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3850
3851 /* Prefer the original iv unless we may gain something by replacing it;
3852 this is not really relevant for artificial ivs created by other
3853 passes. */
3854 if (cand->pos == IP_ORIGINAL
3855 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3856 cand->cost--;
3857
3858 /* Prefer not to insert statements into the latch unless there are some
3859 already (so that we do not create unnecessary jumps). */
3860 if (cand->pos == IP_END
3861 && empty_block_p (ip_end_pos (data->current_loop)))
3862 cand->cost++;
3863 }
3864
3865 /* Determines costs of computation of the candidates. */
3866
3867 static void
3868 determine_iv_costs (struct ivopts_data *data)
3869 {
3870 unsigned i;
3871
3872 if (dump_file && (dump_flags & TDF_DETAILS))
3873 {
3874 fprintf (dump_file, "Candidate costs:\n");
3875 fprintf (dump_file, " cand\tcost\n");
3876 }
3877
3878 for (i = 0; i < n_iv_cands (data); i++)
3879 {
3880 struct iv_cand *cand = iv_cand (data, i);
3881
3882 determine_iv_cost (data, cand);
3883
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3885 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3886 }
3887
3888 if (dump_file && (dump_flags & TDF_DETAILS))
3889 fprintf (dump_file, "\n");
3890 }
3891
3892 /* Calculates cost for having SIZE induction variables. */
3893
3894 static unsigned
3895 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3896 {
3897 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3898 }
3899
3900 /* For each size of the induction variable set determine the penalty. */
3901
3902 static void
3903 determine_set_costs (struct ivopts_data *data)
3904 {
3905 unsigned j, n;
3906 tree phi, op;
3907 struct loop *loop = data->current_loop;
3908 bitmap_iterator bi;
3909
3910 /* We use the following model (definitely improvable, especially the
3911 cost function -- TODO):
3912
3913 We estimate the number of registers available (using MD data), name it A.
3914
3915 We estimate the number of registers used by the loop, name it U. This
3916 number is obtained as the number of loop phi nodes (not counting virtual
3917 registers and bivs) + the number of variables from outside of the loop.
3918
3919 We set a reserve R (free regs that are used for temporary computations,
3920 etc.). For now the reserve is a constant 3.
3921
3922 Let I be the number of induction variables.
3923
3924 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
3925 encourage making a lot of ivs without a reason).
3926 -- if A - R < U + I <= A, the cost is I * PRES_COST
3927 -- if U + I > A, the cost is I * PRES_COST and
3928 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
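/* For instance, with A = 6, R = 3 and U = 2: one iv costs SMALL_COST,
three ivs cost 3 * PRES_COST, and six ivs cost 6 * PRES_COST plus
number of uses * SPILL_COST * 2 / 8. */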
3929
3930 if (dump_file && (dump_flags & TDF_DETAILS))
3931 {
3932 fprintf (dump_file, "Global costs:\n");
3933 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3934 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3935 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3936 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3937 }
3938
3939 n = 0;
3940 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3941 {
3942 op = PHI_RESULT (phi);
3943
3944 if (!is_gimple_reg (op))
3945 continue;
3946
3947 if (get_iv (data, op))
3948 continue;
3949
3950 n++;
3951 }
3952
3953 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3954 {
3955 struct version_info *info = ver_info (data, j);
3956
3957 if (info->inv_id && info->has_nonlin_use)
3958 n++;
3959 }
3960
3961 data->regs_used = n;
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3963 fprintf (dump_file, " regs_used %d\n", n);
3964
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3966 {
3967 fprintf (dump_file, " cost for size:\n");
3968 fprintf (dump_file, " ivs\tcost\n");
3969 for (j = 0; j <= 2 * target_avail_regs; j++)
3970 fprintf (dump_file, " %d\t%d\n", j,
3971 ivopts_global_cost_for_size (data, j));
3972 fprintf (dump_file, "\n");
3973 }
3974 }
3975
3976 /* Returns true if A is a cheaper cost pair than B. */
3977
3978 static bool
3979 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3980 {
3981 if (!a)
3982 return false;
3983
3984 if (!b)
3985 return true;
3986
3987 if (a->cost < b->cost)
3988 return true;
3989
3990 if (a->cost > b->cost)
3991 return false;
3992
3993 /* In case the costs are the same, prefer the cheaper candidate. */
3994 if (a->cand->cost < b->cand->cost)
3995 return true;
3996
3997 return false;
3998 }
3999
4000 /* Computes the cost field of IVS structure. */
4001
4002 static void
4003 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4004 {
4005 unsigned cost = 0;
4006
4007 cost += ivs->cand_use_cost;
4008 cost += ivs->cand_cost;
4009 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4010
4011 ivs->cost = cost;
4012 }
4013
4014 /* Remove invariants in set INVS from set IVS. */
4015
4016 static void
4017 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4018 {
4019 bitmap_iterator bi;
4020 unsigned iid;
4021
4022 if (!invs)
4023 return;
4024
4025 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4026 {
4027 ivs->n_invariant_uses[iid]--;
4028 if (ivs->n_invariant_uses[iid] == 0)
4029 ivs->n_regs--;
4030 }
4031 }
4032
4033 /* Set USE not to be expressed by any candidate in IVS. */
4034
4035 static void
4036 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4037 struct iv_use *use)
4038 {
4039 unsigned uid = use->id, cid;
4040 struct cost_pair *cp;
4041
4042 cp = ivs->cand_for_use[uid];
4043 if (!cp)
4044 return;
4045 cid = cp->cand->id;
4046
4047 ivs->bad_uses++;
4048 ivs->cand_for_use[uid] = NULL;
4049 ivs->n_cand_uses[cid]--;
4050
4051 if (ivs->n_cand_uses[cid] == 0)
4052 {
4053 bitmap_clear_bit (ivs->cands, cid);
4054 /* Do not count the pseudocandidates. */
4055 if (cp->cand->iv)
4056 ivs->n_regs--;
4057 ivs->n_cands--;
4058 ivs->cand_cost -= cp->cand->cost;
4059
4060 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4061 }
4062
4063 ivs->cand_use_cost -= cp->cost;
4064
4065 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4066 iv_ca_recount_cost (data, ivs);
4067 }
4068
4069 /* Add invariants in set INVS to set IVS. */
4070
4071 static void
4072 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4073 {
4074 bitmap_iterator bi;
4075 unsigned iid;
4076
4077 if (!invs)
4078 return;
4079
4080 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4081 {
4082 ivs->n_invariant_uses[iid]++;
4083 if (ivs->n_invariant_uses[iid] == 1)
4084 ivs->n_regs++;
4085 }
4086 }
4087
4088 /* Set cost pair for USE in set IVS to CP. */
4089
4090 static void
4091 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4092 struct iv_use *use, struct cost_pair *cp)
4093 {
4094 unsigned uid = use->id, cid;
4095
4096 if (ivs->cand_for_use[uid] == cp)
4097 return;
4098
4099 if (ivs->cand_for_use[uid])
4100 iv_ca_set_no_cp (data, ivs, use);
4101
4102 if (cp)
4103 {
4104 cid = cp->cand->id;
4105
4106 ivs->bad_uses--;
4107 ivs->cand_for_use[uid] = cp;
4108 ivs->n_cand_uses[cid]++;
4109 if (ivs->n_cand_uses[cid] == 1)
4110 {
4111 bitmap_set_bit (ivs->cands, cid);
4112 /* Do not count the pseudocandidates. */
4113 if (cp->cand->iv)
4114 ivs->n_regs++;
4115 ivs->n_cands++;
4116 ivs->cand_cost += cp->cand->cost;
4117
4118 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4119 }
4120
4121 ivs->cand_use_cost += cp->cost;
4122 iv_ca_set_add_invariants (ivs, cp->depends_on);
4123 iv_ca_recount_cost (data, ivs);
4124 }
4125 }
4126
4127 /* Extend set IVS by expressing USE by some of the candidates in it
4128 if possible. */
4129
4130 static void
4131 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4132 struct iv_use *use)
4133 {
4134 struct cost_pair *best_cp = NULL, *cp;
4135 bitmap_iterator bi;
4136 unsigned i;
4137
4138 gcc_assert (ivs->upto >= use->id);
4139
4140 if (ivs->upto == use->id)
4141 {
4142 ivs->upto++;
4143 ivs->bad_uses++;
4144 }
4145
4146 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4147 {
4148 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4149
4150 if (cheaper_cost_pair (cp, best_cp))
4151 best_cp = cp;
4152 }
4153
4154 iv_ca_set_cp (data, ivs, use, best_cp);
4155 }
4156
4157 /* Get cost for assignment IVS. */
4158
4159 static unsigned
4160 iv_ca_cost (struct iv_ca *ivs)
4161 {
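  /* Any use that is not yet expressed by some candidate makes the
     whole assignment unusable, hence the infinite cost.  */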
4162 return (ivs->bad_uses ? INFTY : ivs->cost);
4163 }
4164
4165 /* Returns true if all dependences of CP are among invariants in IVS. */
4166
4167 static bool
4168 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4169 {
4170 unsigned i;
4171 bitmap_iterator bi;
4172
4173 if (!cp->depends_on)
4174 return true;
4175
4176 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4177 {
4178 if (ivs->n_invariant_uses[i] == 0)
4179 return false;
4180 }
4181
4182 return true;
4183 }
4184
4185 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4186 it before NEXT_CHANGE. */
4187
4188 static struct iv_ca_delta *
4189 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4190 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4191 {
4192 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4193
4194 change->use = use;
4195 change->old_cp = old_cp;
4196 change->new_cp = new_cp;
4197 change->next_change = next_change;
4198
4199 return change;
4200 }
4201
4202 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4203 are rewritten. */
4204
4205 static struct iv_ca_delta *
4206 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4207 {
4208 struct iv_ca_delta *last;
4209
4210 if (!l2)
4211 return l1;
4212
4213 if (!l1)
4214 return l2;
4215
4216 for (last = l1; last->next_change; last = last->next_change)
4217 continue;
4218 last->next_change = l2;
4219
4220 return l1;
4221 }
4222
4223 /* Returns the candidate by which USE is expressed in IVS. */
4224
4225 static struct cost_pair *
4226 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4227 {
4228 return ivs->cand_for_use[use->id];
4229 }
4230
4231 /* Reverse the list of changes DELTA, forming the inverse to it. */
4232
4233 static struct iv_ca_delta *
4234 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4235 {
4236 struct iv_ca_delta *act, *next, *prev = NULL;
4237 struct cost_pair *tmp;
4238
4239 for (act = delta; act; act = next)
4240 {
4241 next = act->next_change;
4242 act->next_change = prev;
4243 prev = act;
4244
4245 tmp = act->old_cp;
4246 act->old_cp = act->new_cp;
4247 act->new_cp = tmp;
4248 }
4249
4250 return prev;
4251 }
4252
4253 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4254 reverted instead. */
4255
4256 static void
4257 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4258 struct iv_ca_delta *delta, bool forward)
4259 {
4260 struct cost_pair *from, *to;
4261 struct iv_ca_delta *act;
4262
4263 if (!forward)
4264 delta = iv_ca_delta_reverse (delta);
4265
4266 for (act = delta; act; act = act->next_change)
4267 {
4268 from = act->old_cp;
4269 to = act->new_cp;
4270 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4271 iv_ca_set_cp (data, ivs, act->use, to);
4272 }
4273
4274 if (!forward)
4275 iv_ca_delta_reverse (delta);
4276 }
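/* A minimal sketch of how the delta machinery above is typically used
   to evaluate a change without destroying the current assignment; this
   is the pattern followed by iv_ca_extend and iv_ca_narrow below, and
   the variable names here are illustrative only:

     delta = iv_ca_delta_add (use, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);
     iv_ca_delta_free (&delta);

   Committing with FORWARD false reverses the list, swaps the old and
   new cost pairs in each entry, applies the result and reverses the
   list back, so the same delta can be replayed in either direction.  */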
4277
4278 /* Returns true if CAND is used in IVS. */
4279
4280 static bool
4281 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4282 {
4283 return ivs->n_cand_uses[cand->id] > 0;
4284 }
4285
4286 /* Returns number of induction variable candidates in the set IVS. */
4287
4288 static unsigned
4289 iv_ca_n_cands (struct iv_ca *ivs)
4290 {
4291 return ivs->n_cands;
4292 }
4293
4294 /* Free the list of changes DELTA. */
4295
4296 static void
4297 iv_ca_delta_free (struct iv_ca_delta **delta)
4298 {
4299 struct iv_ca_delta *act, *next;
4300
4301 for (act = *delta; act; act = next)
4302 {
4303 next = act->next_change;
4304 free (act);
4305 }
4306
4307 *delta = NULL;
4308 }
4309
4310 /* Allocates a new assignment of iv candidates to uses. */
4311
4312 static struct iv_ca *
4313 iv_ca_new (struct ivopts_data *data)
4314 {
4315 struct iv_ca *nw = XNEW (struct iv_ca);
4316
4317 nw->upto = 0;
4318 nw->bad_uses = 0;
4319 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4320 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4321 nw->cands = BITMAP_ALLOC (NULL);
4322 nw->n_cands = 0;
4323 nw->n_regs = 0;
4324 nw->cand_use_cost = 0;
4325 nw->cand_cost = 0;
4326 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4327 nw->cost = 0;
4328
4329 return nw;
4330 }
4331
4332 /* Free memory occupied by the set IVS. */
4333
4334 static void
4335 iv_ca_free (struct iv_ca **ivs)
4336 {
4337 free ((*ivs)->cand_for_use);
4338 free ((*ivs)->n_cand_uses);
4339 BITMAP_FREE ((*ivs)->cands);
4340 free ((*ivs)->n_invariant_uses);
4341 free (*ivs);
4342 *ivs = NULL;
4343 }
4344
4345 /* Dumps IVS to FILE. */
4346
4347 static void
4348 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4349 {
4350 const char *pref = " invariants ";
4351 unsigned i;
4352
4353 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4354 bitmap_print (file, ivs->cands, " candidates ","\n");
4355
4356 for (i = 1; i <= data->max_inv_id; i++)
4357 if (ivs->n_invariant_uses[i])
4358 {
4359 fprintf (file, "%s%d", pref, i);
4360 pref = ", ";
4361 }
4362 fprintf (file, "\n");
4363 }
4364
4365 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4366 new set, and store differences in DELTA. Number of induction variables
4367 in the new set is stored in N_IVS. */
4368
4369 static unsigned
4370 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4371 struct iv_cand *cand, struct iv_ca_delta **delta,
4372 unsigned *n_ivs)
4373 {
4374 unsigned i, cost;
4375 struct iv_use *use;
4376 struct cost_pair *old_cp, *new_cp;
4377
4378 *delta = NULL;
4379 for (i = 0; i < ivs->upto; i++)
4380 {
4381 use = iv_use (data, i);
4382 old_cp = iv_ca_cand_for_use (ivs, use);
4383
4384 if (old_cp
4385 && old_cp->cand == cand)
4386 continue;
4387
4388 new_cp = get_use_iv_cost (data, use, cand);
4389 if (!new_cp)
4390 continue;
4391
4392 if (!iv_ca_has_deps (ivs, new_cp))
4393 continue;
4394
4395 if (!cheaper_cost_pair (new_cp, old_cp))
4396 continue;
4397
4398 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4399 }
4400
4401 iv_ca_delta_commit (data, ivs, *delta, true);
4402 cost = iv_ca_cost (ivs);
4403 if (n_ivs)
4404 *n_ivs = iv_ca_n_cands (ivs);
4405 iv_ca_delta_commit (data, ivs, *delta, false);
4406
4407 return cost;
4408 }
4409
4410 /* Try narrowing set IVS by removing CAND. Return the cost of
4411 the new set and store the differences in DELTA. */
4412
4413 static unsigned
4414 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4415 struct iv_cand *cand, struct iv_ca_delta **delta)
4416 {
4417 unsigned i, ci;
4418 struct iv_use *use;
4419 struct cost_pair *old_cp, *new_cp, *cp;
4420 bitmap_iterator bi;
4421 struct iv_cand *cnd;
4422 unsigned cost;
4423
4424 *delta = NULL;
4425 for (i = 0; i < n_iv_uses (data); i++)
4426 {
4427 use = iv_use (data, i);
4428
4429 old_cp = iv_ca_cand_for_use (ivs, use);
4430 if (old_cp->cand != cand)
4431 continue;
4432
4433 new_cp = NULL;
4434
4435 if (data->consider_all_candidates)
4436 {
4437 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4438 {
4439 if (ci == cand->id)
4440 continue;
4441
4442 cnd = iv_cand (data, ci);
4443
4444 cp = get_use_iv_cost (data, use, cnd);
4445 if (!cp)
4446 continue;
4447 if (!iv_ca_has_deps (ivs, cp))
4448 continue;
4449
4450 if (!cheaper_cost_pair (cp, new_cp))
4451 continue;
4452
4453 new_cp = cp;
4454 }
4455 }
4456 else
4457 {
4458 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4459 {
4460 if (ci == cand->id)
4461 continue;
4462
4463 cnd = iv_cand (data, ci);
4464
4465 cp = get_use_iv_cost (data, use, cnd);
4466 if (!cp)
4467 continue;
4468 if (!iv_ca_has_deps (ivs, cp))
4469 continue;
4470
4471 if (!cheaper_cost_pair (cp, new_cp))
4472 continue;
4473
4474 new_cp = cp;
4475 }
4476 }
4477
4478 if (!new_cp)
4479 {
4480 iv_ca_delta_free (delta);
4481 return INFTY;
4482 }
4483
4484 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4485 }
4486
4487 iv_ca_delta_commit (data, ivs, *delta, true);
4488 cost = iv_ca_cost (ivs);
4489 iv_ca_delta_commit (data, ivs, *delta, false);
4490
4491 return cost;
4492 }
4493
4494 /* Try optimizing the set of candidates IVS by removing candidates other
4495 than EXCEPT_CAND from it. Return cost of the new set, and store the
4496 differences in DELTA. */
4497
4498 static unsigned
4499 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4500 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4501 {
4502 bitmap_iterator bi;
4503 struct iv_ca_delta *act_delta, *best_delta;
4504 unsigned i, best_cost, acost;
4505 struct iv_cand *cand;
4506
4507 best_delta = NULL;
4508 best_cost = iv_ca_cost (ivs);
4509
4510 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4511 {
4512 cand = iv_cand (data, i);
4513
4514 if (cand == except_cand)
4515 continue;
4516
4517 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4518
4519 if (acost < best_cost)
4520 {
4521 best_cost = acost;
4522 iv_ca_delta_free (&best_delta);
4523 best_delta = act_delta;
4524 }
4525 else
4526 iv_ca_delta_free (&act_delta);
4527 }
4528
4529 if (!best_delta)
4530 {
4531 *delta = NULL;
4532 return best_cost;
4533 }
4534
4535 /* Recurse to possibly remove other unnecessary ivs. */
4536 iv_ca_delta_commit (data, ivs, best_delta, true);
4537 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4538 iv_ca_delta_commit (data, ivs, best_delta, false);
4539 *delta = iv_ca_delta_join (best_delta, *delta);
4540 return best_cost;
4541 }
4542
4543 /* Tries to extend the set IVS in the best possible way in order
4544 to express USE. */
4545
4546 static bool
4547 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4548 struct iv_use *use)
4549 {
4550 unsigned best_cost, act_cost;
4551 unsigned i;
4552 bitmap_iterator bi;
4553 struct iv_cand *cand;
4554 struct iv_ca_delta *best_delta = NULL, *act_delta;
4555 struct cost_pair *cp;
4556
4557 iv_ca_add_use (data, ivs, use);
4558 best_cost = iv_ca_cost (ivs);
4559
4560 cp = iv_ca_cand_for_use (ivs, use);
4561 if (cp)
4562 {
4563 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4564 iv_ca_set_no_cp (data, ivs, use);
4565 }
4566
4567 /* First try the important candidates. Only if that fails, try the specific ones.
4568 Rationale -- in loops with many variables the best choice often is to use
4569 just one generic biv. If we added here many ivs specific to the uses,
4570 the optimization algorithm later would be likely to get stuck in a local
4571 minimum, thus causing us to create too many ivs. The approach from
4572 few ivs to more seems more likely to be successful -- starting from few
4573 ivs, replacing an expensive use by a specific iv should always be a
4574 win. */
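  /* For instance (an invented example, not from any test case), in

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + c[i];
           d[i] = 2 * a[i];
         }

     a single counter covering all the array accesses is usually cheaper
     than giving each access its own pointer iv, even though each access
     in isolation might prefer a specific pointer.  */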
4575 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4576 {
4577 cand = iv_cand (data, i);
4578
4579 if (iv_ca_cand_used_p (ivs, cand))
4580 continue;
4581
4582 cp = get_use_iv_cost (data, use, cand);
4583 if (!cp)
4584 continue;
4585
4586 iv_ca_set_cp (data, ivs, use, cp);
4587 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4588 iv_ca_set_no_cp (data, ivs, use);
4589 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4590
4591 if (act_cost < best_cost)
4592 {
4593 best_cost = act_cost;
4594
4595 iv_ca_delta_free (&best_delta);
4596 best_delta = act_delta;
4597 }
4598 else
4599 iv_ca_delta_free (&act_delta);
4600 }
4601
4602 if (best_cost == INFTY)
4603 {
4604 for (i = 0; i < use->n_map_members; i++)
4605 {
4606 cp = use->cost_map + i;
4607 cand = cp->cand;
4608 if (!cand)
4609 continue;
4610
4611 /* Already tried this. */
4612 if (cand->important)
4613 continue;
4614
4615 if (iv_ca_cand_used_p (ivs, cand))
4616 continue;
4617
4618 act_delta = NULL;
4619 iv_ca_set_cp (data, ivs, use, cp);
4620 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4621 iv_ca_set_no_cp (data, ivs, use);
4622 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4623 cp, act_delta);
4624
4625 if (act_cost < best_cost)
4626 {
4627 best_cost = act_cost;
4628
4629 if (best_delta)
4630 iv_ca_delta_free (&best_delta);
4631 best_delta = act_delta;
4632 }
4633 else
4634 iv_ca_delta_free (&act_delta);
4635 }
4636 }
4637
4638 iv_ca_delta_commit (data, ivs, best_delta, true);
4639 iv_ca_delta_free (&best_delta);
4640
4641 return (best_cost != INFTY);
4642 }
4643
4644 /* Finds an initial assignment of candidates to uses. */
4645
4646 static struct iv_ca *
4647 get_initial_solution (struct ivopts_data *data)
4648 {
4649 struct iv_ca *ivs = iv_ca_new (data);
4650 unsigned i;
4651
4652 for (i = 0; i < n_iv_uses (data); i++)
4653 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4654 {
4655 iv_ca_free (&ivs);
4656 return NULL;
4657 }
4658
4659 return ivs;
4660 }
4661
4662 /* Tries to improve the set of induction variables IVS. */
4663
4664 static bool
4665 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4666 {
4667 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4668 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4669 struct iv_cand *cand;
4670
4671 /* Try extending the set of induction variables by one. */
4672 for (i = 0; i < n_iv_cands (data); i++)
4673 {
4674 cand = iv_cand (data, i);
4675
4676 if (iv_ca_cand_used_p (ivs, cand))
4677 continue;
4678
4679 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4680 if (!act_delta)
4681 continue;
4682
4683 /* If we successfully added the candidate and the set is small enough,
4684 try optimizing it by removing other candidates. */
4685 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4686 {
4687 iv_ca_delta_commit (data, ivs, act_delta, true);
4688 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4689 iv_ca_delta_commit (data, ivs, act_delta, false);
4690 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4691 }
4692
4693 if (acost < best_cost)
4694 {
4695 best_cost = acost;
4696 iv_ca_delta_free (&best_delta);
4697 best_delta = act_delta;
4698 }
4699 else
4700 iv_ca_delta_free (&act_delta);
4701 }
4702
4703 if (!best_delta)
4704 {
4705 /* Try removing the candidates from the set instead. */
4706 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4707
4708 /* Nothing more we can do. */
4709 if (!best_delta)
4710 return false;
4711 }
4712
4713 iv_ca_delta_commit (data, ivs, best_delta, true);
4714 gcc_assert (best_cost == iv_ca_cost (ivs));
4715 iv_ca_delta_free (&best_delta);
4716 return true;
4717 }
4718
4719 /* Attempts to find the optimal set of induction variables. We use a simple
4720 greedy heuristic -- we try to replace at most one candidate in the selected
4721 solution and remove the unused ivs while this improves the cost. */
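/* Illustratively (the costs are invented), a run might start from an
   initial assignment with cost 24, find that adding one candidate and
   pruning another lowers it to 20, improve once more to 18, and stop
   as soon as no single extension or removal helps; with TDF_DETAILS,
   each improved assignment is dumped via iv_ca_dump.  */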
4722
4723 static struct iv_ca *
4724 find_optimal_iv_set (struct ivopts_data *data)
4725 {
4726 unsigned i;
4727 struct iv_ca *set;
4728 struct iv_use *use;
4729
4730 /* Get the initial solution. */
4731 set = get_initial_solution (data);
4732 if (!set)
4733 {
4734 if (dump_file && (dump_flags & TDF_DETAILS))
4735 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4736 return NULL;
4737 }
4738
4739 if (dump_file && (dump_flags & TDF_DETAILS))
4740 {
4741 fprintf (dump_file, "Initial set of candidates:\n");
4742 iv_ca_dump (data, dump_file, set);
4743 }
4744
4745 while (try_improve_iv_set (data, set))
4746 {
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 {
4749 fprintf (dump_file, "Improved to:\n");
4750 iv_ca_dump (data, dump_file, set);
4751 }
4752 }
4753
4754 if (dump_file && (dump_flags & TDF_DETAILS))
4755 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4756
4757 for (i = 0; i < n_iv_uses (data); i++)
4758 {
4759 use = iv_use (data, i);
4760 use->selected = iv_ca_cand_for_use (set, use)->cand;
4761 }
4762
4763 return set;
4764 }
4765
4766 /* Creates a new induction variable corresponding to CAND. */
4767
4768 static void
4769 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4770 {
4771 block_stmt_iterator incr_pos;
4772 tree base;
4773 bool after = false;
4774
4775 if (!cand->iv)
4776 return;
4777
4778 switch (cand->pos)
4779 {
4780 case IP_NORMAL:
4781 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4782 break;
4783
4784 case IP_END:
4785 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4786 after = true;
4787 break;
4788
4789 case IP_ORIGINAL:
4790 /* Mark that the iv is preserved. */
4791 name_info (data, cand->var_before)->preserve_biv = true;
4792 name_info (data, cand->var_after)->preserve_biv = true;
4793
4794 /* Rewrite the increment so that it uses var_before directly. */
4795 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4796
4797 return;
4798 }
4799
4800 gimple_add_tmp_var (cand->var_before);
4801 add_referenced_var (cand->var_before);
4802
4803 base = unshare_expr (cand->iv->base);
4804
4805 create_iv (base, unshare_expr (cand->iv->step),
4806 cand->var_before, data->current_loop,
4807 &incr_pos, after, &cand->var_before, &cand->var_after);
4808 }
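/* For illustration (the names and exact shape are approximate): for a
   candidate with base 0 and step 1 in a simple counted loop, IP_NORMAL
   places the increment of the new variable just before the exit test,

     ivtmp.5 = ivtmp.4 + 1;
     if (ivtmp.5 < n) goto loop;

   while IP_END places it after the exit test, at the end of the latch
   block.  The ivtmp.N names stand for the temporaries created for the
   new ivs.  */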
4809
4810 /* Creates new induction variables described in SET. */
4811
4812 static void
4813 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4814 {
4815 unsigned i;
4816 struct iv_cand *cand;
4817 bitmap_iterator bi;
4818
4819 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4820 {
4821 cand = iv_cand (data, i);
4822 create_new_iv (data, cand);
4823 }
4824 }
4825
4826 /* Removes statement STMT (a real statement or a phi node). If
4827 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines. */
4828
4829 static void
4830 remove_statement (tree stmt, bool including_defined_name)
4831 {
4832 if (TREE_CODE (stmt) == PHI_NODE)
4833 {
4834 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4835 }
4836 else
4837 {
4838 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4839
4840 bsi_remove (&bsi, true);
4841 release_defs (stmt);
4842 }
4843 }
4844
4845 /* Rewrites USE (definition of iv used in a nonlinear expression)
4846 using candidate CAND. */
4847
4848 static void
4849 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4850 struct iv_use *use, struct iv_cand *cand)
4851 {
4852 tree comp;
4853 tree op, stmts, tgt, ass;
4854 block_stmt_iterator bsi, pbsi;
4855
4856 /* An important special case -- if we are asked to express the value of
4857 the original iv by itself, just exit; there is no need to
4858 introduce a new computation (that might also need casting the
4859 variable to unsigned and back). */
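  /* For example (the SSA names are invented for illustration): if
     use->stmt is the original increment i_13 = i_4 + 1 and CAND is that
     original biv, incremented at this very statement, the code below
     leaves the statement untouched as long as the other operand is a
     constant or equal to the candidate's step.  */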
4860 if (cand->pos == IP_ORIGINAL
4861 && cand->incremented_at == use->stmt)
4862 {
4863 tree step, ctype, utype;
4864 enum tree_code incr_code = PLUS_EXPR;
4865
4866 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4867 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4868
4869 step = cand->iv->step;
4870 ctype = TREE_TYPE (step);
4871 utype = TREE_TYPE (cand->var_after);
4872 if (TREE_CODE (step) == NEGATE_EXPR)
4873 {
4874 incr_code = MINUS_EXPR;
4875 step = TREE_OPERAND (step, 0);
4876 }
4877
4878 /* Check whether we may leave the computation unchanged.
4879 This is the case only if it does not rely on other
4880 computations in the loop -- otherwise, the computation
4881 we rely upon may be removed in remove_unused_ivs,
4882 thus leading to ICE. */
4883 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4884 if (TREE_CODE (op) == PLUS_EXPR
4885 || TREE_CODE (op) == MINUS_EXPR)
4886 {
4887 if (TREE_OPERAND (op, 0) == cand->var_before)
4888 op = TREE_OPERAND (op, 1);
4889 else if (TREE_CODE (op) == PLUS_EXPR
4890 && TREE_OPERAND (op, 1) == cand->var_before)
4891 op = TREE_OPERAND (op, 0);
4892 else
4893 op = NULL_TREE;
4894 }
4895 else
4896 op = NULL_TREE;
4897
4898 if (op
4899 && (TREE_CODE (op) == INTEGER_CST
4900 || operand_equal_p (op, step, 0)))
4901 return;
4902
4903 /* Otherwise, add the necessary computations to express
4904 the iv. */
4905 op = fold_convert (ctype, cand->var_before);
4906 comp = fold_convert (utype,
4907 build2 (incr_code, ctype, op,
4908 unshare_expr (step)));
4909 }
4910 else
4911 {
4912 comp = get_computation (data->current_loop, use, cand);
4913 gcc_assert (comp != NULL_TREE);
4914 }
4915
4916 switch (TREE_CODE (use->stmt))
4917 {
4918 case PHI_NODE:
4919 tgt = PHI_RESULT (use->stmt);
4920
4921 /* If we should keep the biv, do not replace it. */
4922 if (name_info (data, tgt)->preserve_biv)
4923 return;
4924
4925 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4926 while (!bsi_end_p (pbsi)
4927 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4928 {
4929 bsi = pbsi;
4930 bsi_next (&pbsi);
4931 }
4932 break;
4933
4934 case GIMPLE_MODIFY_STMT:
4935 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4936 bsi = bsi_for_stmt (use->stmt);
4937 break;
4938
4939 default:
4940 gcc_unreachable ();
4941 }
4942
4943 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4944
4945 if (TREE_CODE (use->stmt) == PHI_NODE)
4946 {
4947 if (stmts)
4948 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4949 ass = build_gimple_modify_stmt (tgt, op);
4950 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4951 remove_statement (use->stmt, false);
4952 SSA_NAME_DEF_STMT (tgt) = ass;
4953 }
4954 else
4955 {
4956 if (stmts)
4957 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4958 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4959 }
4960 }
4961
4962 /* Replaces ssa name in index IDX by its basic variable. Callback for
4963 for_each_index. */
4964
4965 static bool
4966 idx_remove_ssa_names (tree base, tree *idx,
4967 void *data ATTRIBUTE_UNUSED)
4968 {
4969 tree *op;
4970
4971 if (TREE_CODE (*idx) == SSA_NAME)
4972 *idx = SSA_NAME_VAR (*idx);
4973
4974 if (TREE_CODE (base) == ARRAY_REF)
4975 {
4976 op = &TREE_OPERAND (base, 2);
4977 if (*op
4978 && TREE_CODE (*op) == SSA_NAME)
4979 *op = SSA_NAME_VAR (*op);
4980 op = &TREE_OPERAND (base, 3);
4981 if (*op
4982 && TREE_CODE (*op) == SSA_NAME)
4983 *op = SSA_NAME_VAR (*op);
4984 }
4985
4986 return true;
4987 }
4988
4989 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4990
4991 static tree
4992 unshare_and_remove_ssa_names (tree ref)
4993 {
4994 ref = unshare_expr (ref);
4995 for_each_index (&ref, idx_remove_ssa_names, NULL);
4996
4997 return ref;
4998 }
4999
5000 /* Extract the alias analysis info for the memory reference REF. There are
5001 several ways this information may be stored, and what precisely it means
5002 depends on the type of the reference; but there is always one _DECL node
5003 hidden somewhere that is used to determine the set of virtual operands
5004 for the reference. The code below deciphers this jungle and extracts
5005 this single useful piece of information. */
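/* For instance (names invented): for a use *p_1 where p_1 carries
   points-to information with a name memory tag, that tag is returned;
   for *p_1 without one, the symbol memory tag of the base variable p is
   used; and for a direct reference such as a.b.c, the tag of the decl a
   (or a itself, if it has no tag) is returned.  */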
5006
5007 static tree
5008 get_ref_tag (tree ref, tree orig)
5009 {
5010 tree var = get_base_address (ref);
5011 tree aref = NULL_TREE, tag, sv;
5012 HOST_WIDE_INT offset, size, maxsize;
5013
5014 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5015 {
5016 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5017 if (ref)
5018 break;
5019 }
5020
5021 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5022 return unshare_expr (sv);
5023
5024 if (!var)
5025 return NULL_TREE;
5026
5027 if (TREE_CODE (var) == INDIRECT_REF)
5028 {
5029 /* If the base is a dereference of a pointer, first check its name memory
5030 tag. If it does not have one, use its symbol memory tag. */
5031 var = TREE_OPERAND (var, 0);
5032 if (TREE_CODE (var) != SSA_NAME)
5033 return NULL_TREE;
5034
5035 if (SSA_NAME_PTR_INFO (var))
5036 {
5037 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5038 if (tag)
5039 return tag;
5040 }
5041
5042 var = SSA_NAME_VAR (var);
5043 tag = symbol_mem_tag (var);
5044 gcc_assert (tag != NULL_TREE);
5045 return tag;
5046 }
5047 else
5048 {
5049 if (!DECL_P (var))
5050 return NULL_TREE;
5051
5052 tag = symbol_mem_tag (var);
5053 if (tag)
5054 return tag;
5055
5056 return var;
5057 }
5058 }
5059
5060 /* Copies the reference information from OLD_REF to NEW_REF. */
5061
5062 static void
5063 copy_ref_info (tree new_ref, tree old_ref)
5064 {
5065 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5066 copy_mem_ref_info (new_ref, old_ref);
5067 else
5068 {
5069 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5070 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5071 }
5072 }
5073
5074 /* Rewrites USE (address that is an iv) using candidate CAND. */
5075
5076 static void
5077 rewrite_use_address (struct ivopts_data *data,
5078 struct iv_use *use, struct iv_cand *cand)
5079 {
5080 aff_tree aff;
5081 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5082 tree ref;
5083 bool ok;
5084
5085 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5086 gcc_assert (ok);
5087 unshare_aff_combination (&aff);
5088
5089 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5090 copy_ref_info (ref, *use->op_p);
5091 *use->op_p = ref;
5092 }
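/* As an illustration (the exact form is target-dependent and the names
   are invented), an address use a[i] whose affine combination is based
   on the chosen iv may end up as a TARGET_MEM_REF along the lines of
   MEM[symbol: a, index: ivtmp.5, step: 4], which lets the backend use
   its addressing modes directly.  */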
5093
5094 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5095 candidate CAND. */
5096
5097 static void
5098 rewrite_use_compare (struct ivopts_data *data,
5099 struct iv_use *use, struct iv_cand *cand)
5100 {
5101 tree comp, *var_p, op, bound;
5102 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5103 enum tree_code compare;
5104 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5105 bool ok;
5106
5107 bound = cp->value;
5108 if (bound)
5109 {
5110 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5111 tree var_type = TREE_TYPE (var);
5112
5113 compare = iv_elimination_compare (data, use);
5114 bound = unshare_expr (fold_convert (var_type, bound));
5115 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5116
5117 *use->op_p = build2 (compare, boolean_type_node, var, op);
5118 return;
5119 }
5120
5121 /* The induction variable elimination failed; just express the original
5122 giv. */
5123 comp = get_computation (data->current_loop, use, cand);
5124 gcc_assert (comp != NULL_TREE);
5125
5126 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5127 gcc_assert (ok);
5128
5129 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5130 }
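/* For example (an invented case): when the bound of the control
   variable of

     for (i = 0; i < 100; i++)
       *p++ = 0;

   can be precomputed, the exit test i < 100 may be replaced by a
   comparison of the pointer iv against that bound, such as
   p != base + 400 for 4-byte elements, after which i itself becomes
   unused and is removed by remove_unused_ivs.  */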
5131
5132 /* Rewrites USE using candidate CAND. */
5133
5134 static void
5135 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5136 {
5137 push_stmt_changes (&use->stmt);
5138
5139 switch (use->type)
5140 {
5141 case USE_NONLINEAR_EXPR:
5142 rewrite_use_nonlinear_expr (data, use, cand);
5143 break;
5144
5145 case USE_ADDRESS:
5146 rewrite_use_address (data, use, cand);
5147 break;
5148
5149 case USE_COMPARE:
5150 rewrite_use_compare (data, use, cand);
5151 break;
5152
5153 default:
5154 gcc_unreachable ();
5155 }
5156
5157 pop_stmt_changes (&use->stmt);
5158 }
5159
5160 /* Rewrite the uses using the selected induction variables. */
5161
5162 static void
5163 rewrite_uses (struct ivopts_data *data)
5164 {
5165 unsigned i;
5166 struct iv_cand *cand;
5167 struct iv_use *use;
5168
5169 for (i = 0; i < n_iv_uses (data); i++)
5170 {
5171 use = iv_use (data, i);
5172 cand = use->selected;
5173 gcc_assert (cand);
5174
5175 rewrite_use (data, use, cand);
5176 }
5177 }
5178
5179 /* Removes the ivs that are not used after rewriting. */
5180
5181 static void
5182 remove_unused_ivs (struct ivopts_data *data)
5183 {
5184 unsigned j;
5185 bitmap_iterator bi;
5186
5187 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5188 {
5189 struct version_info *info;
5190
5191 info = ver_info (data, j);
5192 if (info->iv
5193 && !integer_zerop (info->iv->step)
5194 && !info->inv_id
5195 && !info->iv->have_use_for
5196 && !info->preserve_biv)
5197 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5198 }
5199 }
5200
5201 /* Frees data allocated by the optimization of a single loop. */
5202
5203 static void
5204 free_loop_data (struct ivopts_data *data)
5205 {
5206 unsigned i, j;
5207 bitmap_iterator bi;
5208 tree obj;
5209
5210 if (data->niters)
5211 {
5212 pointer_map_destroy (data->niters);
5213 data->niters = NULL;
5214 }
5215
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5217 {
5218 struct version_info *info;
5219
5220 info = ver_info (data, i);
5221 if (info->iv)
5222 free (info->iv);
5223 info->iv = NULL;
5224 info->has_nonlin_use = false;
5225 info->preserve_biv = false;
5226 info->inv_id = 0;
5227 }
5228 bitmap_clear (data->relevant);
5229 bitmap_clear (data->important_candidates);
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 {
5233 struct iv_use *use = iv_use (data, i);
5234
5235 free (use->iv);
5236 BITMAP_FREE (use->related_cands);
5237 for (j = 0; j < use->n_map_members; j++)
5238 if (use->cost_map[j].depends_on)
5239 BITMAP_FREE (use->cost_map[j].depends_on);
5240 free (use->cost_map);
5241 free (use);
5242 }
5243 VEC_truncate (iv_use_p, data->iv_uses, 0);
5244
5245 for (i = 0; i < n_iv_cands (data); i++)
5246 {
5247 struct iv_cand *cand = iv_cand (data, i);
5248
5249 if (cand->iv)
5250 free (cand->iv);
5251 if (cand->depends_on)
5252 BITMAP_FREE (cand->depends_on);
5253 free (cand);
5254 }
5255 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5256
5257 if (data->version_info_size < num_ssa_names)
5258 {
5259 data->version_info_size = 2 * num_ssa_names;
5260 free (data->version_info);
5261 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5262 }
5263
5264 data->max_inv_id = 0;
5265
5266 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5267 SET_DECL_RTL (obj, NULL_RTX);
5268
5269 VEC_truncate (tree, decl_rtl_to_reset, 0);
5270 }
5271
5272 /* Finalizes the data structures used by the iv optimization pass. */
5274
5275 static void
5276 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5277 {
5278 free_loop_data (data);
5279 free (data->version_info);
5280 BITMAP_FREE (data->relevant);
5281 BITMAP_FREE (data->important_candidates);
5282
5283 VEC_free (tree, heap, decl_rtl_to_reset);
5284 VEC_free (iv_use_p, heap, data->iv_uses);
5285 VEC_free (iv_cand_p, heap, data->iv_candidates);
5286 }
5287
5288 /* Optimizes the LOOP. Returns true if anything changed. */
5289
5290 static bool
5291 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5292 {
5293 bool changed = false;
5294 struct iv_ca *iv_ca;
5295 edge exit;
5296
5297 gcc_assert (!data->niters);
5298 data->current_loop = loop;
5299
5300 if (dump_file && (dump_flags & TDF_DETAILS))
5301 {
5302 fprintf (dump_file, "Processing loop %d\n", loop->num);
5303
5304 exit = single_dom_exit (loop);
5305 if (exit)
5306 {
5307 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5308 exit->src->index, exit->dest->index);
5309 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5310 fprintf (dump_file, "\n");
5311 }
5312
5313 fprintf (dump_file, "\n");
5314 }
5315
5316 /* For each ssa name, determine whether it behaves as an induction variable
5317 in some loop. */
5318 if (!find_induction_variables (data))
5319 goto finish;
5320
5321 /* Finds interesting uses (item 1). */
5322 find_interesting_uses (data);
5323 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5324 goto finish;
5325
5326 /* Finds candidates for the induction variables (item 2). */
5327 find_iv_candidates (data);
5328
5329 /* Calculates the costs (item 3, part 1). */
5330 determine_use_iv_costs (data);
5331 determine_iv_costs (data);
5332 determine_set_costs (data);
5333
5334 /* Find the optimal set of induction variables (item 3, part 2). */
5335 iv_ca = find_optimal_iv_set (data);
5336 if (!iv_ca)
5337 goto finish;
5338 changed = true;
5339
5340 /* Create the new induction variables (item 4, part 1). */
5341 create_new_ivs (data, iv_ca);
5342 iv_ca_free (&iv_ca);
5343
5344 /* Rewrite the uses (item 4, part 2). */
5345 rewrite_uses (data);
5346
5347 /* Remove the ivs that are unused after rewriting. */
5348 remove_unused_ivs (data);
5349
5350 /* We have changed the structure of induction variables; it might happen
5351 that definitions in the scev database refer to some of them that were
5352 eliminated. */
5353 scev_reset ();
5354
5355 finish:
5356 free_loop_data (data);
5357
5358 return changed;
5359 }
5360
5361 /* Main entry point. Optimizes induction variables in loops. */
5362
5363 void
5364 tree_ssa_iv_optimize (void)
5365 {
5366 struct loop *loop;
5367 struct ivopts_data data;
5368 loop_iterator li;
5369
5370 tree_ssa_iv_optimize_init (&data);
5371
5372 /* Optimize the loops starting with the innermost ones. */
5373 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5374 {
5375 if (dump_file && (dump_flags & TDF_DETAILS))
5376 flow_loop_dump (loop, dump_file, NULL, 1);
5377
5378 tree_ssa_iv_optimize_loop (&data, loop);
5379 }
5380
5381 tree_ssa_iv_optimize_finalize (&data);
5382 }