[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (with respect to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and enable us to decide
62 costs more precisely, but getting all the interactions right would
63 be complicated. */
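/* As a purely illustrative sketch (not part of the implementation, using
   hypothetical variables a, i, n and p): for a loop such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   the address use a[i] may be rewritten in terms of a single pointer
   induction variable, roughly

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   so that the per-iteration computation of a + i * sizeof (*a), and the
   counter i itself when it is not otherwise needed, can be eliminated. */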
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91
92 /* The infinite cost. */
93 #define INFTY 10000000
94
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
98
99
100 /* Representation of the induction variable. */
101 struct iv
102 {
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to which the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
110 };
111
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
114 {
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
121 };
122
123 /* Information attached to loop. */
124 struct loop_data
125 {
126 unsigned regs_used; /* Number of registers used. */
127 };
128
129 /* Types of uses. */
130 enum use_type
131 {
132 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
133 USE_OUTER, /* The induction variable is used outside the loop. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
136 };
137
138 /* The candidate - cost pair. */
139 struct cost_pair
140 {
141 struct iv_cand *cand; /* The candidate. */
142 unsigned cost; /* The cost. */
143 bitmap depends_on; /* The list of invariants that have to be
144 preserved. */
145 };
146
147 /* Use. */
148 struct iv_use
149 {
150 unsigned id; /* The id of the use. */
151 enum use_type type; /* Type of the use. */
152 struct iv *iv; /* The induction variable it is based on. */
153 tree stmt; /* Statement in which it occurs. */
154 tree *op_p; /* The place where it occurs. */
155 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 important ones. */
157
158 unsigned n_map_members; /* Number of candidates in the cost_map list. */
159 struct cost_pair *cost_map;
160 /* The costs with respect to the iv candidates. */
161
162 struct iv_cand *selected;
163 /* The selected candidate. */
164 };
165
166 /* The position where the iv is computed. */
167 enum iv_position
168 {
169 IP_NORMAL, /* At the end, just before the exit condition. */
170 IP_END, /* At the end of the latch block. */
171 IP_ORIGINAL /* The original biv. */
172 };
173
174 /* The induction variable candidate. */
175 struct iv_cand
176 {
177 unsigned id; /* The number of the candidate. */
178 bool important; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos; /* Where it is computed. */
181 tree incremented_at; /* For the original biv, the statement where it is
182 incremented. */
183 tree var_before; /* The variable used for it before increment. */
184 tree var_after; /* The variable used for it after increment. */
185 struct iv *iv; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost; /* Cost of the candidate. */
190 };
191
192 /* The data used by the induction variable optimizations. */
193
194 struct ivopts_data
195 {
196 /* The currently optimized loop. */
197 struct loop *current_loop;
198
199 /* Numbers of iterations for all exits of the current loop. */
200 htab_t niters;
201
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
204
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
207
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
210
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
213
214 /* The uses of induction variables. */
215 varray_type iv_uses;
216
217 /* The candidates. */
218 varray_type iv_candidates;
219
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
222
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
226 };
227
228 /* An assignment of iv candidates to uses. */
229
230 struct iv_ca
231 {
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
234
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
237
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
240
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
243
244 /* The candidates used. */
245 bitmap cands;
246
247 /* The number of candidates in the set. */
248 unsigned n_cands;
249
250 /* Total number of registers needed. */
251 unsigned n_regs;
252
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
255
256 /* Total cost of candidates. */
257 unsigned cand_cost;
258
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
261
262 /* Total cost of the assignment. */
263 unsigned cost;
264 };
265
266 /* Difference of two iv candidate assignments. */
267
268 struct iv_ca_delta
269 {
270 /* Changed use. */
271 struct iv_use *use;
272
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
275
276 /* A new assignment. */
277 struct cost_pair *new_cp;
278
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
281 };
282
283 /* Bound on the number of candidates below which all candidates are considered. */
284
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
287
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
290
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
293
294 /* If there are at most this number of ivs in the set, always try removing
295 unnecessary ivs from the set. */
296
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
299
300 /* The list of trees for which the decl_rtl field must be reset is stored
301 here. */
302
303 static varray_type decl_rtl_to_reset;
304
305 /* Number of uses recorded in DATA. */
306
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
309 {
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
311 }
312
313 /* Ith use recorded in DATA. */
314
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
317 {
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
319 }
320
321 /* Number of candidates recorded in DATA. */
322
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
325 {
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
327 }
328
329 /* Ith candidate recorded in DATA. */
330
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
333 {
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
335 }
336
337 /* The data for LOOP. */
338
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
341 {
342 return loop->aux;
343 }
344
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
346
347 static edge
348 single_dom_exit (struct loop *loop)
349 {
350 edge exit = loop->single_exit;
351
352 if (!exit)
353 return NULL;
354
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
357
358 return exit;
359 }
360
361 /* Dumps information about the induction variable IV to FILE. */
362
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
366 {
367 if (iv->ssa_name)
368 {
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
372 }
373
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
377
378 if (iv->step)
379 {
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
383
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
387 }
388 else
389 {
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
393 }
394
395 if (iv->base_object)
396 {
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
400 }
401
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
404 }
405
406 /* Dumps information about the USE to FILE. */
407
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
411 {
412 fprintf (file, "use %d\n", use->id);
413
414 switch (use->type)
415 {
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
419
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
423
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
427
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
431
432 default:
433 gcc_unreachable ();
434 }
435
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
439
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
444
445 dump_iv (file, use->iv);
446
447 if (use->related_cands)
448 {
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
451 }
452 }
453
454 /* Dumps information about the uses to FILE. */
455
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
459 {
460 unsigned i;
461 struct iv_use *use;
462
463 for (i = 0; i < n_iv_uses (data); i++)
464 {
465 use = iv_use (data, i);
466
467 dump_use (file, use);
468 fprintf (file, "\n");
469 }
470 }
471
472 /* Dumps information about induction variable candidate CAND to FILE. */
473
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
477 {
478 struct iv *iv = cand->iv;
479
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
482
483 if (!iv)
484 {
485 fprintf (file, " final value replacement\n");
486 return;
487 }
488
489 switch (cand->pos)
490 {
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
494
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
498
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
502 }
503
504 dump_iv (file, iv);
505 }
506
507 /* Returns the info for ssa version VER. */
508
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
511 {
512 return data->version_info + ver;
513 }
514
515 /* Returns the info for ssa name NAME. */
516
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
519 {
520 return ver_info (data, SSA_NAME_VERSION (name));
521 }
522
523 /* Checks whether there exists a number X such that X * B = A, counting
524 modulo 2^BITS. */
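/* An illustrative example (not used by the pass itself): divide (8, 6, 2, &x)
   first halves the equation to x * 1 = 3 (mod 2^7) and stores 3 in *x, since
   3 * 2 == 6 (mod 2^8); divide (8, 3, 2, &x) returns false, because no
   multiple of an even number can be odd. */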
525
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
529 {
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
533
534 a &= mask;
535 b &= mask;
536
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
539 {
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
544 }
545
546 if (!(b & 1))
547 {
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
550 }
551
552 /* Find the inverse of b. Since b is odd, the group of units modulo 2^bits
553 has order 2^(bits - 1), so the inverse is b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
557 {
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
560 }
561
562 val = (a * inv) & mask;
563
564 gcc_assert (((val * b) & mask) == a);
565
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
568
569 *x = val;
570
571 return true;
572 }
573
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
576
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
579 {
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
581
582 gcc_assert (bb);
583
584 if (sbb == loop->latch)
585 return true;
586
587 if (sbb != bb)
588 return false;
589
590 return stmt == last_stmt (bb);
591 }
592
593 /* Returns true if STMT is after the place where the original induction
594 variable CAND is incremented. */
595
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
598 {
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
602
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
605
606 if (stmt_bb != cand_bb)
607 return true;
608
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
612 {
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
617 }
618 }
619
620 /* Returns true if STMT is after the place where the induction variable
621 CAND is incremented in LOOP. */
622
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
625 {
626 switch (cand->pos)
627 {
628 case IP_END:
629 return false;
630
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
633
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
636
637 default:
638 gcc_unreachable ();
639 }
640 }
641
642 /* Element of the table in which we cache the numbers of iterations obtained
643 from exits of the loop. */
644
645 struct nfe_cache_elt
646 {
647 /* The edge for which the number of iterations is cached. */
648 edge exit;
649
650 /* True if the # of iterations was successfully determined. */
651 bool valid_p;
652
653 /* Description of # of iterations. */
654 struct tree_niter_desc niter;
655 };
656
657 /* Hash function for nfe_cache_elt E. */
658
659 static hashval_t
660 nfe_hash (const void *e)
661 {
662 const struct nfe_cache_elt *elt = e;
663
664 return htab_hash_pointer (elt->exit);
665 }
666
667 /* Equality function for nfe_cache_elt E1 and edge E2. */
668
669 static int
670 nfe_eq (const void *e1, const void *e2)
671 {
672 const struct nfe_cache_elt *elt1 = e1;
673
674 return elt1->exit == e2;
675 }
676
677 /* Returns structure describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
679
680 static struct tree_niter_desc *
681 niter_for_exit (struct ivopts_data *data, edge exit)
682 {
683 struct nfe_cache_elt *nfe_desc;
684 PTR *slot;
685
686 slot = htab_find_slot_with_hash (data->niters, exit,
687 htab_hash_pointer (exit),
688 INSERT);
689
690 if (!*slot)
691 {
692 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
693 nfe_desc->exit = exit;
694 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
695 exit, &nfe_desc->niter);
696 *slot = nfe_desc;
697 }
698 else
699 nfe_desc = *slot;
700
701 if (!nfe_desc->valid_p)
702 return NULL;
703
704 return &nfe_desc->niter;
705 }
706
707 /* Returns structure describing number of iterations determined from
708 single dominating exit of DATA->current_loop, or NULL if something
709 goes wrong. */
710
711 static struct tree_niter_desc *
712 niter_for_single_dom_exit (struct ivopts_data *data)
713 {
714 edge exit = single_dom_exit (data->current_loop);
715
716 if (!exit)
717 return NULL;
718
719 return niter_for_exit (data, exit);
720 }
721
722 /* Initializes data structures used by the iv optimization pass, stored
723 in DATA. LOOPS is the loop tree. */
724
725 static void
726 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
727 {
728 unsigned i;
729
730 data->version_info_size = 2 * num_ssa_names;
731 data->version_info = xcalloc (data->version_info_size,
732 sizeof (struct version_info));
733 data->relevant = BITMAP_ALLOC (NULL);
734 data->important_candidates = BITMAP_ALLOC (NULL);
735 data->max_inv_id = 0;
736 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
737
738 for (i = 1; i < loops->num; i++)
739 if (loops->parray[i])
740 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
741
742 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
744 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
745 }
746
747 /* Returns a memory object to which EXPR points. In case we are able to
748 determine that it does not point to any such object, NULL is returned. */
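/* Roughly speaking (an illustration with hypothetical operands): for &a[i]
   this returns &a; for a pointer p plus an integer offset it returns the
   base object of p; and for any expression that is not of pointer type it
   returns NULL_TREE. */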
749
750 static tree
751 determine_base_object (tree expr)
752 {
753 enum tree_code code = TREE_CODE (expr);
754 tree base, obj, op0, op1;
755
756 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
757 return NULL_TREE;
758
759 switch (code)
760 {
761 case INTEGER_CST:
762 return NULL_TREE;
763
764 case ADDR_EXPR:
765 obj = TREE_OPERAND (expr, 0);
766 base = get_base_address (obj);
767
768 if (!base)
769 return expr;
770
771 if (TREE_CODE (base) == INDIRECT_REF)
772 return determine_base_object (TREE_OPERAND (base, 0));
773
774 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
775
776 case PLUS_EXPR:
777 case MINUS_EXPR:
778 op0 = determine_base_object (TREE_OPERAND (expr, 0));
779 op1 = determine_base_object (TREE_OPERAND (expr, 1));
780
781 if (!op1)
782 return op0;
783
784 if (!op0)
785 return (code == PLUS_EXPR
786 ? op1
787 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
788
789 return fold (build (code, ptr_type_node, op0, op1));
790
791 case NOP_EXPR:
792 case CONVERT_EXPR:
793 return determine_base_object (TREE_OPERAND (expr, 0));
794
795 default:
796 return fold_convert (ptr_type_node, expr);
797 }
798 }
799
800 /* Allocates an induction variable with the given initial value BASE and
801 step STEP. */
802
803 static struct iv *
804 alloc_iv (tree base, tree step)
805 {
806 struct iv *iv = xcalloc (1, sizeof (struct iv));
807
808 if (step && integer_zerop (step))
809 step = NULL_TREE;
810
811 iv->base = base;
812 iv->base_object = determine_base_object (base);
813 iv->step = step;
814 iv->biv_p = false;
815 iv->have_use_for = false;
816 iv->use_id = 0;
817 iv->ssa_name = NULL_TREE;
818
819 return iv;
820 }
821
822 /* Sets STEP and BASE for induction variable IV. */
823
824 static void
825 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
826 {
827 struct version_info *info = name_info (data, iv);
828
829 gcc_assert (!info->iv);
830
831 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
832 info->iv = alloc_iv (base, step);
833 info->iv->ssa_name = iv;
834 }
835
836 /* Finds induction variable declaration for VAR. */
837
838 static struct iv *
839 get_iv (struct ivopts_data *data, tree var)
840 {
841 basic_block bb;
842
843 if (!name_info (data, var)->iv)
844 {
845 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
846
847 if (!bb
848 || !flow_bb_inside_loop_p (data->current_loop, bb))
849 set_iv (data, var, var, NULL_TREE);
850 }
851
852 return name_info (data, var)->iv;
853 }
854
855 /* Determines the step of a biv defined in PHI. */
856
857 static tree
858 determine_biv_step (tree phi)
859 {
860 struct loop *loop = bb_for_stmt (phi)->loop_father;
861 tree name = PHI_RESULT (phi), base, step;
862 tree type = TREE_TYPE (name);
863
864 if (!is_gimple_reg (name))
865 return NULL_TREE;
866
867 if (!simple_iv (loop, phi, name, &base, &step))
868 return NULL_TREE;
869
870 if (!step)
871 return build_int_cst (type, 0);
872
873 return step;
874 }
875
876 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
877
878 static bool
879 abnormal_ssa_name_p (tree exp)
880 {
881 if (!exp)
882 return false;
883
884 if (TREE_CODE (exp) != SSA_NAME)
885 return false;
886
887 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
888 }
889
890 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
891 abnormal phi node. Callback for for_each_index. */
892
893 static bool
894 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
895 void *data ATTRIBUTE_UNUSED)
896 {
897 if (TREE_CODE (base) == ARRAY_REF)
898 {
899 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
900 return false;
901 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
902 return false;
903 }
904
905 return !abnormal_ssa_name_p (*index);
906 }
907
908 /* Returns true if EXPR contains a ssa name that occurs in an
909 abnormal phi node. */
910
911 static bool
912 contains_abnormal_ssa_name_p (tree expr)
913 {
914 enum tree_code code = TREE_CODE (expr);
915 enum tree_code_class class = TREE_CODE_CLASS (code);
916
917 if (code == SSA_NAME)
918 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
919
920 if (code == INTEGER_CST
921 || is_gimple_min_invariant (expr))
922 return false;
923
924 if (code == ADDR_EXPR)
925 return !for_each_index (&TREE_OPERAND (expr, 0),
926 idx_contains_abnormal_ssa_name_p,
927 NULL);
928
929 switch (class)
930 {
931 case tcc_binary:
932 case tcc_comparison:
933 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
934 return true;
935
936 /* Fallthru. */
937 case tcc_unary:
938 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
939 return true;
940
941 break;
942
943 default:
944 gcc_unreachable ();
945 }
946
947 return false;
948 }
949
950 /* Finds basic ivs. */
951
952 static bool
953 find_bivs (struct ivopts_data *data)
954 {
955 tree phi, step, type, base;
956 bool found = false;
957 struct loop *loop = data->current_loop;
958
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
960 {
961 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
962 continue;
963
964 step = determine_biv_step (phi);
965
966 if (!step)
967 continue;
968 if (cst_and_fits_in_hwi (step)
969 && int_cst_value (step) == 0)
970 continue;
971
972 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
973 if (contains_abnormal_ssa_name_p (base))
974 continue;
975
976 type = TREE_TYPE (PHI_RESULT (phi));
977 base = fold_convert (type, base);
978 step = fold_convert (type, step);
979
980 /* FIXME: We do not handle induction variables whose step does
981 not satisfy cst_and_fits_in_hwi. */
982 if (!cst_and_fits_in_hwi (step))
983 continue;
984
985 set_iv (data, PHI_RESULT (phi), base, step);
986 found = true;
987 }
988
989 return found;
990 }
991
992 /* Marks basic ivs. */
993
994 static void
995 mark_bivs (struct ivopts_data *data)
996 {
997 tree phi, var;
998 struct iv *iv, *incr_iv;
999 struct loop *loop = data->current_loop;
1000 basic_block incr_bb;
1001
1002 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1003 {
1004 iv = get_iv (data, PHI_RESULT (phi));
1005 if (!iv)
1006 continue;
1007
1008 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1009 incr_iv = get_iv (data, var);
1010 if (!incr_iv)
1011 continue;
1012
1013 /* If the increment is in the subloop, ignore it. */
1014 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1015 if (incr_bb->loop_father != data->current_loop
1016 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1017 continue;
1018
1019 iv->biv_p = true;
1020 incr_iv->biv_p = true;
1021 }
1022 }
1023
1024 /* Checks whether STMT defines a linear induction variable and stores its
1025 parameters to BASE and STEP. */
1026
1027 static bool
1028 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1029 tree *base, tree *step)
1030 {
1031 tree lhs;
1032 struct loop *loop = data->current_loop;
1033
1034 *base = NULL_TREE;
1035 *step = NULL_TREE;
1036
1037 if (TREE_CODE (stmt) != MODIFY_EXPR)
1038 return false;
1039
1040 lhs = TREE_OPERAND (stmt, 0);
1041 if (TREE_CODE (lhs) != SSA_NAME)
1042 return false;
1043
1044 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1045 return false;
1046
1047 /* FIXME: We do not handle induction variables whose step does
1048 not satisfy cst_and_fits_in_hwi. */
1049 if (!zero_p (*step)
1050 && !cst_and_fits_in_hwi (*step))
1051 return false;
1052
1053 if (contains_abnormal_ssa_name_p (*base))
1054 return false;
1055
1056 return true;
1057 }
1058
1059 /* Finds general ivs in statement STMT. */
1060
1061 static void
1062 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1063 {
1064 tree base, step;
1065
1066 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1067 return;
1068
1069 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1070 }
1071
1072 /* Finds general ivs in basic block BB. */
1073
1074 static void
1075 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1076 {
1077 block_stmt_iterator bsi;
1078
1079 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1080 find_givs_in_stmt (data, bsi_stmt (bsi));
1081 }
1082
1083 /* Finds general ivs. */
1084
1085 static void
1086 find_givs (struct ivopts_data *data)
1087 {
1088 struct loop *loop = data->current_loop;
1089 basic_block *body = get_loop_body_in_dom_order (loop);
1090 unsigned i;
1091
1092 for (i = 0; i < loop->num_nodes; i++)
1093 find_givs_in_bb (data, body[i]);
1094 free (body);
1095 }
1096
1097 /* For each ssa name defined in LOOP determines whether it is an induction
1098 variable and if so, its initial value and step. */
1099
1100 static bool
1101 find_induction_variables (struct ivopts_data *data)
1102 {
1103 unsigned i;
1104 bitmap_iterator bi;
1105
1106 if (!find_bivs (data))
1107 return false;
1108
1109 find_givs (data);
1110 mark_bivs (data);
1111
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1113 {
1114 struct tree_niter_desc *niter;
1115
1116 niter = niter_for_single_dom_exit (data);
1117
1118 if (niter)
1119 {
1120 fprintf (dump_file, " number of iterations ");
1121 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1122 fprintf (dump_file, "\n");
1123
1124 fprintf (dump_file, " may be zero if ");
1125 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1126 fprintf (dump_file, "\n");
1127 fprintf (dump_file, "\n");
1128 }
1129
1130 fprintf (dump_file, "Induction variables:\n\n");
1131
1132 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1133 {
1134 if (ver_info (data, i)->iv)
1135 dump_iv (dump_file, ver_info (data, i)->iv);
1136 }
1137 }
1138
1139 return true;
1140 }
1141
1142 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1143
1144 static struct iv_use *
1145 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1146 tree stmt, enum use_type use_type)
1147 {
1148 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1149
1150 use->id = n_iv_uses (data);
1151 use->type = use_type;
1152 use->iv = iv;
1153 use->stmt = stmt;
1154 use->op_p = use_p;
1155 use->related_cands = BITMAP_ALLOC (NULL);
1156
1157 /* Clear the ssa name so that it does not show up in the dumps, if the
1158 caller has not reset it already. */
1159 iv->ssa_name = NULL_TREE;
1160
1161 if (dump_file && (dump_flags & TDF_DETAILS))
1162 dump_use (dump_file, use);
1163
1164 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1165
1166 return use;
1167 }
1168
1169 /* Checks whether OP is a loop-level invariant and if so, records it.
1170 NONLINEAR_USE is true if the invariant is used in a way we do not
1171 handle specially. */
1172
1173 static void
1174 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1175 {
1176 basic_block bb;
1177 struct version_info *info;
1178
1179 if (TREE_CODE (op) != SSA_NAME
1180 || !is_gimple_reg (op))
1181 return;
1182
1183 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1184 if (bb
1185 && flow_bb_inside_loop_p (data->current_loop, bb))
1186 return;
1187
1188 info = name_info (data, op);
1189 info->name = op;
1190 info->has_nonlin_use |= nonlinear_use;
1191 if (!info->inv_id)
1192 info->inv_id = ++data->max_inv_id;
1193 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1194 }
1195
1196 /* Checks whether the use OP is interesting and if so, records it
1197 as TYPE. */
1198
1199 static struct iv_use *
1200 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1201 enum use_type type)
1202 {
1203 struct iv *iv;
1204 struct iv *civ;
1205 tree stmt;
1206 struct iv_use *use;
1207
1208 if (TREE_CODE (op) != SSA_NAME)
1209 return NULL;
1210
1211 iv = get_iv (data, op);
1212 if (!iv)
1213 return NULL;
1214
1215 if (iv->have_use_for)
1216 {
1217 use = iv_use (data, iv->use_id);
1218
1219 gcc_assert (use->type == USE_NONLINEAR_EXPR
1220 || use->type == USE_OUTER);
1221
1222 if (type == USE_NONLINEAR_EXPR)
1223 use->type = USE_NONLINEAR_EXPR;
1224 return use;
1225 }
1226
1227 if (zero_p (iv->step))
1228 {
1229 record_invariant (data, op, true);
1230 return NULL;
1231 }
1232 iv->have_use_for = true;
1233
1234 civ = xmalloc (sizeof (struct iv));
1235 *civ = *iv;
1236
1237 stmt = SSA_NAME_DEF_STMT (op);
1238 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1239 || TREE_CODE (stmt) == MODIFY_EXPR);
1240
1241 use = record_use (data, NULL, civ, stmt, type);
1242 iv->use_id = use->id;
1243
1244 return use;
1245 }
1246
1247 /* Checks whether the use OP is interesting and if so, records it. */
1248
1249 static struct iv_use *
1250 find_interesting_uses_op (struct ivopts_data *data, tree op)
1251 {
1252 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1253 }
1254
1255 /* Records a definition of induction variable OP that is used outside of the
1256 loop. */
1257
1258 static struct iv_use *
1259 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1260 {
1261 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1262 }
1263
1264 /* Checks whether the condition *COND_P in STMT is interesting
1265 and if so, records it. */
1266
1267 static void
1268 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1269 {
1270 tree *op0_p;
1271 tree *op1_p;
1272 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1273 struct iv const_iv;
1274 tree zero = integer_zero_node;
1275
1276 const_iv.step = NULL_TREE;
1277
1278 if (integer_zerop (*cond_p)
1279 || integer_nonzerop (*cond_p))
1280 return;
1281
1282 if (TREE_CODE (*cond_p) == SSA_NAME)
1283 {
1284 op0_p = cond_p;
1285 op1_p = &zero;
1286 }
1287 else
1288 {
1289 op0_p = &TREE_OPERAND (*cond_p, 0);
1290 op1_p = &TREE_OPERAND (*cond_p, 1);
1291 }
1292
1293 if (TREE_CODE (*op0_p) == SSA_NAME)
1294 iv0 = get_iv (data, *op0_p);
1295 else
1296 iv0 = &const_iv;
1297
1298 if (TREE_CODE (*op1_p) == SSA_NAME)
1299 iv1 = get_iv (data, *op1_p);
1300 else
1301 iv1 = &const_iv;
1302
1303 if (/* When comparing with a non-invariant value, we cannot do any sensible
1304 induction variable elimination. */
1305 (!iv0 || !iv1)
1306 /* Eliminating condition based on two ivs would be nontrivial.
1307 ??? TODO -- it is not really important to handle this case. */
1308 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1309 {
1310 find_interesting_uses_op (data, *op0_p);
1311 find_interesting_uses_op (data, *op1_p);
1312 return;
1313 }
1314
1315 if (zero_p (iv0->step) && zero_p (iv1->step))
1316 {
1317 /* If both are invariants, this is a job for unswitching. */
1318 return;
1319 }
1320
1321 civ = xmalloc (sizeof (struct iv));
1322 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1323 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1324 }
1325
1326 /* Returns true if expression EXPR is obviously invariant in LOOP,
1327 i.e. if all its operands are defined outside of the LOOP. */
1328
1329 bool
1330 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1331 {
1332 basic_block def_bb;
1333 unsigned i, len;
1334
1335 if (is_gimple_min_invariant (expr))
1336 return true;
1337
1338 if (TREE_CODE (expr) == SSA_NAME)
1339 {
1340 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1341 if (def_bb
1342 && flow_bb_inside_loop_p (loop, def_bb))
1343 return false;
1344
1345 return true;
1346 }
1347
1348 if (!EXPR_P (expr))
1349 return false;
1350
1351 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1352 for (i = 0; i < len; i++)
1353 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1354 return false;
1355
1356 return true;
1357 }
1358
1359 /* Accumulates the steps of indices into DATA and replaces their values with
1360 the initial ones. Returns false when the value of the index cannot be
1361 determined. Callback for for_each_index. */
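/* A sketch of what the callback below accumulates: for a reference a[i]
   where the index i is an induction variable with step 1 and the array
   elements are 4 bytes wide, the contribution accumulated into the step is
   4 * 1 = 4, i.e. the address advances by four bytes per iteration. */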
1362
1363 struct ifs_ivopts_data
1364 {
1365 struct ivopts_data *ivopts_data;
1366 tree stmt;
1367 tree *step_p;
1368 };
1369
1370 static bool
1371 idx_find_step (tree base, tree *idx, void *data)
1372 {
1373 struct ifs_ivopts_data *dta = data;
1374 struct iv *iv;
1375 tree step, type, iv_type, iv_step, lbound, off;
1376 struct loop *loop = dta->ivopts_data->current_loop;
1377
1378 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1379 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1380 return false;
1381
1382 /* If base is a component ref, require that the offset of the reference
1383 be invariant. */
1384 if (TREE_CODE (base) == COMPONENT_REF)
1385 {
1386 off = component_ref_field_offset (base);
1387 return expr_invariant_in_loop_p (loop, off);
1388 }
1389
1390 /* If base is an array, first check whether we will be able to move the
1391 reference out of the loop (in order to take its address in strength
1392 reduction). In order for this to work we need both the lower bound
1393 and the step to be loop invariants. */
1394 if (TREE_CODE (base) == ARRAY_REF)
1395 {
1396 step = array_ref_element_size (base);
1397 lbound = array_ref_low_bound (base);
1398
1399 if (!expr_invariant_in_loop_p (loop, step)
1400 || !expr_invariant_in_loop_p (loop, lbound))
1401 return false;
1402 }
1403
1404 if (TREE_CODE (*idx) != SSA_NAME)
1405 return true;
1406
1407 iv = get_iv (dta->ivopts_data, *idx);
1408 if (!iv)
1409 return false;
1410
1411 *idx = iv->base;
1412
1413 if (!iv->step)
1414 return true;
1415
1416 iv_type = TREE_TYPE (iv->base);
1417 type = build_pointer_type (TREE_TYPE (base));
1418 if (TREE_CODE (base) == ARRAY_REF)
1419 {
1420 step = array_ref_element_size (base);
1421
1422 /* We only handle addresses whose step is an integer constant. */
1423 if (TREE_CODE (step) != INTEGER_CST)
1424 return false;
1425 }
1426 else
1427 /* The step for pointer arithmetic is already 1 byte. */
1428 step = build_int_cst (type, 1);
1429
1430 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1431 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1432 type, iv->base, iv->step, dta->stmt);
1433 else
1434 iv_step = fold_convert (iv_type, iv->step);
1435
1436 if (!iv_step)
1437 {
1438 /* The index might wrap. */
1439 return false;
1440 }
1441
1442 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1443
1444 if (!*dta->step_p)
1445 *dta->step_p = step;
1446 else
1447 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1448 *dta->step_p, step);
1449
1450 return true;
1451 }
1452
1453 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1454 object is passed to it in DATA. */
1455
1456 static bool
1457 idx_record_use (tree base, tree *idx,
1458 void *data)
1459 {
1460 find_interesting_uses_op (data, *idx);
1461 if (TREE_CODE (base) == ARRAY_REF)
1462 {
1463 find_interesting_uses_op (data, array_ref_element_size (base));
1464 find_interesting_uses_op (data, array_ref_low_bound (base));
1465 }
1466 return true;
1467 }
1468
1469 /* Returns true if memory reference REF may be unaligned. */
1470
1471 static bool
1472 may_be_unaligned_p (tree ref)
1473 {
1474 tree base;
1475 tree base_type;
1476 HOST_WIDE_INT bitsize;
1477 HOST_WIDE_INT bitpos;
1478 tree toffset;
1479 enum machine_mode mode;
1480 int unsignedp, volatilep;
1481 unsigned base_align;
1482
1483 /* The test below is basically a copy of what expr.c:normal_inner_ref
1484 does to check whether the object must be loaded by parts when
1485 STRICT_ALIGNMENT is true. */
1486 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1487 &unsignedp, &volatilep, true);
1488 base_type = TREE_TYPE (base);
1489 base_align = TYPE_ALIGN (base_type);
1490
1491 if (mode != BLKmode
1492 && (base_align < GET_MODE_ALIGNMENT (mode)
1493 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1494 || bitpos % BITS_PER_UNIT != 0))
1495 return true;
1496
1497 return false;
1498 }
1499
1500 /* Finds addresses in *OP_P inside STMT. */
1501
1502 static void
1503 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1504 {
1505 tree base = unshare_expr (*op_p), step = NULL;
1506 struct iv *civ;
1507 struct ifs_ivopts_data ifs_ivopts_data;
1508
1509 /* Ignore bitfields for now. Not really something terribly complicated
1510 to handle. TODO. */
1511 if (TREE_CODE (base) == COMPONENT_REF
1512 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1513 goto fail;
1514
1515 if (STRICT_ALIGNMENT
1516 && may_be_unaligned_p (base))
1517 goto fail;
1518
1519 ifs_ivopts_data.ivopts_data = data;
1520 ifs_ivopts_data.stmt = stmt;
1521 ifs_ivopts_data.step_p = &step;
1522 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1523 || zero_p (step))
1524 goto fail;
1525
1526 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1527 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1528
1529 if (TREE_CODE (base) == INDIRECT_REF)
1530 base = TREE_OPERAND (base, 0);
1531 else
1532 base = build_addr (base);
1533
1534 civ = alloc_iv (base, step);
1535 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1536 return;
1537
1538 fail:
1539 for_each_index (op_p, idx_record_use, data);
1540 }
1541
1542 /* Finds and records invariants used in STMT. */
1543
1544 static void
1545 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1546 {
1547 use_optype uses = NULL;
1548 unsigned i, n;
1549 tree op;
1550
1551 if (TREE_CODE (stmt) == PHI_NODE)
1552 n = PHI_NUM_ARGS (stmt);
1553 else
1554 {
1555 get_stmt_operands (stmt);
1556 uses = STMT_USE_OPS (stmt);
1557 n = NUM_USES (uses);
1558 }
1559
1560 for (i = 0; i < n; i++)
1561 {
1562 if (TREE_CODE (stmt) == PHI_NODE)
1563 op = PHI_ARG_DEF (stmt, i);
1564 else
1565 op = USE_OP (uses, i);
1566
1567 record_invariant (data, op, false);
1568 }
1569 }
1570
1571 /* Finds interesting uses of induction variables in the statement STMT. */
1572
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1575 {
1576 struct iv *iv;
1577 tree op, lhs, rhs;
1578 use_optype uses = NULL;
1579 unsigned i, n;
1580
1581 find_invariants_stmt (data, stmt);
1582
1583 if (TREE_CODE (stmt) == COND_EXPR)
1584 {
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586 return;
1587 }
1588
1589 if (TREE_CODE (stmt) == MODIFY_EXPR)
1590 {
1591 lhs = TREE_OPERAND (stmt, 0);
1592 rhs = TREE_OPERAND (stmt, 1);
1593
1594 if (TREE_CODE (lhs) == SSA_NAME)
1595 {
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1598
1599 iv = get_iv (data, lhs);
1600
1601 if (iv && !zero_p (iv->step))
1602 return;
1603 }
1604
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606 {
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1609 return;
1610
1611 case tcc_reference:
1612 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1613 if (REFERENCE_CLASS_P (lhs))
1614 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1615 return;
1616
1617 default: ;
1618 }
1619
1620 if (REFERENCE_CLASS_P (lhs)
1621 && is_gimple_val (rhs))
1622 {
1623 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1624 find_interesting_uses_op (data, rhs);
1625 return;
1626 }
1627
1628 /* TODO -- we should also handle address uses of type
1629
1630 memory = call (whatever);
1631
1632 and
1633
1634 call (memory). */
1635 }
1636
1637 if (TREE_CODE (stmt) == PHI_NODE
1638 && bb_for_stmt (stmt) == data->current_loop->header)
1639 {
1640 lhs = PHI_RESULT (stmt);
1641 iv = get_iv (data, lhs);
1642
1643 if (iv && !zero_p (iv->step))
1644 return;
1645 }
1646
1647 if (TREE_CODE (stmt) == PHI_NODE)
1648 n = PHI_NUM_ARGS (stmt);
1649 else
1650 {
1651 uses = STMT_USE_OPS (stmt);
1652 n = NUM_USES (uses);
1653 }
1654
1655 for (i = 0; i < n; i++)
1656 {
1657 if (TREE_CODE (stmt) == PHI_NODE)
1658 op = PHI_ARG_DEF (stmt, i);
1659 else
1660 op = USE_OP (uses, i);
1661
1662 if (TREE_CODE (op) != SSA_NAME)
1663 continue;
1664
1665 iv = get_iv (data, op);
1666 if (!iv)
1667 continue;
1668
1669 find_interesting_uses_op (data, op);
1670 }
1671 }
1672
1673 /* Finds interesting uses of induction variables outside of loops
1674 on loop exit edge EXIT. */
1675
1676 static void
1677 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1678 {
1679 tree phi, def;
1680
1681 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1682 {
1683 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1684 find_interesting_uses_outer (data, def);
1685 }
1686 }
1687
1688 /* Finds uses of the induction variables that are interesting. */
1689
1690 static void
1691 find_interesting_uses (struct ivopts_data *data)
1692 {
1693 basic_block bb;
1694 block_stmt_iterator bsi;
1695 tree phi;
1696 basic_block *body = get_loop_body (data->current_loop);
1697 unsigned i;
1698 struct version_info *info;
1699 edge e;
1700
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "Uses:\n\n");
1703
1704 for (i = 0; i < data->current_loop->num_nodes; i++)
1705 {
1706 edge_iterator ei;
1707 bb = body[i];
1708
1709 FOR_EACH_EDGE (e, ei, bb->succs)
1710 if (e->dest != EXIT_BLOCK_PTR
1711 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1712 find_interesting_uses_outside (data, e);
1713
1714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1715 find_interesting_uses_stmt (data, phi);
1716 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1718 }
1719
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1721 {
1722 bitmap_iterator bi;
1723
1724 fprintf (dump_file, "\n");
1725
1726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1727 {
1728 info = ver_info (data, i);
1729 if (info->inv_id)
1730 {
1731 fprintf (dump_file, " ");
1732 print_generic_expr (dump_file, info->name, TDF_SLIM);
1733 fprintf (dump_file, " is invariant (%d)%s\n",
1734 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1735 }
1736 }
1737
1738 fprintf (dump_file, "\n");
1739 }
1740
1741 free (body);
1742 }
1743
1744 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
1745 is true, assume we are inside an address. */
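/* For example (illustrative only): stripping &a[i + 4], where the array
   elements are 4 bytes wide, yields roughly &a[i] with *OFFSET set to 16,
   and stripping p + 8 yields p with *OFFSET set to 8. */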
1746
1747 static tree
1748 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1749 {
1750 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1751 enum tree_code code;
1752 tree type, orig_type = TREE_TYPE (expr);
1753 unsigned HOST_WIDE_INT off0, off1, st;
1754 tree orig_expr = expr;
1755
1756 STRIP_NOPS (expr);
1757 type = TREE_TYPE (expr);
1758 code = TREE_CODE (expr);
1759 *offset = 0;
1760
1761 switch (code)
1762 {
1763 case INTEGER_CST:
1764 if (!cst_and_fits_in_hwi (expr)
1765 || zero_p (expr))
1766 return orig_expr;
1767
1768 *offset = int_cst_value (expr);
1769 return build_int_cst_type (orig_type, 0);
1770
1771 case PLUS_EXPR:
1772 case MINUS_EXPR:
1773 op0 = TREE_OPERAND (expr, 0);
1774 op1 = TREE_OPERAND (expr, 1);
1775
1776 op0 = strip_offset (op0, false, &off0);
1777 op1 = strip_offset (op1, false, &off1);
1778
1779 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1780 if (op0 == TREE_OPERAND (expr, 0)
1781 && op1 == TREE_OPERAND (expr, 1))
1782 return orig_expr;
1783
1784 if (zero_p (op1))
1785 expr = op0;
1786 else if (zero_p (op0))
1787 {
1788 if (code == PLUS_EXPR)
1789 expr = op1;
1790 else
1791 expr = build1 (NEGATE_EXPR, type, op1);
1792 }
1793 else
1794 expr = build2 (code, type, op0, op1);
1795
1796 return fold_convert (orig_type, expr);
1797
1798 case ARRAY_REF:
1799 if (!inside_addr)
1800 return orig_expr;
1801
1802 step = array_ref_element_size (expr);
1803 if (!cst_and_fits_in_hwi (step))
1804 break;
1805
1806 st = int_cst_value (step);
1807 op1 = TREE_OPERAND (expr, 1);
1808 op1 = strip_offset (op1, false, &off1);
1809 *offset = off1 * st;
1810 break;
1811
1812 case COMPONENT_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1815 break;
1816
1817 case ADDR_EXPR:
1818 inside_addr = true;
1819 break;
1820
1821 default:
1822 return orig_expr;
1823 }
1824
1825 /* Default handling of expressions for which we want to recurse into
1826 the first operand. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset (op0, inside_addr, &off0);
1829 *offset += off0;
1830
1831 if (op0 == TREE_OPERAND (expr, 0)
1832 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1833 return orig_expr;
1834
1835 expr = copy_node (expr);
1836 TREE_OPERAND (expr, 0) = op0;
1837 if (op1)
1838 TREE_OPERAND (expr, 1) = op1;
1839
1840 return fold_convert (orig_type, expr);
1841 }
1842
1843 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1844 position to POS. If USE is not NULL, the candidate is set as related to
1845 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1846 replacement of the final value of the iv by a direct computation. */
1847
1848 static struct iv_cand *
1849 add_candidate_1 (struct ivopts_data *data,
1850 tree base, tree step, bool important, enum iv_position pos,
1851 struct iv_use *use, tree incremented_at)
1852 {
1853 unsigned i;
1854 struct iv_cand *cand = NULL;
1855 tree type;
1856
1857 if (base)
1858 {
1859 type = TREE_TYPE (base);
1860 if (!TYPE_UNSIGNED (type))
1861 {
1862 type = unsigned_type_for (type);
1863 base = fold_convert (type, base);
1864 if (step)
1865 step = fold_convert (type, step);
1866 }
1867 }
1868
1869 for (i = 0; i < n_iv_cands (data); i++)
1870 {
1871 cand = iv_cand (data, i);
1872
1873 if (cand->pos != pos)
1874 continue;
1875
1876 if (cand->incremented_at != incremented_at)
1877 continue;
1878
1879 if (!cand->iv)
1880 {
1881 if (!base && !step)
1882 break;
1883
1884 continue;
1885 }
1886
1887 if (!base && !step)
1888 continue;
1889
1890 if (!operand_equal_p (base, cand->iv->base, 0))
1891 continue;
1892
1893 if (zero_p (cand->iv->step))
1894 {
1895 if (zero_p (step))
1896 break;
1897 }
1898 else
1899 {
1900 if (step && operand_equal_p (step, cand->iv->step, 0))
1901 break;
1902 }
1903 }
1904
1905 if (i == n_iv_cands (data))
1906 {
1907 cand = xcalloc (1, sizeof (struct iv_cand));
1908 cand->id = i;
1909
1910 if (!base && !step)
1911 cand->iv = NULL;
1912 else
1913 cand->iv = alloc_iv (base, step);
1914
1915 cand->pos = pos;
1916 if (pos != IP_ORIGINAL && cand->iv)
1917 {
1918 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1919 cand->var_after = cand->var_before;
1920 }
1921 cand->important = important;
1922 cand->incremented_at = incremented_at;
1923 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1924
1925 if (dump_file && (dump_flags & TDF_DETAILS))
1926 dump_cand (dump_file, cand);
1927 }
1928
1929 if (important && !cand->important)
1930 {
1931 cand->important = true;
1932 if (dump_file && (dump_flags & TDF_DETAILS))
1933 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1934 }
1935
1936 if (use)
1937 {
1938 bitmap_set_bit (use->related_cands, i);
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 fprintf (dump_file, "Candidate %d is related to use %d\n",
1941 cand->id, use->id);
1942 }
1943
1944 return cand;
1945 }
1946
1947 /* Returns true if incrementing the induction variable at the end of the LOOP
1948 is allowed.
1949
1950 The purpose is to avoid splitting the latch edge with a biv increment, thus
1951 creating a jump, possibly confusing other optimization passes and leaving
1952 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1953 is not available (so we do not have a better alternative), or if the latch
1954 edge is already nonempty. */
1955
1956 static bool
1957 allow_ip_end_pos_p (struct loop *loop)
1958 {
1959 if (!ip_normal_pos (loop))
1960 return true;
1961
1962 if (!empty_block_p (ip_end_pos (loop)))
1963 return true;
1964
1965 return false;
1966 }
1967
1968 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1969 position to POS. If USE is not NULL, the candidate is set as related to
1970 it. The candidate computation is scheduled on all available positions. */
1971
1972 static void
1973 add_candidate (struct ivopts_data *data,
1974 tree base, tree step, bool important, struct iv_use *use)
1975 {
1976 if (ip_normal_pos (data->current_loop))
1977 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1978 if (ip_end_pos (data->current_loop)
1979 && allow_ip_end_pos_p (data->current_loop))
1980 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1981 }
1982
1983 /* Adds standard iv candidates. */
1984
1985 static void
1986 add_standard_iv_candidates (struct ivopts_data *data)
1987 {
1988 /* Add 0 + 1 * iteration candidate. */
1989 add_candidate (data,
1990 build_int_cst (unsigned_intSI_type_node, 0),
1991 build_int_cst (unsigned_intSI_type_node, 1),
1992 true, NULL);
1993
1994 /* The same for a long type if it is still fast enough. */
1995 if (BITS_PER_WORD > 32)
1996 add_candidate (data,
1997 build_int_cst (unsigned_intDI_type_node, 0),
1998 build_int_cst (unsigned_intDI_type_node, 1),
1999 true, NULL);
2000 }
2001
2002
2003 /* Adds candidates based on the old induction variable IV. */
2004
2005 static void
2006 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2007 {
2008 tree phi, def;
2009 struct iv_cand *cand;
2010
2011 add_candidate (data, iv->base, iv->step, true, NULL);
2012
2013 /* The same, but with initial value zero. */
2014 add_candidate (data,
2015 build_int_cst (TREE_TYPE (iv->base), 0),
2016 iv->step, true, NULL);
2017
2018 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2019 if (TREE_CODE (phi) == PHI_NODE)
2020 {
2021 /* Additionally record the possibility of leaving the original iv
2022 untouched. */
2023 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2024 cand = add_candidate_1 (data,
2025 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2026 SSA_NAME_DEF_STMT (def));
2027 cand->var_before = iv->ssa_name;
2028 cand->var_after = def;
2029 }
2030 }
2031
2032 /* Adds candidates based on the old induction variables. */
2033
2034 static void
2035 add_old_ivs_candidates (struct ivopts_data *data)
2036 {
2037 unsigned i;
2038 struct iv *iv;
2039 bitmap_iterator bi;
2040
2041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2042 {
2043 iv = ver_info (data, i)->iv;
2044 if (iv && iv->biv_p && !zero_p (iv->step))
2045 add_old_iv_candidates (data, iv);
2046 }
2047 }
2048
2049 /* Adds candidates based on the value of the induction variable IV and USE. */
2050
2051 static void
2052 add_iv_value_candidates (struct ivopts_data *data,
2053 struct iv *iv, struct iv_use *use)
2054 {
2055 add_candidate (data, iv->base, iv->step, false, use);
2056
2057 /* The same, but with initial value zero. */
2058 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2059 iv->step, false, use);
2060 }
2061
2062 /* Adds candidates based on the address IV and USE. */
2063
2064 static void
2065 add_address_candidates (struct ivopts_data *data,
2066 struct iv *iv, struct iv_use *use)
2067 {
2068 tree base, abase;
2069 unsigned HOST_WIDE_INT offset;
2070
2071 /* First, the trivial choices. */
2072 add_iv_value_candidates (data, iv, use);
2073
2074 /* Second, try removing the COMPONENT_REFs. */
2075 if (TREE_CODE (iv->base) == ADDR_EXPR)
2076 {
2077 base = TREE_OPERAND (iv->base, 0);
2078 while (TREE_CODE (base) == COMPONENT_REF
2079 || (TREE_CODE (base) == ARRAY_REF
2080 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2081 base = TREE_OPERAND (base, 0);
2082
2083 if (base != TREE_OPERAND (iv->base, 0))
2084 {
2085 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2086 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2087
2088 if (TREE_CODE (base) == INDIRECT_REF)
2089 base = TREE_OPERAND (base, 0);
2090 else
2091 base = build_addr (base);
2092 add_candidate (data, base, iv->step, false, use);
2093 }
2094 }
2095
2096 /* Third, try removing the constant offset. */
2097 abase = iv->base;
2098 base = strip_offset (abase, false, &offset);
2099 if (offset)
2100 add_candidate (data, base, iv->step, false, use);
2101 }
2102
2103 /* Possibly adds pseudocandidate for replacing the final value of USE by
2104 a direct computation. */
2105
2106 static void
2107 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2108 {
2109 struct tree_niter_desc *niter;
2110
2111 /* We must know where we exit the loop and how many times it rolls. */
2112 niter = niter_for_single_dom_exit (data);
2113 if (!niter
2114 || !zero_p (niter->may_be_zero))
2115 return;
2116
2117 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2118 }
2119
2120 /* Adds candidates based on the uses. */
2121
2122 static void
2123 add_derived_ivs_candidates (struct ivopts_data *data)
2124 {
2125 unsigned i;
2126
2127 for (i = 0; i < n_iv_uses (data); i++)
2128 {
2129 struct iv_use *use = iv_use (data, i);
2130
2131 if (!use)
2132 continue;
2133
2134 switch (use->type)
2135 {
2136 case USE_NONLINEAR_EXPR:
2137 case USE_COMPARE:
2138 /* Just add the ivs based on the value of the iv used here. */
2139 add_iv_value_candidates (data, use->iv, use);
2140 break;
2141
2142 case USE_OUTER:
2143 add_iv_value_candidates (data, use->iv, use);
2144
2145 /* Additionally, add the pseudocandidate for the possibility of
2146 replacing the final value by a direct computation. */
2147 add_iv_outer_candidates (data, use);
2148 break;
2149
2150 case USE_ADDRESS:
2151 add_address_candidates (data, use->iv, use);
2152 break;
2153
2154 default:
2155 gcc_unreachable ();
2156 }
2157 }
2158 }
2159
2160 /* Record important candidates and add them to related_cands bitmaps
2161 if needed. */
2162
2163 static void
2164 record_important_candidates (struct ivopts_data *data)
2165 {
2166 unsigned i;
2167 struct iv_use *use;
2168
2169 for (i = 0; i < n_iv_cands (data); i++)
2170 {
2171 struct iv_cand *cand = iv_cand (data, i);
2172
2173 if (cand->important)
2174 bitmap_set_bit (data->important_candidates, i);
2175 }
2176
2177 data->consider_all_candidates = (n_iv_cands (data)
2178 <= CONSIDER_ALL_CANDIDATES_BOUND);
2179
2180 if (data->consider_all_candidates)
2181 {
2182 /* We will not need "related_cands" bitmaps in this case,
2183 so release them to decrease peak memory consumption. */
2184 for (i = 0; i < n_iv_uses (data); i++)
2185 {
2186 use = iv_use (data, i);
2187 BITMAP_FREE (use->related_cands);
2188 }
2189 }
2190 else
2191 {
2192 /* Add important candidates to the related_cands bitmaps. */
2193 for (i = 0; i < n_iv_uses (data); i++)
2194 bitmap_ior_into (iv_use (data, i)->related_cands,
2195 data->important_candidates);
2196 }
2197 }
2198
2199 /* Finds the candidates for the induction variables. */
2200
2201 static void
2202 find_iv_candidates (struct ivopts_data *data)
2203 {
2204 /* Add commonly used ivs. */
2205 add_standard_iv_candidates (data);
2206
2207 /* Add old induction variables. */
2208 add_old_ivs_candidates (data);
2209
2210 /* Add induction variables derived from uses. */
2211 add_derived_ivs_candidates (data);
2212
2213 /* Record the important candidates. */
2214 record_important_candidates (data);
2215 }
2216
2217 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2218 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2219 we allocate a simple list for every use. */
2220
2221 static void
2222 alloc_use_cost_map (struct ivopts_data *data)
2223 {
2224 unsigned i, size, s, j;
2225
2226 for (i = 0; i < n_iv_uses (data); i++)
2227 {
2228 struct iv_use *use = iv_use (data, i);
2229 bitmap_iterator bi;
2230
2231 if (data->consider_all_candidates)
2232 size = n_iv_cands (data);
2233 else
2234 {
2235 s = 0;
2236 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2237 {
2238 s++;
2239 }
2240
2241 /* Round up to a power of two, so that taking the modulus by it is fast. */
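/* For example, a use related to five candidates gets a table of size 8. */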
2242 for (size = 1; size < s; size <<= 1)
2243 continue;
2244 }
2245
2246 use->n_map_members = size;
2247 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2248 }
2249 }
2250
2251 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2252 depends on the invariants in DEPENDS_ON. */
2253
2254 static void
2255 set_use_iv_cost (struct ivopts_data *data,
2256 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2257 bitmap depends_on)
2258 {
2259 unsigned i, s;
2260
2261 if (cost == INFTY)
2262 {
2263 BITMAP_FREE (depends_on);
2264 return;
2265 }
2266
2267 if (data->consider_all_candidates)
2268 {
2269 use->cost_map[cand->id].cand = cand;
2270 use->cost_map[cand->id].cost = cost;
2271 use->cost_map[cand->id].depends_on = depends_on;
2272 return;
2273 }
2274
2275 /* n_map_members is a power of two, so this computes modulo. */
2276 s = cand->id & (use->n_map_members - 1);
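/* E.g. with n_map_members == 8, a candidate with id 13 starts probing
   at slot 13 & 7 == 5. */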
2277 for (i = s; i < use->n_map_members; i++)
2278 if (!use->cost_map[i].cand)
2279 goto found;
2280 for (i = 0; i < s; i++)
2281 if (!use->cost_map[i].cand)
2282 goto found;
2283
2284 gcc_unreachable ();
2285
2286 found:
2287 use->cost_map[i].cand = cand;
2288 use->cost_map[i].cost = cost;
2289 use->cost_map[i].depends_on = depends_on;
2290 }
2291
2292 /* Gets cost of (USE, CANDIDATE) pair. */
2293
2294 static struct cost_pair *
2295 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2296 struct iv_cand *cand)
2297 {
2298 unsigned i, s;
2299 struct cost_pair *ret;
2300
2301 if (!cand)
2302 return NULL;
2303
2304 if (data->consider_all_candidates)
2305 {
2306 ret = use->cost_map + cand->id;
2307 if (!ret->cand)
2308 return NULL;
2309
2310 return ret;
2311 }
2312
2313 /* n_map_members is a power of two, so this computes modulo. */
2314 s = cand->id & (use->n_map_members - 1);
2315 for (i = s; i < use->n_map_members; i++)
2316 if (use->cost_map[i].cand == cand)
2317 return use->cost_map + i;
2318
2319 for (i = 0; i < s; i++)
2320 if (use->cost_map[i].cand == cand)
2321 return use->cost_map + i;
2322
2323 return NULL;
2324 }
2325
2326 /* Returns an estimate of the cost of computing SEQ. */
2327
2328 static unsigned
2329 seq_cost (rtx seq)
2330 {
2331 unsigned cost = 0;
2332 rtx set;
2333
2334 for (; seq; seq = NEXT_INSN (seq))
2335 {
2336 set = single_set (seq);
2337 if (set)
2338 cost += rtx_cost (set, SET);
2339 else
2340 cost++;
2341 }
2342
2343 return cost;
2344 }
2345
2346 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2347 static rtx
2348 produce_memory_decl_rtl (tree obj, int *regno)
2349 {
2350 rtx x;
2351 if (!obj)
2352 abort ();
2353 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2354 {
2355 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2356 x = gen_rtx_SYMBOL_REF (Pmode, name);
2357 }
2358 else
2359 x = gen_raw_REG (Pmode, (*regno)++);
2360
2361 return gen_rtx_MEM (DECL_MODE (obj), x);
2362 }
2363
2364 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2365 walk_tree. DATA contains the current fake register number. */
2366
2367 static tree
2368 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2369 {
2370 tree obj = NULL_TREE;
2371 rtx x = NULL_RTX;
2372 int *regno = data;
2373
2374 switch (TREE_CODE (*expr_p))
2375 {
2376 case ADDR_EXPR:
2377 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2378 handled_component_p (*expr_p);
2379 expr_p = &TREE_OPERAND (*expr_p, 0))
2380 continue;
2381 obj = *expr_p;
2382 if (DECL_P (obj))
2383 x = produce_memory_decl_rtl (obj, regno);
2384 break;
2385
2386 case SSA_NAME:
2387 *ws = 0;
2388 obj = SSA_NAME_VAR (*expr_p);
2389 if (!DECL_RTL_SET_P (obj))
2390 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2391 break;
2392
2393 case VAR_DECL:
2394 case PARM_DECL:
2395 case RESULT_DECL:
2396 *ws = 0;
2397 obj = *expr_p;
2398
2399 if (DECL_RTL_SET_P (obj))
2400 break;
2401
2402 if (DECL_MODE (obj) == BLKmode)
2403 x = produce_memory_decl_rtl (obj, regno);
2404 else
2405 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2406
2407 break;
2408
2409 default:
2410 break;
2411 }
2412
2413 if (x)
2414 {
2415 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2416 SET_DECL_RTL (obj, x);
2417 }
2418
2419 return NULL_TREE;
2420 }
2421
2422 /* Determines cost of the computation of EXPR. */
2423
2424 static unsigned
2425 computation_cost (tree expr)
2426 {
2427 rtx seq, rslt;
2428 tree type = TREE_TYPE (expr);
2429 unsigned cost;
2430 /* Avoid using hard regs in ways which may be unsupported. */
2431 int regno = LAST_VIRTUAL_REGISTER + 1;
2432
2433 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2434 start_sequence ();
2435 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2436 seq = get_insns ();
2437 end_sequence ();
2438
2439 cost = seq_cost (seq);
2440 if (GET_CODE (rslt) == MEM)
2441 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2442
2443 return cost;
2444 }
2445
2446 /* Returns the variable containing the value of candidate CAND at STMT. */
2447
2448 static tree
2449 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2450 {
2451 if (stmt_after_increment (loop, cand, stmt))
2452 return cand->var_after;
2453 else
2454 return cand->var_before;
2455 }
2456
2457 /* Determines the expression by which USE is expressed from induction variable
2458 CAND at statement AT in LOOP. */
2459
2460 static tree
2461 get_computation_at (struct loop *loop,
2462 struct iv_use *use, struct iv_cand *cand, tree at)
2463 {
2464 tree ubase = use->iv->base;
2465 tree ustep = use->iv->step;
2466 tree cbase = cand->iv->base;
2467 tree cstep = cand->iv->step;
2468 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2469 tree uutype;
2470 tree expr, delta;
2471 tree ratio;
2472 unsigned HOST_WIDE_INT ustepi, cstepi;
2473 HOST_WIDE_INT ratioi;
2474
2475 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2476 {
2477 /* We do not have enough precision to express the values of the use. */
2478 return NULL_TREE;
2479 }
2480
2481 expr = var_at_stmt (loop, cand, at);
2482
2483 if (TREE_TYPE (expr) != ctype)
2484 {
2485 /* This may happen with the original ivs. */
2486 expr = fold_convert (ctype, expr);
2487 }
2488
2489 if (TYPE_UNSIGNED (utype))
2490 uutype = utype;
2491 else
2492 {
2493 uutype = unsigned_type_for (utype);
2494 ubase = fold_convert (uutype, ubase);
2495 ustep = fold_convert (uutype, ustep);
2496 }
2497
2498 if (uutype != ctype)
2499 {
2500 expr = fold_convert (uutype, expr);
2501 cbase = fold_convert (uutype, cbase);
2502 cstep = fold_convert (uutype, cstep);
2503 }
2504
2505 if (!cst_and_fits_in_hwi (cstep)
2506 || !cst_and_fits_in_hwi (ustep))
2507 return NULL_TREE;
2508
2509 ustepi = int_cst_value (ustep);
2510 cstepi = int_cst_value (cstep);
2511
2512 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2513 {
2514 /* TODO maybe consider case when ustep divides cstep and the ratio is
2515 a power of 2 (so that the division is fast to execute)? We would
2516 need to be much more careful with overflows etc. then. */
2517 return NULL_TREE;
2518 }
2519
2520 /* We may need to shift the value if we are after the increment. */
2521 if (stmt_after_increment (loop, cand, at))
2522 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2523
2524 /* use = ubase - ratio * cbase + ratio * var.
2525
2526 In the general case ubase + ratio * (var - cbase) could be better (one less
2527 multiplication), but often it is possible to eliminate redundant parts
2528 of computations from (ubase - ratio * cbase) term, and if it does not
2529 happen, fold is able to apply the distributive law to obtain this form
2530 anyway. */
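/* As an illustration (values assumed for the example): a use with base 16
   and step 8 based on a candidate with base 0 and step 4 gives ratioi == 2,
   so the use becomes 16 - 2 * 0 + 2 * var, i.e. 16 + 2 * var. */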
2531
2532 if (ratioi == 1)
2533 {
2534 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2535 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2536 }
2537 else if (ratioi == -1)
2538 {
2539 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2540 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2541 }
2542 else
2543 {
2544 ratio = build_int_cst_type (uutype, ratioi);
2545 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2546 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2547 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2548 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2549 }
2550
2551 return fold_convert (utype, expr);
2552 }
2553
2554 /* Determines the expression by which USE is expressed from induction variable
2555 CAND in LOOP. */
2556
2557 static tree
2558 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2559 {
2560 return get_computation_at (loop, use, cand, use->stmt);
2561 }
2562
2563 /* Returns cost of addition in MODE. */
2564
2565 static unsigned
2566 add_cost (enum machine_mode mode)
2567 {
2568 static unsigned costs[NUM_MACHINE_MODES];
2569 rtx seq;
2570 unsigned cost;
2571
2572 if (costs[mode])
2573 return costs[mode];
2574
2575 start_sequence ();
2576 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2577 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2578 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2579 NULL_RTX);
2580 seq = get_insns ();
2581 end_sequence ();
2582
2583 cost = seq_cost (seq);
2584 if (!cost)
2585 cost = 1;
2586
2587 costs[mode] = cost;
2588
2589 if (dump_file && (dump_flags & TDF_DETAILS))
2590 fprintf (dump_file, "Addition in %s costs %d\n",
2591 GET_MODE_NAME (mode), cost);
2592 return cost;
2593 }
2594
2595 /* Entry in a hashtable of already known costs for multiplication. */
2596 struct mbc_entry
2597 {
2598 HOST_WIDE_INT cst; /* The constant to multiply by. */
2599 enum machine_mode mode; /* In mode. */
2600 unsigned cost; /* The cost. */
2601 };
2602
2603 /* Computes the hash value for ENTRY. */
2604
2605 static hashval_t
2606 mbc_entry_hash (const void *entry)
2607 {
2608 const struct mbc_entry *e = entry;
2609
2610 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2611 }
2612
2613 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2614
2615 static int
2616 mbc_entry_eq (const void *entry1, const void *entry2)
2617 {
2618 const struct mbc_entry *e1 = entry1;
2619 const struct mbc_entry *e2 = entry2;
2620
2621 return (e1->mode == e2->mode
2622 && e1->cst == e2->cst);
2623 }
2624
2625 /* Returns cost of multiplication by constant CST in MODE. */
2626
2627 static unsigned
2628 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2629 {
2630 static htab_t costs;
2631 struct mbc_entry **cached, act;
2632 rtx seq;
2633 unsigned cost;
2634
2635 if (!costs)
2636 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2637
2638 act.mode = mode;
2639 act.cst = cst;
2640 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2641 if (*cached)
2642 return (*cached)->cost;
2643
2644 *cached = xmalloc (sizeof (struct mbc_entry));
2645 (*cached)->mode = mode;
2646 (*cached)->cst = cst;
2647
2648 start_sequence ();
2649 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2650 NULL_RTX, 0);
2651 seq = get_insns ();
2652 end_sequence ();
2653
2654 cost = seq_cost (seq);
2655
2656 if (dump_file && (dump_flags & TDF_DETAILS))
2657 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2658 (int) cst, GET_MODE_NAME (mode), cost);
2659
2660 (*cached)->cost = cost;
2661
2662 return cost;
2663 }
2664
2665 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2666 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2667 variable is omitted. The created memory accesses MODE.
2668
2669 TODO -- there must be some better way. This all is quite crude. */
2670
2671 static unsigned
2672 get_address_cost (bool symbol_present, bool var_present,
2673 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2674 {
2675 #define MAX_RATIO 128
2676 static sbitmap valid_mult;
2677 static HOST_WIDE_INT rat, off;
2678 static HOST_WIDE_INT min_offset, max_offset;
2679 static unsigned costs[2][2][2][2];
2680 unsigned cost, acost;
2681 rtx seq, addr, base;
2682 bool offset_p, ratio_p;
2683 rtx reg1;
2684 HOST_WIDE_INT s_offset;
2685 unsigned HOST_WIDE_INT mask;
2686 unsigned bits;
2687
2688 if (!valid_mult)
2689 {
2690 HOST_WIDE_INT i;
2691
2692 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2693
2694 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2695 for (i = 1; i <= 1 << 20; i <<= 1)
2696 {
2697 XEXP (addr, 1) = GEN_INT (i);
2698 if (!memory_address_p (Pmode, addr))
2699 break;
2700 }
2701 max_offset = i >> 1;
2702 off = max_offset;
2703
2704 for (i = 1; i <= 1 << 20; i <<= 1)
2705 {
2706 XEXP (addr, 1) = GEN_INT (-i);
2707 if (!memory_address_p (Pmode, addr))
2708 break;
2709 }
2710 min_offset = -(i >> 1);
2711
2712 if (dump_file && (dump_flags & TDF_DETAILS))
2713 {
2714 fprintf (dump_file, "get_address_cost:\n");
2715 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2716 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2717 }
2718
2719 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2720 sbitmap_zero (valid_mult);
2721 rat = 1;
2722 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2723 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2724 {
2725 XEXP (addr, 1) = GEN_INT (i);
2726 if (memory_address_p (Pmode, addr))
2727 {
2728 SET_BIT (valid_mult, i + MAX_RATIO);
2729 rat = i;
2730 }
2731 }
2732
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 {
2735 fprintf (dump_file, " allowed multipliers:");
2736 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2737 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2738 fprintf (dump_file, " %d", (int) i);
2739 fprintf (dump_file, "\n");
2740 fprintf (dump_file, "\n");
2741 }
2742 }
2743
2744 bits = GET_MODE_BITSIZE (Pmode);
2745 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2746 offset &= mask;
2747 if ((offset >> (bits - 1) & 1))
2748 offset |= ~mask;
2749 s_offset = offset;
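/* The code above sign-extends OFFSET into s_offset; e.g. with a 32-bit
   Pmode, an offset of 0xfffffff0 becomes -16. */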
2750
2751 cost = 0;
2752 offset_p = (s_offset != 0
2753 && min_offset <= s_offset && s_offset <= max_offset);
2754 ratio_p = (ratio != 1
2755 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2756 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2757
2758 if (ratio != 1 && !ratio_p)
2759 cost += multiply_by_cost (ratio, Pmode);
2760
2761 if (s_offset && !offset_p && !symbol_present)
2762 {
2763 cost += add_cost (Pmode);
2764 var_present = true;
2765 }
2766
2767 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2768 if (!acost)
2769 {
2770 acost = 0;
2771
2772 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2773 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2774 if (ratio_p)
2775 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2776
2777 if (var_present)
2778 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2779
2780 if (symbol_present)
2781 {
2782 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2783 if (offset_p)
2784 base = gen_rtx_fmt_e (CONST, Pmode,
2785 gen_rtx_fmt_ee (PLUS, Pmode,
2786 base,
2787 GEN_INT (off)));
2788 }
2789 else if (offset_p)
2790 base = GEN_INT (off);
2791 else
2792 base = NULL_RTX;
2793
2794 if (base)
2795 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2796
2797 start_sequence ();
2798 addr = memory_address (Pmode, addr);
2799 seq = get_insns ();
2800 end_sequence ();
2801
2802 acost = seq_cost (seq);
2803 acost += address_cost (addr, Pmode);
2804
2805 if (!acost)
2806 acost = 1;
2807 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2808 }
2809
2810 return cost + acost;
2811 }
2812
2813 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2814 the bitmap in which we should store them. */
2815
2816 static struct ivopts_data *fd_ivopts_data;
2817 static tree
2818 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2819 {
2820 bitmap *depends_on = data;
2821 struct version_info *info;
2822
2823 if (TREE_CODE (*expr_p) != SSA_NAME)
2824 return NULL_TREE;
2825 info = name_info (fd_ivopts_data, *expr_p);
2826
2827 if (!info->inv_id || info->has_nonlin_use)
2828 return NULL_TREE;
2829
2830 if (!*depends_on)
2831 *depends_on = BITMAP_ALLOC (NULL);
2832 bitmap_set_bit (*depends_on, info->inv_id);
2833
2834 return NULL_TREE;
2835 }
2836
2837 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2838 invariants the computation depends on. */
2839
2840 static unsigned
2841 force_var_cost (struct ivopts_data *data,
2842 tree expr, bitmap *depends_on)
2843 {
2844 static bool costs_initialized = false;
2845 static unsigned integer_cost;
2846 static unsigned symbol_cost;
2847 static unsigned address_cost;
2848 tree op0, op1;
2849 unsigned cost0, cost1, cost;
2850 enum machine_mode mode;
2851
2852 if (!costs_initialized)
2853 {
2854 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2855 rtx x = gen_rtx_MEM (DECL_MODE (var),
2856 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2857 tree addr;
2858 tree type = build_pointer_type (integer_type_node);
2859
2860 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2861 2000));
2862
2863 SET_DECL_RTL (var, x);
2864 TREE_STATIC (var) = 1;
2865 addr = build1 (ADDR_EXPR, type, var);
2866 symbol_cost = computation_cost (addr) + 1;
2867
2868 address_cost
2869 = computation_cost (build2 (PLUS_EXPR, type,
2870 addr,
2871 build_int_cst_type (type, 2000))) + 1;
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2873 {
2874 fprintf (dump_file, "force_var_cost:\n");
2875 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2876 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2877 fprintf (dump_file, " address %d\n", (int) address_cost);
2878 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2879 fprintf (dump_file, "\n");
2880 }
2881
2882 costs_initialized = true;
2883 }
2884
2885 STRIP_NOPS (expr);
2886
2887 if (depends_on)
2888 {
2889 fd_ivopts_data = data;
2890 walk_tree (&expr, find_depends, depends_on, NULL);
2891 }
2892
2893 if (SSA_VAR_P (expr))
2894 return 0;
2895
2896 if (TREE_INVARIANT (expr))
2897 {
2898 if (TREE_CODE (expr) == INTEGER_CST)
2899 return integer_cost;
2900
2901 if (TREE_CODE (expr) == ADDR_EXPR)
2902 {
2903 tree obj = TREE_OPERAND (expr, 0);
2904
2905 if (TREE_CODE (obj) == VAR_DECL
2906 || TREE_CODE (obj) == PARM_DECL
2907 || TREE_CODE (obj) == RESULT_DECL)
2908 return symbol_cost;
2909 }
2910
2911 return address_cost;
2912 }
2913
2914 switch (TREE_CODE (expr))
2915 {
2916 case PLUS_EXPR:
2917 case MINUS_EXPR:
2918 case MULT_EXPR:
2919 op0 = TREE_OPERAND (expr, 0);
2920 op1 = TREE_OPERAND (expr, 1);
2921 STRIP_NOPS (op0);
2922 STRIP_NOPS (op1);
2923
2924 if (is_gimple_val (op0))
2925 cost0 = 0;
2926 else
2927 cost0 = force_var_cost (data, op0, NULL);
2928
2929 if (is_gimple_val (op1))
2930 cost1 = 0;
2931 else
2932 cost1 = force_var_cost (data, op1, NULL);
2933
2934 break;
2935
2936 default:
2937 /* Just an arbitrary value, FIXME. */
2938 return target_spill_cost;
2939 }
2940
2941 mode = TYPE_MODE (TREE_TYPE (expr));
2942 switch (TREE_CODE (expr))
2943 {
2944 case PLUS_EXPR:
2945 case MINUS_EXPR:
2946 cost = add_cost (mode);
2947 break;
2948
2949 case MULT_EXPR:
2950 if (cst_and_fits_in_hwi (op0))
2951 cost = multiply_by_cost (int_cst_value (op0), mode);
2952 else if (cst_and_fits_in_hwi (op1))
2953 cost = multiply_by_cost (int_cst_value (op1), mode);
2954 else
2955 return target_spill_cost;
2956 break;
2957
2958 default:
2959 gcc_unreachable ();
2960 }
2961
2962 cost += cost0;
2963 cost += cost1;
2964
2965 /* Bound the cost by target_spill_cost. The parts of complicated
2966 computations often are either loop invariant or at least can
2967 be shared between several iv uses, so letting this grow without
2968 limits would not give reasonable results. */
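/* For illustration (a hypothetical expression): for (a + b) * 4 with a and b
   SSA names, the recursion above charges add_cost for the inner addition and
   multiply_by_cost (4, mode) for the outer multiplication, and the sum is
   then capped here. */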
2969 return cost < target_spill_cost ? cost : target_spill_cost;
2970 }
2971
2972 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2973 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2974 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2975 invariants the computation depends on. */
2976
2977 static unsigned
2978 split_address_cost (struct ivopts_data *data,
2979 tree addr, bool *symbol_present, bool *var_present,
2980 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2981 {
2982 tree core;
2983 HOST_WIDE_INT bitsize;
2984 HOST_WIDE_INT bitpos;
2985 tree toffset;
2986 enum machine_mode mode;
2987 int unsignedp, volatilep;
2988
2989 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2990 &unsignedp, &volatilep, false);
2991
2992 if (toffset != 0
2993 || bitpos % BITS_PER_UNIT != 0
2994 || TREE_CODE (core) != VAR_DECL)
2995 {
2996 *symbol_present = false;
2997 *var_present = true;
2998 fd_ivopts_data = data;
2999 walk_tree (&addr, find_depends, depends_on, NULL);
3000 return target_spill_cost;
3001 }
3002
3003 *offset += bitpos / BITS_PER_UNIT;
3004 if (TREE_STATIC (core)
3005 || DECL_EXTERNAL (core))
3006 {
3007 *symbol_present = true;
3008 *var_present = false;
3009 return 0;
3010 }
3011
3012 *symbol_present = false;
3013 *var_present = true;
3014 return 0;
3015 }
3016
3017 /* Estimates cost of expressing difference of addresses E1 - E2 as
3018 var + symbol + offset. The value of offset is added to OFFSET,
3019 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3020 part is missing. DEPENDS_ON is a set of the invariants the computation
3021 depends on. */
3022
3023 static unsigned
3024 ptr_difference_cost (struct ivopts_data *data,
3025 tree e1, tree e2, bool *symbol_present, bool *var_present,
3026 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3027 {
3028 HOST_WIDE_INT diff = 0;
3029 unsigned cost;
3030
3031 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3032
3033 if (ptr_difference_const (e1, e2, &diff))
3034 {
3035 *offset += diff;
3036 *symbol_present = false;
3037 *var_present = false;
3038 return 0;
3039 }
3040
3041 if (e2 == integer_zero_node)
3042 return split_address_cost (data, TREE_OPERAND (e1, 0),
3043 symbol_present, var_present, offset, depends_on);
3044
3045 *symbol_present = false;
3046 *var_present = true;
3047
3048 cost = force_var_cost (data, e1, depends_on);
3049 cost += force_var_cost (data, e2, depends_on);
3050 cost += add_cost (Pmode);
3051
3052 return cost;
3053 }
3054
3055 /* Estimates cost of expressing difference E1 - E2 as
3056 var + symbol + offset. The value of offset is added to OFFSET,
3057 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3058 part is missing. DEPENDS_ON is a set of the invariants the computation
3059 depends on. */
3060
3061 static unsigned
3062 difference_cost (struct ivopts_data *data,
3063 tree e1, tree e2, bool *symbol_present, bool *var_present,
3064 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3065 {
3066 unsigned cost;
3067 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3068 unsigned HOST_WIDE_INT off1, off2;
3069
3070 e1 = strip_offset (e1, false, &off1);
3071 e2 = strip_offset (e2, false, &off2);
3072 *offset += off1 - off2;
3073
3074 STRIP_NOPS (e1);
3075 STRIP_NOPS (e2);
3076
3077 if (TREE_CODE (e1) == ADDR_EXPR)
3078 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3079 depends_on);
3080 *symbol_present = false;
3081
3082 if (operand_equal_p (e1, e2, 0))
3083 {
3084 *var_present = false;
3085 return 0;
3086 }
3087 *var_present = true;
3088 if (zero_p (e2))
3089 return force_var_cost (data, e1, depends_on);
3090
3091 if (zero_p (e1))
3092 {
3093 cost = force_var_cost (data, e2, depends_on);
3094 cost += multiply_by_cost (-1, mode);
3095
3096 return cost;
3097 }
3098
3099 cost = force_var_cost (data, e1, depends_on);
3100 cost += force_var_cost (data, e2, depends_on);
3101 cost += add_cost (mode);
3102
3103 return cost;
3104 }
3105
3106 /* Determines the cost of the computation by which USE is expressed
3107 from induction variable CAND. If ADDRESS_P is true, we just need
3108 to create an address from it, otherwise we want to get it into a
3109 register. A set of invariants we depend on is stored in
3110 DEPENDS_ON. AT is the statement at which the value is computed. */
3111
3112 static unsigned
3113 get_computation_cost_at (struct ivopts_data *data,
3114 struct iv_use *use, struct iv_cand *cand,
3115 bool address_p, bitmap *depends_on, tree at)
3116 {
3117 tree ubase = use->iv->base, ustep = use->iv->step;
3118 tree cbase, cstep;
3119 tree utype = TREE_TYPE (ubase), ctype;
3120 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3121 HOST_WIDE_INT ratio, aratio;
3122 bool var_present, symbol_present;
3123 unsigned cost = 0, n_sums;
3124
3125 *depends_on = NULL;
3126
3127 /* Only consider real candidates. */
3128 if (!cand->iv)
3129 return INFTY;
3130
3131 cbase = cand->iv->base;
3132 cstep = cand->iv->step;
3133 ctype = TREE_TYPE (cbase);
3134
3135 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3136 {
3137 /* We do not have enough precision to express the values of the use. */
3138 return INFTY;
3139 }
3140
3141 if (address_p)
3142 {
3143 /* Do not try to express the address of an object with a computation based
3144 on the address of a different object. This may cause problems in rtl
3145 level alias analysis (which does not expect this to happen, as it is
3146 invalid in C), and it would be unlikely to be useful
3147 anyway. */
3148 if (use->iv->base_object
3149 && cand->iv->base_object
3150 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3151 return INFTY;
3152 }
3153
3154 if (!cst_and_fits_in_hwi (ustep)
3155 || !cst_and_fits_in_hwi (cstep))
3156 return INFTY;
3157
3158 if (TREE_CODE (ubase) == INTEGER_CST
3159 && !cst_and_fits_in_hwi (ubase))
3160 goto fallback;
3161
3162 if (TREE_CODE (cbase) == INTEGER_CST
3163 && !cst_and_fits_in_hwi (cbase))
3164 goto fallback;
3165
3166 ustepi = int_cst_value (ustep);
3167 cstepi = int_cst_value (cstep);
3168
3169 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3170 {
3171 /* TODO -- add direct handling of this case. */
3172 goto fallback;
3173 }
3174
3175 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3176 return INFTY;
3177
3178 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3179 or ratio == 1, it is better to handle this like
3180
3181 ubase - ratio * cbase + ratio * var
3182
3183 (this also holds in the case ratio == -1, TODO). */
3184
3185 if (TREE_CODE (cbase) == INTEGER_CST)
3186 {
3187 offset = - ratio * int_cst_value (cbase);
3188 cost += difference_cost (data,
3189 ubase, integer_zero_node,
3190 &symbol_present, &var_present, &offset,
3191 depends_on);
3192 }
3193 else if (ratio == 1)
3194 {
3195 cost += difference_cost (data,
3196 ubase, cbase,
3197 &symbol_present, &var_present, &offset,
3198 depends_on);
3199 }
3200 else
3201 {
3202 cost += force_var_cost (data, cbase, depends_on);
3203 cost += add_cost (TYPE_MODE (ctype));
3204 cost += difference_cost (data,
3205 ubase, integer_zero_node,
3206 &symbol_present, &var_present, &offset,
3207 depends_on);
3208 }
3209
3210 /* If we are after the increment, the value of the candidate is already
3211 one step ahead. */
3212 if (stmt_after_increment (data->current_loop, cand, at))
3213 offset -= ratio * cstepi;
3214
3215 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3216 (symbol/var/const parts may be omitted). If we are looking for an address,
3217 find the cost of addressing this. */
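/* For illustration (a hypothetical use): for the address of a[i], with a
   a global int array and the candidate an iv with base 0 and step 1,
   ratio == 4 and ubase == &a[0] contributes only a symbol, so the shape
   is &a + 4 * var and get_address_cost (true, false, 0, 4) is used. */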
3218 if (address_p)
3219 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3220
3221 /* Otherwise estimate the costs for computing the expression. */
3222 aratio = ratio > 0 ? ratio : -ratio;
3223 if (!symbol_present && !var_present && !offset)
3224 {
3225 if (ratio != 1)
3226 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3227
3228 return cost;
3229 }
3230
3231 if (aratio != 1)
3232 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3233
3234 n_sums = 1;
3235 if (var_present
3236 /* Symbol + offset should be compile-time computable. */
3237 && (symbol_present || offset))
3238 n_sums++;
3239
3240 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3241
3242 fallback:
3243 {
3244 /* Just get the expression, expand it and measure the cost. */
3245 tree comp = get_computation_at (data->current_loop, use, cand, at);
3246
3247 if (!comp)
3248 return INFTY;
3249
3250 if (address_p)
3251 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3252
3253 return computation_cost (comp);
3254 }
3255 }
3256
3257 /* Determines the cost of the computation by which USE is expressed
3258 from induction variable CAND. If ADDRESS_P is true, we just need
3259 to create an address from it, otherwise we want to get it into a
3260 register. A set of invariants we depend on is stored in
3261 DEPENDS_ON. */
3262
3263 static unsigned
3264 get_computation_cost (struct ivopts_data *data,
3265 struct iv_use *use, struct iv_cand *cand,
3266 bool address_p, bitmap *depends_on)
3267 {
3268 return get_computation_cost_at (data,
3269 use, cand, address_p, depends_on, use->stmt);
3270 }
3271
3272 /* Determines cost of basing replacement of USE on CAND in a generic
3273 expression. */
3274
3275 static bool
3276 determine_use_iv_cost_generic (struct ivopts_data *data,
3277 struct iv_use *use, struct iv_cand *cand)
3278 {
3279 bitmap depends_on;
3280 unsigned cost;
3281
3282 /* The simple case first -- if we need to express value of the preserved
3283 original biv, the cost is 0. This also prevents us from counting the
3284 cost of increment twice -- once at this use and once in the cost of
3285 the candidate. */
3286 if (cand->pos == IP_ORIGINAL
3287 && cand->incremented_at == use->stmt)
3288 {
3289 set_use_iv_cost (data, use, cand, 0, NULL);
3290 return true;
3291 }
3292
3293 cost = get_computation_cost (data, use, cand, false, &depends_on);
3294 set_use_iv_cost (data, use, cand, cost, depends_on);
3295
3296 return cost != INFTY;
3297 }
3298
3299 /* Determines cost of basing replacement of USE on CAND in an address. */
3300
3301 static bool
3302 determine_use_iv_cost_address (struct ivopts_data *data,
3303 struct iv_use *use, struct iv_cand *cand)
3304 {
3305 bitmap depends_on;
3306 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3307
3308 set_use_iv_cost (data, use, cand, cost, depends_on);
3309
3310 return cost != INFTY;
3311 }
3312
3313 /* Computes value of induction variable IV in iteration NITER. */
3314
3315 static tree
3316 iv_value (struct iv *iv, tree niter)
3317 {
3318 tree val;
3319 tree type = TREE_TYPE (iv->base);
3320
3321 niter = fold_convert (type, niter);
3322 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3323
3324 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3325 }
3326
3327 /* Computes value of candidate CAND at position AT in iteration NITER. */
3328
3329 static tree
3330 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3331 {
3332 tree val = iv_value (cand->iv, niter);
3333 tree type = TREE_TYPE (cand->iv->base);
3334
3335 if (stmt_after_increment (loop, cand, at))
3336 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3337
3338 return val;
3339 }
3340
3341 /* Returns the period of induction variable IV. */
3342
3343 static tree
3344 iv_period (struct iv *iv)
3345 {
3346 tree step = iv->step, period, type;
3347 tree pow2div;
3348
3349 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3350
3351 /* The period of the iv is gcd (step, type range). Since the type range is
3352 a power of two, it suffices to determine the maximum power of two that
3353 divides step. */
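/* E.g. for a 32-bit iv with step 4, pow2div is 2 and the period is the
   mask of the low 30 bits, 0x3fffffff. */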
3354 pow2div = num_ending_zeros (step);
3355 type = unsigned_type_for (TREE_TYPE (step));
3356
3357 period = build_low_bits_mask (type,
3358 (TYPE_PRECISION (type)
3359 - tree_low_cst (pow2div, 1)));
3360
3361 return period;
3362 }
3363
3364 /* Check whether it is possible to express the condition in USE by comparison
3365 of candidate CAND. If so, store the comparison code to COMPARE and the
3366 value to compare against to BOUND. */
3367
3368 static bool
3369 may_eliminate_iv (struct ivopts_data *data,
3370 struct iv_use *use, struct iv_cand *cand,
3371 enum tree_code *compare, tree *bound)
3372 {
3373 basic_block ex_bb;
3374 edge exit;
3375 struct tree_niter_desc *niter;
3376 tree nit, nit_type;
3377 tree wider_type, period, per_type;
3378 struct loop *loop = data->current_loop;
3379
3380 /* For now this works only for exits that dominate the loop latch. TODO --
3381 extend it to other conditions inside the loop body. */
3382 ex_bb = bb_for_stmt (use->stmt);
3383 if (use->stmt != last_stmt (ex_bb)
3384 || TREE_CODE (use->stmt) != COND_EXPR)
3385 return false;
3386 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3387 return false;
3388
3389 exit = EDGE_SUCC (ex_bb, 0);
3390 if (flow_bb_inside_loop_p (loop, exit->dest))
3391 exit = EDGE_SUCC (ex_bb, 1);
3392 if (flow_bb_inside_loop_p (loop, exit->dest))
3393 return false;
3394
3395 niter = niter_for_exit (data, exit);
3396 if (!niter
3397 || !zero_p (niter->may_be_zero))
3398 return false;
3399
3400 nit = niter->niter;
3401 nit_type = TREE_TYPE (nit);
3402
3403 /* Determine whether we may use the variable to test whether niter iterations
3404 elapsed. This is the case iff the period of the induction variable is
3405 greater than the number of iterations. */
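/* For example (values assumed): with nit == 100 and a candidate with base 0
   and step 1 in a 32-bit type, the period test below succeeds and the exit
   condition is rewritten as a comparison of the candidate against
   cand_value_at (loop, cand, use->stmt, nit). */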
3406 period = iv_period (cand->iv);
3407 if (!period)
3408 return false;
3409 per_type = TREE_TYPE (period);
3410
3411 wider_type = TREE_TYPE (period);
3412 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3413 wider_type = per_type;
3414 else
3415 wider_type = nit_type;
3416
3417 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3418 fold_convert (wider_type, period),
3419 fold_convert (wider_type, nit)))))
3420 return false;
3421
3422 if (exit->flags & EDGE_TRUE_VALUE)
3423 *compare = EQ_EXPR;
3424 else
3425 *compare = NE_EXPR;
3426
3427 *bound = cand_value_at (loop, cand, use->stmt, nit);
3428 return true;
3429 }
3430
3431 /* Determines cost of basing replacement of USE on CAND in a condition. */
3432
3433 static bool
3434 determine_use_iv_cost_condition (struct ivopts_data *data,
3435 struct iv_use *use, struct iv_cand *cand)
3436 {
3437 tree bound;
3438 enum tree_code compare;
3439
3440 /* Only consider real candidates. */
3441 if (!cand->iv)
3442 {
3443 set_use_iv_cost (data, use, cand, INFTY, NULL);
3444 return false;
3445 }
3446
3447 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3448 {
3449 bitmap depends_on = NULL;
3450 unsigned cost = force_var_cost (data, bound, &depends_on);
3451
3452 set_use_iv_cost (data, use, cand, cost, depends_on);
3453 return cost != INFTY;
3454 }
3455
3456 /* The induction variable elimination failed; just express the original
3457 giv. If it is compared with an invariant, note that we cannot get
3458 rid of it. */
3459 if (TREE_CODE (*use->op_p) == SSA_NAME)
3460 record_invariant (data, *use->op_p, true);
3461 else
3462 {
3463 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3464 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3465 }
3466
3467 return determine_use_iv_cost_generic (data, use, cand);
3468 }
3469
3470 /* Checks whether it is possible to replace the final value of USE by
3471 a direct computation. If so, the formula is stored to *VALUE. */
3472
3473 static bool
3474 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3475 tree *value)
3476 {
3477 struct loop *loop = data->current_loop;
3478 edge exit;
3479 struct tree_niter_desc *niter;
3480
3481 exit = single_dom_exit (loop);
3482 if (!exit)
3483 return false;
3484
3485 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3486 bb_for_stmt (use->stmt)));
3487
3488 niter = niter_for_single_dom_exit (data);
3489 if (!niter
3490 || !zero_p (niter->may_be_zero))
3491 return false;
3492
3493 *value = iv_value (use->iv, niter->niter);
3494
3495 return true;
3496 }
3497
3498 /* Determines cost of replacing final value of USE using CAND. */
3499
3500 static bool
3501 determine_use_iv_cost_outer (struct ivopts_data *data,
3502 struct iv_use *use, struct iv_cand *cand)
3503 {
3504 bitmap depends_on;
3505 unsigned cost;
3506 edge exit;
3507 tree value;
3508 struct loop *loop = data->current_loop;
3509
3510 /* The simple case first -- if we need to express value of the preserved
3511 original biv, the cost is 0. This also prevents us from counting the
3512 cost of increment twice -- once at this use and once in the cost of
3513 the candidate. */
3514 if (cand->pos == IP_ORIGINAL
3515 && cand->incremented_at == use->stmt)
3516 {
3517 set_use_iv_cost (data, use, cand, 0, NULL);
3518 return true;
3519 }
3520
3521 if (!cand->iv)
3522 {
3523 if (!may_replace_final_value (data, use, &value))
3524 {
3525 set_use_iv_cost (data, use, cand, INFTY, NULL);
3526 return false;
3527 }
3528
3529 depends_on = NULL;
3530 cost = force_var_cost (data, value, &depends_on);
3531
3532 cost /= AVG_LOOP_NITER (loop);
3533
3534 set_use_iv_cost (data, use, cand, cost, depends_on);
3535 return cost != INFTY;
3536 }
3537
3538 exit = single_dom_exit (loop);
3539 if (exit)
3540 {
3541 /* If there is just a single exit, we may use the value of the candidate
3542 after the exit is taken to determine the value of the use. */
3543 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3544 last_stmt (exit->src));
3545 if (cost != INFTY)
3546 cost /= AVG_LOOP_NITER (loop);
3547 }
3548 else
3549 {
3550 /* Otherwise we just need to compute the iv. */
3551 cost = get_computation_cost (data, use, cand, false, &depends_on);
3552 }
3553
3554 set_use_iv_cost (data, use, cand, cost, depends_on);
3555
3556 return cost != INFTY;
3557 }
3558
3559 /* Determines cost of basing replacement of USE on CAND. Returns false
3560 if USE cannot be based on CAND. */
3561
3562 static bool
3563 determine_use_iv_cost (struct ivopts_data *data,
3564 struct iv_use *use, struct iv_cand *cand)
3565 {
3566 switch (use->type)
3567 {
3568 case USE_NONLINEAR_EXPR:
3569 return determine_use_iv_cost_generic (data, use, cand);
3570
3571 case USE_OUTER:
3572 return determine_use_iv_cost_outer (data, use, cand);
3573
3574 case USE_ADDRESS:
3575 return determine_use_iv_cost_address (data, use, cand);
3576
3577 case USE_COMPARE:
3578 return determine_use_iv_cost_condition (data, use, cand);
3579
3580 default:
3581 gcc_unreachable ();
3582 }
3583 }
3584
3585 /* Determines costs of basing the use of the iv on an iv candidate. */
3586
3587 static void
3588 determine_use_iv_costs (struct ivopts_data *data)
3589 {
3590 unsigned i, j;
3591 struct iv_use *use;
3592 struct iv_cand *cand;
3593 bitmap to_clear = BITMAP_ALLOC (NULL);
3594
3595 alloc_use_cost_map (data);
3596
3597 for (i = 0; i < n_iv_uses (data); i++)
3598 {
3599 use = iv_use (data, i);
3600
3601 if (data->consider_all_candidates)
3602 {
3603 for (j = 0; j < n_iv_cands (data); j++)
3604 {
3605 cand = iv_cand (data, j);
3606 determine_use_iv_cost (data, use, cand);
3607 }
3608 }
3609 else
3610 {
3611 bitmap_iterator bi;
3612
3613 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3614 {
3615 cand = iv_cand (data, j);
3616 if (!determine_use_iv_cost (data, use, cand))
3617 bitmap_set_bit (to_clear, j);
3618 }
3619
3620 /* Remove the candidates for which the cost is infinite from
3621 the list of related candidates. */
3622 bitmap_and_compl_into (use->related_cands, to_clear);
3623 bitmap_clear (to_clear);
3624 }
3625 }
3626
3627 BITMAP_FREE (to_clear);
3628
3629 if (dump_file && (dump_flags & TDF_DETAILS))
3630 {
3631 fprintf (dump_file, "Use-candidate costs:\n");
3632
3633 for (i = 0; i < n_iv_uses (data); i++)
3634 {
3635 use = iv_use (data, i);
3636
3637 fprintf (dump_file, "Use %d:\n", i);
3638 fprintf (dump_file, " cand\tcost\tdepends on\n");
3639 for (j = 0; j < use->n_map_members; j++)
3640 {
3641 if (!use->cost_map[j].cand
3642 || use->cost_map[j].cost == INFTY)
3643 continue;
3644
3645 fprintf (dump_file, " %d\t%d\t",
3646 use->cost_map[j].cand->id,
3647 use->cost_map[j].cost);
3648 if (use->cost_map[j].depends_on)
3649 bitmap_print (dump_file,
3650 use->cost_map[j].depends_on, "","");
3651 fprintf (dump_file, "\n");
3652 }
3653
3654 fprintf (dump_file, "\n");
3655 }
3656 fprintf (dump_file, "\n");
3657 }
3658 }
3659
3660 /* Determines cost of the candidate CAND. */
3661
3662 static void
3663 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3664 {
3665 unsigned cost_base, cost_step;
3666 tree base;
3667
3668 if (!cand->iv)
3669 {
3670 cand->cost = 0;
3671 return;
3672 }
3673
3674 /* There are two costs associated with the candidate -- its increment
3675 and its initialization. The second is almost negligible for any loop
3676 that rolls enough, so we give it only a small weight. */
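/* For illustration, assuming AVG_LOOP_NITER yields 10: with cost_step == 1
   and cost_base == 4, the resulting candidate cost is 1 + 4 / 10 == 1. */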
3677
3678 base = cand->iv->base;
3679 cost_base = force_var_cost (data, base, NULL);
3680 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3681
3682 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3683
3684 /* Prefer the original iv unless we may gain something by replacing it. */
3685 if (cand->pos == IP_ORIGINAL)
3686 cand->cost--;
3687
3688 /* Prefer not to insert statements into latch unless there are some
3689 already (so that we do not create unnecessary jumps). */
3690 if (cand->pos == IP_END
3691 && empty_block_p (ip_end_pos (data->current_loop)))
3692 cand->cost++;
3693 }
3694
3695 /* Determines costs of computation of the candidates. */
3696
3697 static void
3698 determine_iv_costs (struct ivopts_data *data)
3699 {
3700 unsigned i;
3701
3702 if (dump_file && (dump_flags & TDF_DETAILS))
3703 {
3704 fprintf (dump_file, "Candidate costs:\n");
3705 fprintf (dump_file, " cand\tcost\n");
3706 }
3707
3708 for (i = 0; i < n_iv_cands (data); i++)
3709 {
3710 struct iv_cand *cand = iv_cand (data, i);
3711
3712 determine_iv_cost (data, cand);
3713
3714 if (dump_file && (dump_flags & TDF_DETAILS))
3715 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3716 }
3717
3718 if (dump_file && (dump_flags & TDF_DETAILS))
3719 fprintf (dump_file, "\n");
3720 }
3721
3722 /* Calculates cost for having SIZE induction variables. */
3723
3724 static unsigned
3725 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3726 {
3727 return global_cost_for_size (size,
3728 loop_data (data->current_loop)->regs_used,
3729 n_iv_uses (data));
3730 }
3731
3732 /* For each size of the induction variable set determine the penalty. */
3733
3734 static void
3735 determine_set_costs (struct ivopts_data *data)
3736 {
3737 unsigned j, n;
3738 tree phi, op;
3739 struct loop *loop = data->current_loop;
3740 bitmap_iterator bi;
3741
3742 /* We use the following model (definitely improvable, especially the
3743 cost function -- TODO):
3744
3745 We estimate the number of registers available (using MD data), name it A.
3746
3747 We estimate the number of registers used by the loop, name it U. This
3748 number is obtained as the number of loop phi nodes (not counting virtual
3749 registers and bivs) + the number of variables from outside of the loop.
3750
3751 We set a reserve R (free regs that are used for temporary computations,
3752 etc.). For now the reserve is a constant 3.
3753
3754 Let I be the number of induction variables.
3755
3756 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3757 making a lot of ivs without a reason).
3758 -- if A - R < U + I <= A, the cost is I * PRES_COST
3759 -- if U + I > A, the cost is I * PRES_COST and
3760 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
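/* For illustration (numbers assumed): with A == 6, R == 3 and U == 2, one iv
   gives U + I + R == 6 <= A and costs 1 * SMALL_COST; three ivs give
   U + I == 5, which lies in (A - R, A], and cost 3 * PRES_COST; five ivs
   give U + I == 7 > A and cost 5 * PRES_COST plus the spill term. */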
3761
3762 if (dump_file && (dump_flags & TDF_DETAILS))
3763 {
3764 fprintf (dump_file, "Global costs:\n");
3765 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3766 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3767 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3768 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3769 }
3770
3771 n = 0;
3772 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3773 {
3774 op = PHI_RESULT (phi);
3775
3776 if (!is_gimple_reg (op))
3777 continue;
3778
3779 if (get_iv (data, op))
3780 continue;
3781
3782 n++;
3783 }
3784
3785 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3786 {
3787 struct version_info *info = ver_info (data, j);
3788
3789 if (info->inv_id && info->has_nonlin_use)
3790 n++;
3791 }
3792
3793 loop_data (loop)->regs_used = n;
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 fprintf (dump_file, " regs_used %d\n", n);
3796
3797 if (dump_file && (dump_flags & TDF_DETAILS))
3798 {
3799 fprintf (dump_file, " cost for size:\n");
3800 fprintf (dump_file, " ivs\tcost\n");
3801 for (j = 0; j <= 2 * target_avail_regs; j++)
3802 fprintf (dump_file, " %d\t%d\n", j,
3803 ivopts_global_cost_for_size (data, j));
3804 fprintf (dump_file, "\n");
3805 }
3806 }
3807
3808 /* Returns true if A is a cheaper cost pair than B. */
3809
3810 static bool
3811 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3812 {
3813 if (!a)
3814 return false;
3815
3816 if (!b)
3817 return true;
3818
3819 if (a->cost < b->cost)
3820 return true;
3821
3822 if (a->cost > b->cost)
3823 return false;
3824
3825 /* In case the costs are the same, prefer the cheaper candidate. */
3826 if (a->cand->cost < b->cand->cost)
3827 return true;
3828
3829 return false;
3830 }
3831
3832 /* Computes the cost field of IVS structure. */
3833
3834 static void
3835 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3836 {
3837 unsigned cost = 0;
3838
3839 cost += ivs->cand_use_cost;
3840 cost += ivs->cand_cost;
3841 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3842
3843 ivs->cost = cost;
3844 }
3845
3846 /* Set USE not to be expressed by any candidate in IVS. */
3847
3848 static void
3849 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3850 struct iv_use *use)
3851 {
3852 unsigned uid = use->id, cid, iid;
3853 bitmap deps;
3854 struct cost_pair *cp;
3855 bitmap_iterator bi;
3856
3857 cp = ivs->cand_for_use[uid];
3858 if (!cp)
3859 return;
3860 cid = cp->cand->id;
3861
3862 ivs->bad_uses++;
3863 ivs->cand_for_use[uid] = NULL;
3864 ivs->n_cand_uses[cid]--;
3865
3866 if (ivs->n_cand_uses[cid] == 0)
3867 {
3868 bitmap_clear_bit (ivs->cands, cid);
3869 /* Do not count the pseudocandidates. */
3870 if (cp->cand->iv)
3871 ivs->n_regs--;
3872 ivs->n_cands--;
3873 ivs->cand_cost -= cp->cand->cost;
3874 }
3875
3876 ivs->cand_use_cost -= cp->cost;
3877
3878 deps = cp->depends_on;
3879
3880 if (deps)
3881 {
3882 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3883 {
3884 ivs->n_invariant_uses[iid]--;
3885 if (ivs->n_invariant_uses[iid] == 0)
3886 ivs->n_regs--;
3887 }
3888 }
3889
3890 iv_ca_recount_cost (data, ivs);
3891 }
3892
3893 /* Set cost pair for USE in set IVS to CP. */
3894
3895 static void
3896 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3897 struct iv_use *use, struct cost_pair *cp)
3898 {
3899 unsigned uid = use->id, cid, iid;
3900 bitmap deps;
3901 bitmap_iterator bi;
3902
3903 if (ivs->cand_for_use[uid] == cp)
3904 return;
3905
3906 if (ivs->cand_for_use[uid])
3907 iv_ca_set_no_cp (data, ivs, use);
3908
3909 if (cp)
3910 {
3911 cid = cp->cand->id;
3912
3913 ivs->bad_uses--;
3914 ivs->cand_for_use[uid] = cp;
3915 ivs->n_cand_uses[cid]++;
3916 if (ivs->n_cand_uses[cid] == 1)
3917 {
3918 bitmap_set_bit (ivs->cands, cid);
3919 /* Do not count the pseudocandidates. */
3920 if (cp->cand->iv)
3921 ivs->n_regs++;
3922 ivs->n_cands++;
3923 ivs->cand_cost += cp->cand->cost;
3924 }
3925
3926 ivs->cand_use_cost += cp->cost;
3927
3928 deps = cp->depends_on;
3929
3930 if (deps)
3931 {
3932 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3933 {
3934 ivs->n_invariant_uses[iid]++;
3935 if (ivs->n_invariant_uses[iid] == 1)
3936 ivs->n_regs++;
3937 }
3938 }
3939
3940 iv_ca_recount_cost (data, ivs);
3941 }
3942 }
3943
3944 /* Extend set IVS by expressing USE by some of the candidates in it
3945 if possible. */
3946
3947 static void
3948 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3949 struct iv_use *use)
3950 {
3951 struct cost_pair *best_cp = NULL, *cp;
3952 bitmap_iterator bi;
3953 unsigned i;
3954
3955 gcc_assert (ivs->upto >= use->id);
3956
3957 if (ivs->upto == use->id)
3958 {
3959 ivs->upto++;
3960 ivs->bad_uses++;
3961 }
3962
3963 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3964 {
3965 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3966
3967 if (cheaper_cost_pair (cp, best_cp))
3968 best_cp = cp;
3969 }
3970
3971 iv_ca_set_cp (data, ivs, use, best_cp);
3972 }
3973
3974 /* Get cost for assignment IVS. */
3975
3976 static unsigned
3977 iv_ca_cost (struct iv_ca *ivs)
3978 {
3979 return (ivs->bad_uses ? INFTY : ivs->cost);
3980 }
3981
3982 /* Returns true if all dependences of CP are among invariants in IVS. */
3983
3984 static bool
3985 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3986 {
3987 unsigned i;
3988 bitmap_iterator bi;
3989
3990 if (!cp->depends_on)
3991 return true;
3992
3993 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3994 {
3995 if (ivs->n_invariant_uses[i] == 0)
3996 return false;
3997 }
3998
3999 return true;
4000 }
4001
4002 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP and
4003 chains it before NEXT_CHANGE. */
4004
4005 static struct iv_ca_delta *
4006 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4007 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4008 {
4009 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4010
4011 change->use = use;
4012 change->old_cp = old_cp;
4013 change->new_cp = new_cp;
4014 change->next_change = next_change;
4015
4016 return change;
4017 }
4018
4019 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4020 are rewritten. */
4021
4022 static struct iv_ca_delta *
4023 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4024 {
4025 struct iv_ca_delta *last;
4026
4027 if (!l2)
4028 return l1;
4029
4030 if (!l1)
4031 return l2;
4032
4033 for (last = l1; last->next_change; last = last->next_change)
4034 continue;
4035 last->next_change = l2;
4036
4037 return l1;
4038 }
4039
4040 /* Returns the cost pair by which USE is expressed in IVS. */
4041
4042 static struct cost_pair *
4043 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4044 {
4045 return ivs->cand_for_use[use->id];
4046 }
4047
4048 /* Reverse the list of changes DELTA, forming the inverse to it. */
4049
4050 static struct iv_ca_delta *
4051 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4052 {
4053 struct iv_ca_delta *act, *next, *prev = NULL;
4054 struct cost_pair *tmp;
4055
4056 for (act = delta; act; act = next)
4057 {
4058 next = act->next_change;
4059 act->next_change = prev;
4060 prev = act;
4061
4062 tmp = act->old_cp;
4063 act->old_cp = act->new_cp;
4064 act->new_cp = tmp;
4065 }
4066
4067 return prev;
4068 }
4069
4070 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4071 reverted instead. */
4072
4073 static void
4074 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4075 struct iv_ca_delta *delta, bool forward)
4076 {
4077 struct cost_pair *from, *to;
4078 struct iv_ca_delta *act;
4079
4080 if (!forward)
4081 delta = iv_ca_delta_reverse (delta);
4082
4083 for (act = delta; act; act = act->next_change)
4084 {
4085 from = act->old_cp;
4086 to = act->new_cp;
4087 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4088 iv_ca_set_cp (data, ivs, act->use, to);
4089 }
4090
4091 if (!forward)
4092 iv_ca_delta_reverse (delta);
4093 }
4094
4095 /* Returns true if CAND is used in IVS. */
4096
4097 static bool
4098 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4099 {
4100 return ivs->n_cand_uses[cand->id] > 0;
4101 }
4102
4103 /* Returns number of induction variable candidates in the set IVS. */
4104
4105 static unsigned
4106 iv_ca_n_cands (struct iv_ca *ivs)
4107 {
4108 return ivs->n_cands;
4109 }
4110
4111 /* Free the list of changes DELTA. */
4112
4113 static void
4114 iv_ca_delta_free (struct iv_ca_delta **delta)
4115 {
4116 struct iv_ca_delta *act, *next;
4117
4118 for (act = *delta; act; act = next)
4119 {
4120 next = act->next_change;
4121 free (act);
4122 }
4123
4124 *delta = NULL;
4125 }
4126
4127 /* Allocates a new iv candidate assignment. */
4128
4129 static struct iv_ca *
4130 iv_ca_new (struct ivopts_data *data)
4131 {
4132 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4133
4134 nw->upto = 0;
4135 nw->bad_uses = 0;
4136 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4137 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4138 nw->cands = BITMAP_ALLOC (NULL);
4139 nw->n_cands = 0;
4140 nw->n_regs = 0;
4141 nw->cand_use_cost = 0;
4142 nw->cand_cost = 0;
4143 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4144 nw->cost = 0;
4145
4146 return nw;
4147 }
4148
4149 /* Free memory occupied by the set IVS. */
4150
4151 static void
4152 iv_ca_free (struct iv_ca **ivs)
4153 {
4154 free ((*ivs)->cand_for_use);
4155 free ((*ivs)->n_cand_uses);
4156 BITMAP_FREE ((*ivs)->cands);
4157 free ((*ivs)->n_invariant_uses);
4158 free (*ivs);
4159 *ivs = NULL;
4160 }
4161
4162 /* Dumps IVS to FILE. */
4163
4164 static void
4165 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4166 {
4167 const char *pref = " invariants ";
4168 unsigned i;
4169
4170 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4171 bitmap_print (file, ivs->cands, " candidates ","\n");
4172
4173 for (i = 1; i <= data->max_inv_id; i++)
4174 if (ivs->n_invariant_uses[i])
4175 {
4176 fprintf (file, "%s%d", pref, i);
4177 pref = ", ";
4178 }
4179 fprintf (file, "\n");
4180 }
4181
4182 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4183 new set, and store differences in DELTA. Number of induction variables
4184 in the new set is stored to N_IVS. */
4185
4186 static unsigned
4187 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4188 struct iv_cand *cand, struct iv_ca_delta **delta,
4189 unsigned *n_ivs)
4190 {
4191 unsigned i, cost;
4192 struct iv_use *use;
4193 struct cost_pair *old_cp, *new_cp;
4194
4195 *delta = NULL;
4196 for (i = 0; i < ivs->upto; i++)
4197 {
4198 use = iv_use (data, i);
4199 old_cp = iv_ca_cand_for_use (ivs, use);
4200
4201 if (old_cp
4202 && old_cp->cand == cand)
4203 continue;
4204
4205 new_cp = get_use_iv_cost (data, use, cand);
4206 if (!new_cp)
4207 continue;
4208
4209 if (!iv_ca_has_deps (ivs, new_cp))
4210 continue;
4211
4212 if (!cheaper_cost_pair (new_cp, old_cp))
4213 continue;
4214
4215 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4216 }
4217
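/* Commit the changes temporarily so that the cost (and the number of
   candidates) of the new assignment can be measured, then revert them so
   that IVS is left unchanged. */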
4218 iv_ca_delta_commit (data, ivs, *delta, true);
4219 cost = iv_ca_cost (ivs);
4220 if (n_ivs)
4221 *n_ivs = iv_ca_n_cands (ivs);
4222 iv_ca_delta_commit (data, ivs, *delta, false);
4223
4224 return cost;
4225 }
4226
4227 /* Try narrowing set IVS by removing CAND. Return the cost of
4228 the new set and store the differences in DELTA. */
4229
4230 static unsigned
4231 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4232 struct iv_cand *cand, struct iv_ca_delta **delta)
4233 {
4234 unsigned i, ci;
4235 struct iv_use *use;
4236 struct cost_pair *old_cp, *new_cp, *cp;
4237 bitmap_iterator bi;
4238 struct iv_cand *cnd;
4239 unsigned cost;
4240
4241 *delta = NULL;
4242 for (i = 0; i < n_iv_uses (data); i++)
4243 {
4244 use = iv_use (data, i);
4245
4246 old_cp = iv_ca_cand_for_use (ivs, use);
4247 if (old_cp->cand != cand)
4248 continue;
4249
4250 new_cp = NULL;
4251
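/* Find the cheapest candidate other than CAND that is already in IVS and
   is able to express this use. */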
4252 if (data->consider_all_candidates)
4253 {
4254 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4255 {
4256 if (ci == cand->id)
4257 continue;
4258
4259 cnd = iv_cand (data, ci);
4260
4261 cp = get_use_iv_cost (data, use, cnd);
4262 if (!cp)
4263 continue;
4264 if (!iv_ca_has_deps (ivs, cp))
4265 continue;
4266
4267 if (!cheaper_cost_pair (cp, new_cp))
4268 continue;
4269
4270 new_cp = cp;
4271 }
4272 }
4273 else
4274 {
4275 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4276 {
4277 if (ci == cand->id)
4278 continue;
4279
4280 cnd = iv_cand (data, ci);
4281
4282 cp = get_use_iv_cost (data, use, cnd);
4283 if (!cp)
4284 continue;
4285 if (!iv_ca_has_deps (ivs, cp))
4286 continue;
4287
4288 if (!cheaper_cost_pair (cp, new_cp))
4289 continue;
4290
4291 new_cp = cp;
4292 }
4293 }
4294
4295 if (!new_cp)
4296 {
4297 iv_ca_delta_free (delta);
4298 return INFTY;
4299 }
4300
4301 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4302 }
4303
4304 iv_ca_delta_commit (data, ivs, *delta, true);
4305 cost = iv_ca_cost (ivs);
4306 iv_ca_delta_commit (data, ivs, *delta, false);
4307
4308 return cost;
4309 }
4310
4311 /* Try optimizing the set of candidates IVS by removing candidates other
4312    than EXCEPT_CAND from it. Return the cost of the new set, and store the
4313    differences in DELTA. */
4314
4315 static unsigned
4316 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4317 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4318 {
4319 bitmap_iterator bi;
4320 struct iv_ca_delta *act_delta, *best_delta;
4321 unsigned i, best_cost, acost;
4322 struct iv_cand *cand;
4323
4324 best_delta = NULL;
4325 best_cost = iv_ca_cost (ivs);
4326
4327 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4328 {
4329 cand = iv_cand (data, i);
4330
4331 if (cand == except_cand)
4332 continue;
4333
4334 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4335
4336 if (acost < best_cost)
4337 {
4338 best_cost = acost;
4339 iv_ca_delta_free (&best_delta);
4340 best_delta = act_delta;
4341 }
4342 else
4343 iv_ca_delta_free (&act_delta);
4344 }
4345
4346 if (!best_delta)
4347 {
4348 *delta = NULL;
4349 return best_cost;
4350 }
4351
4352 /* Recurse to possibly remove other unnecessary ivs. */
4353 iv_ca_delta_commit (data, ivs, best_delta, true);
4354 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4355 iv_ca_delta_commit (data, ivs, best_delta, false);
4356 *delta = iv_ca_delta_join (best_delta, *delta);
4357 return best_cost;
4358 }
4359
4360 /* Tries to extend the set IVS in the best possible way in order
4361 to express the USE. */
4362
4363 static bool
4364 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4365 struct iv_use *use)
4366 {
4367 unsigned best_cost, act_cost;
4368 unsigned i;
4369 bitmap_iterator bi;
4370 struct iv_cand *cand;
4371 struct iv_ca_delta *best_delta = NULL, *act_delta;
4372 struct cost_pair *cp;
4373
4374 iv_ca_add_use (data, ivs, use);
4375 best_cost = iv_ca_cost (ivs);
4376
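/* If a candidate for USE was chosen when the use was added, remember that
   assignment as the fallback delta and detach it again, so that the
   candidates tried below are evaluated from a clean state. */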
4377 cp = iv_ca_cand_for_use (ivs, use);
4378 if (cp)
4379 {
4380 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4381 iv_ca_set_no_cp (data, ivs, use);
4382 }
4383
4384 /* First try the important candidates. Only if that fails, try the
4385    use-specific ones. Rationale -- in loops with many variables the best
4386    choice is often to use just one generic biv. If we added many ivs specific
4387    to the uses here, the optimization algorithm would later be likely to get
4388    stuck in a local minimum, causing us to create too many ivs. Going from
4389    few ivs to more seems more likely to succeed -- starting from few ivs,
4390    replacing an expensive use by a specific iv should always be a
4391    win. */
4392 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4393 {
4394 cand = iv_cand (data, i);
4395
4396 if (iv_ca_cand_used_p (ivs, cand))
4397 continue;
4398
4399 cp = get_use_iv_cost (data, use, cand);
4400 if (!cp)
4401 continue;
4402
4403 iv_ca_set_cp (data, ivs, use, cp);
4404 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4405 iv_ca_set_no_cp (data, ivs, use);
4406 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4407
4408 if (act_cost < best_cost)
4409 {
4410 best_cost = act_cost;
4411
4412 iv_ca_delta_free (&best_delta);
4413 best_delta = act_delta;
4414 }
4415 else
4416 iv_ca_delta_free (&act_delta);
4417 }
4418
4419 if (best_cost == INFTY)
4420 {
4421 for (i = 0; i < use->n_map_members; i++)
4422 {
4423 cp = use->cost_map + i;
4424 cand = cp->cand;
4425 if (!cand)
4426 continue;
4427
4428 /* Already tried this. */
4429 if (cand->important)
4430 continue;
4431
4432 if (iv_ca_cand_used_p (ivs, cand))
4433 continue;
4434
4435 act_delta = NULL;
4436 iv_ca_set_cp (data, ivs, use, cp);
4437 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4438 iv_ca_set_no_cp (data, ivs, use);
4439 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4440 cp, act_delta);
4441
4442 if (act_cost < best_cost)
4443 {
4444 best_cost = act_cost;
4445
4446 if (best_delta)
4447 iv_ca_delta_free (&best_delta);
4448 best_delta = act_delta;
4449 }
4450 else
4451 iv_ca_delta_free (&act_delta);
4452 }
4453 }
4454
4455 iv_ca_delta_commit (data, ivs, best_delta, true);
4456 iv_ca_delta_free (&best_delta);
4457
4458 return (best_cost != INFTY);
4459 }
4460
4461 /* Finds an initial assignment of candidates to uses. */
4462
4463 static struct iv_ca *
4464 get_initial_solution (struct ivopts_data *data)
4465 {
4466 struct iv_ca *ivs = iv_ca_new (data);
4467 unsigned i;
4468
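/* Express each use by some candidate; if this is not possible for some use,
   there is no valid initial solution. */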
4469 for (i = 0; i < n_iv_uses (data); i++)
4470 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4471 {
4472 iv_ca_free (&ivs);
4473 return NULL;
4474 }
4475
4476 return ivs;
4477 }
4478
4479 /* Tries to improve set of induction variables IVS. */
4480
4481 static bool
4482 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4483 {
4484 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4485 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4486 struct iv_cand *cand;
4487
4488 /* Try extending the set of induction variables by one. */
4489 for (i = 0; i < n_iv_cands (data); i++)
4490 {
4491 cand = iv_cand (data, i);
4492
4493 if (iv_ca_cand_used_p (ivs, cand))
4494 continue;
4495
4496 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4497 if (!act_delta)
4498 continue;
4499
4500 /* If we successfully added the candidate and the set is small enough,
4501 try optimizing it by removing other candidates. */
4502 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4503 {
4504 iv_ca_delta_commit (data, ivs, act_delta, true);
4505 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4506 iv_ca_delta_commit (data, ivs, act_delta, false);
4507 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4508 }
4509
4510 if (acost < best_cost)
4511 {
4512 best_cost = acost;
4513 iv_ca_delta_free (&best_delta);
4514 best_delta = act_delta;
4515 }
4516 else
4517 iv_ca_delta_free (&act_delta);
4518 }
4519
4520 if (!best_delta)
4521 {
4522 /* Try removing the candidates from the set instead. */
4523 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4524
4525 /* Nothing more we can do. */
4526 if (!best_delta)
4527 return false;
4528 }
4529
4530 iv_ca_delta_commit (data, ivs, best_delta, true);
4531 gcc_assert (best_cost == iv_ca_cost (ivs));
4532 iv_ca_delta_free (&best_delta);
4533 return true;
4534 }
4535
4536 /* Attempts to find the optimal set of induction variables. We use a simple
4537    greedy heuristic -- we try to replace at most one candidate in the selected
4538    solution and remove the unused ivs, as long as this improves the cost. */
4539
4540 static struct iv_ca *
4541 find_optimal_iv_set (struct ivopts_data *data)
4542 {
4543 unsigned i;
4544 struct iv_ca *set;
4545 struct iv_use *use;
4546
4547 /* Get the initial solution. */
4548 set = get_initial_solution (data);
4549 if (!set)
4550 {
4551 if (dump_file && (dump_flags & TDF_DETAILS))
4552 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4553 return NULL;
4554 }
4555
4556 if (dump_file && (dump_flags & TDF_DETAILS))
4557 {
4558 fprintf (dump_file, "Initial set of candidates:\n");
4559 iv_ca_dump (data, dump_file, set);
4560 }
4561
4562 while (try_improve_iv_set (data, set))
4563 {
4564 if (dump_file && (dump_flags & TDF_DETAILS))
4565 {
4566 fprintf (dump_file, "Improved to:\n");
4567 iv_ca_dump (data, dump_file, set);
4568 }
4569 }
4570
4571 if (dump_file && (dump_flags & TDF_DETAILS))
4572 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4573
4574 for (i = 0; i < n_iv_uses (data); i++)
4575 {
4576 use = iv_use (data, i);
4577 use->selected = iv_ca_cand_for_use (set, use)->cand;
4578 }
4579
4580 return set;
4581 }
4582
4583 /* Creates a new induction variable corresponding to CAND. */
4584
4585 static void
4586 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4587 {
4588 block_stmt_iterator incr_pos;
4589 tree base;
4590 bool after = false;
4591
4592 if (!cand->iv)
4593 return;
4594
4595 switch (cand->pos)
4596 {
4597 case IP_NORMAL:
4598 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4599 break;
4600
4601 case IP_END:
4602 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4603 after = true;
4604 break;
4605
4606 case IP_ORIGINAL:
4607 /* Mark that the iv is preserved. */
4608 name_info (data, cand->var_before)->preserve_biv = true;
4609 name_info (data, cand->var_after)->preserve_biv = true;
4610
4611 /* Rewrite the increment so that it uses var_before directly. */
4612 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4613
4614 return;
4615 }
4616
4617 gimple_add_tmp_var (cand->var_before);
4618 add_referenced_tmp_var (cand->var_before);
4619
4620 base = unshare_expr (cand->iv->base);
4621
4622 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4623 &incr_pos, after, &cand->var_before, &cand->var_after);
4624 }
4625
4626 /* Creates new induction variables described in SET. */
4627
4628 static void
4629 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4630 {
4631 unsigned i;
4632 struct iv_cand *cand;
4633 bitmap_iterator bi;
4634
4635 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4636 {
4637 cand = iv_cand (data, i);
4638 create_new_iv (data, cand);
4639 }
4640 }
4641
4642 /* Removes the statement STMT (a real statement or a phi node). If
4643    INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by it. */
4644
4645 static void
4646 remove_statement (tree stmt, bool including_defined_name)
4647 {
4648 if (TREE_CODE (stmt) == PHI_NODE)
4649 {
4650 if (!including_defined_name)
4651 {
4652 /* Prevent the ssa name defined by the statement from being removed. */
4653 SET_PHI_RESULT (stmt, NULL);
4654 }
4655 remove_phi_node (stmt, NULL_TREE);
4656 }
4657 else
4658 {
4659 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4660
4661 bsi_remove (&bsi);
4662 }
4663 }
4664
4665 /* Rewrites USE (definition of iv used in a nonlinear expression)
4666 using candidate CAND. */
4667
4668 static void
4669 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4670 struct iv_use *use, struct iv_cand *cand)
4671 {
4672 tree comp;
4673 tree op, stmts, tgt, ass;
4674 block_stmt_iterator bsi, pbsi;
4675
4676 /* An important special case -- if we are asked to express the value of
4677 the original iv by itself, just exit; there is no need to
4678 introduce a new computation (that might also need casting the
4679 variable to unsigned and back). */
4680 if (cand->pos == IP_ORIGINAL
4681 && TREE_CODE (use->stmt) == MODIFY_EXPR
4682 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4683 {
4684 op = TREE_OPERAND (use->stmt, 1);
4685
4686    /* Be a bit careful. In case the variable is expressed in some
4687 complicated way, rewrite it so that we may get rid of this
4688 complicated expression. */
4689 if ((TREE_CODE (op) == PLUS_EXPR
4690 || TREE_CODE (op) == MINUS_EXPR)
4691 && TREE_OPERAND (op, 0) == cand->var_before
4692 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4693 return;
4694 }
4695
4696 comp = unshare_expr (get_computation (data->current_loop,
4697 use, cand));
4698 switch (TREE_CODE (use->stmt))
4699 {
4700 case PHI_NODE:
4701 tgt = PHI_RESULT (use->stmt);
4702
4703 /* If we should keep the biv, do not replace it. */
4704 if (name_info (data, tgt)->preserve_biv)
4705 return;
4706
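/* Position BSI at the last label of the block (if any), so that the
   statements replacing the phi node are inserted after the labels. */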
4707 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4708 while (!bsi_end_p (pbsi)
4709 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4710 {
4711 bsi = pbsi;
4712 bsi_next (&pbsi);
4713 }
4714 break;
4715
4716 case MODIFY_EXPR:
4717 tgt = TREE_OPERAND (use->stmt, 0);
4718 bsi = bsi_for_stmt (use->stmt);
4719 break;
4720
4721 default:
4722 gcc_unreachable ();
4723 }
4724
4725 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4726
4727 if (TREE_CODE (use->stmt) == PHI_NODE)
4728 {
4729 if (stmts)
4730 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4731 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4732 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4733 remove_statement (use->stmt, false);
4734 SSA_NAME_DEF_STMT (tgt) = ass;
4735 }
4736 else
4737 {
4738 if (stmts)
4739 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4740 TREE_OPERAND (use->stmt, 1) = op;
4741 }
4742 }
4743
4744 /* Replaces ssa name in index IDX by its basic variable. Callback for
4745 for_each_index. */
4746
4747 static bool
4748 idx_remove_ssa_names (tree base, tree *idx,
4749 void *data ATTRIBUTE_UNUSED)
4750 {
4751 tree *op;
4752
4753 if (TREE_CODE (*idx) == SSA_NAME)
4754 *idx = SSA_NAME_VAR (*idx);
4755
4756 if (TREE_CODE (base) == ARRAY_REF)
4757 {
4758 op = &TREE_OPERAND (base, 2);
4759 if (*op
4760 && TREE_CODE (*op) == SSA_NAME)
4761 *op = SSA_NAME_VAR (*op);
4762 op = &TREE_OPERAND (base, 3);
4763 if (*op
4764 && TREE_CODE (*op) == SSA_NAME)
4765 *op = SSA_NAME_VAR (*op);
4766 }
4767
4768 return true;
4769 }
4770
4771 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4772
4773 static tree
4774 unshare_and_remove_ssa_names (tree ref)
4775 {
4776 ref = unshare_expr (ref);
4777 for_each_index (&ref, idx_remove_ssa_names, NULL);
4778
4779 return ref;
4780 }
4781
4782 /* Rewrites base of memory access OP with expression WITH in statement
4783 pointed to by BSI. */
4784
4785 static void
4786 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4787 {
4788 tree bvar, var, new_var, new_name, copy, name;
4789 tree orig;
4790
4791 var = bvar = get_base_address (*op);
4792
4793 if (!var || TREE_CODE (with) != SSA_NAME)
4794 goto do_rewrite;
4795
4796 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4797 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4798 if (TREE_CODE (var) == INDIRECT_REF)
4799 var = TREE_OPERAND (var, 0);
4800 if (TREE_CODE (var) == SSA_NAME)
4801 {
4802 name = var;
4803 var = SSA_NAME_VAR (var);
4804 }
4805 else if (DECL_P (var))
4806 name = NULL_TREE;
4807 else
4808 goto do_rewrite;
4809
4810 if (var_ann (var)->type_mem_tag)
4811 var = var_ann (var)->type_mem_tag;
4812
4813 /* We need to add a memory tag for the variable. But we do not want
4814 to add it to the temporary used for the computations, since this leads
4815 to problems in redundancy elimination when there are common parts
4816    in two computations referring to different arrays. So we copy
4817 the variable to a new temporary. */
4818 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4819 if (name)
4820 new_name = duplicate_ssa_name (name, copy);
4821 else
4822 {
4823 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4824 add_referenced_tmp_var (new_var);
4825 var_ann (new_var)->type_mem_tag = var;
4826 new_name = make_ssa_name (new_var, copy);
4827 }
4828 TREE_OPERAND (copy, 0) = new_name;
4829 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4830 with = new_name;
4831
4832 do_rewrite:
4833
4834 orig = NULL_TREE;
4835 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4836 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4837
4838 if (TREE_CODE (*op) == INDIRECT_REF)
4839 orig = REF_ORIGINAL (*op);
4840 if (!orig)
4841 orig = unshare_and_remove_ssa_names (*op);
4842
4843 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4844
4845 /* Record the original reference, for purposes of alias analysis. */
4846 REF_ORIGINAL (*op) = orig;
4847 }
4848
4849 /* Rewrites USE (address that is an iv) using candidate CAND. */
4850
4851 static void
4852 rewrite_use_address (struct ivopts_data *data,
4853 struct iv_use *use, struct iv_cand *cand)
4854 {
4855 tree comp = unshare_expr (get_computation (data->current_loop,
4856 use, cand));
4857 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4858 tree stmts;
4859 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4860
4861 if (stmts)
4862 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4863
4864 rewrite_address_base (&bsi, use->op_p, op);
4865 }
4866
4867 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4868 candidate CAND. */
4869
4870 static void
4871 rewrite_use_compare (struct ivopts_data *data,
4872 struct iv_use *use, struct iv_cand *cand)
4873 {
4874 tree comp;
4875 tree *op_p, cond, op, stmts, bound;
4876 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4877 enum tree_code compare;
4878
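/* If the exit test can be expressed as a comparison of the candidate with a
   bound, rewrite the condition to that comparison; the original iv value
   then need not be computed at all. */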
4879 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4880 {
4881 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4882 tree var_type = TREE_TYPE (var);
4883
4884 bound = fold_convert (var_type, bound);
4885 op = force_gimple_operand (unshare_expr (bound), &stmts,
4886 true, NULL_TREE);
4887
4888 if (stmts)
4889 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4890
4891 *use->op_p = build2 (compare, boolean_type_node, var, op);
4892 modify_stmt (use->stmt);
4893 return;
4894 }
4895
4896 /* The induction variable elimination failed; just express the original
4897 giv. */
4898 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4899
4900 cond = *use->op_p;
4901 op_p = &TREE_OPERAND (cond, 0);
4902 if (TREE_CODE (*op_p) != SSA_NAME
4903 || zero_p (get_iv (data, *op_p)->step))
4904 op_p = &TREE_OPERAND (cond, 1);
4905
4906 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4907 if (stmts)
4908 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4909
4910 *op_p = op;
4911 }
4912
4913 /* Ensure that operand *OP_P may be used at the end of EXIT without
4914 violating loop closed ssa form. */
4915
4916 static void
4917 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4918 {
4919 basic_block def_bb;
4920 struct loop *def_loop;
4921 tree phi, use;
4922
4923 use = USE_FROM_PTR (op_p);
4924 if (TREE_CODE (use) != SSA_NAME)
4925 return;
4926
4927 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4928 if (!def_bb)
4929 return;
4930
4931 def_loop = def_bb->loop_father;
4932 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4933 return;
4934
4935 /* Try finding a phi node that copies the value out of the loop. */
4936 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4937 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4938 break;
4939
4940 if (!phi)
4941 {
4942 /* Create such a phi node. */
4943 tree new_name = duplicate_ssa_name (use, NULL);
4944
4945 phi = create_phi_node (new_name, exit->dest);
4946 SSA_NAME_DEF_STMT (new_name) = phi;
4947 add_phi_arg (phi, use, exit);
4948 }
4949
4950 SET_USE (op_p, PHI_RESULT (phi));
4951 }
4952
4953 /* Ensure that operands of STMT may be used at the end of EXIT without
4954 violating loop closed ssa form. */
4955
4956 static void
4957 protect_loop_closed_ssa_form (edge exit, tree stmt)
4958 {
4959 use_optype uses;
4960 vuse_optype vuses;
4961 v_may_def_optype v_may_defs;
4962 unsigned i;
4963
4964 get_stmt_operands (stmt);
4965
4966 uses = STMT_USE_OPS (stmt);
4967 for (i = 0; i < NUM_USES (uses); i++)
4968 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4969
4970 vuses = STMT_VUSE_OPS (stmt);
4971 for (i = 0; i < NUM_VUSES (vuses); i++)
4972 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4973
4974 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4975 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4976 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4977 }
4978
4979 /* STMTS compute the value of the phi argument OP on the loop exit EXIT.
4980    Arrange things so that they are emitted in the correct place, and so
4981    that the loop closed ssa form is preserved. */
4982
4983 static void
4984 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4985 {
4986 tree_stmt_iterator tsi;
4987 block_stmt_iterator bsi;
4988 tree phi, stmt, def, next;
4989
4990 if (EDGE_COUNT (exit->dest->preds) > 1)
4991 split_loop_exit_edge (exit);
4992
4993 if (TREE_CODE (stmts) == STATEMENT_LIST)
4994 {
4995 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4996 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4997 }
4998 else
4999 protect_loop_closed_ssa_form (exit, stmts);
5000
5001 /* Ensure there is a label in exit->dest, so that we can
5002 insert after it. */
5003 tree_block_label (exit->dest);
5004 bsi = bsi_after_labels (exit->dest);
5005 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5006
5007 if (!op)
5008 return;
5009
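/* Replace each phi node whose argument on EXIT is OP by an explicit
   assignment of OP to the phi result, emitted after the statements inserted
   above. */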
5010 for (phi = phi_nodes (exit->dest); phi; phi = next)
5011 {
5012 next = PHI_CHAIN (phi);
5013
5014 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5015 {
5016 def = PHI_RESULT (phi);
5017 remove_statement (phi, false);
5018 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5019 def, op);
5020 SSA_NAME_DEF_STMT (def) = stmt;
5021 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5022 }
5023 }
5024 }
5025
5026 /* Rewrites the final value of USE (that is only needed outside of the loop)
5027 using candidate CAND. */
5028
5029 static void
5030 rewrite_use_outer (struct ivopts_data *data,
5031 struct iv_use *use, struct iv_cand *cand)
5032 {
5033 edge exit;
5034 tree value, op, stmts, tgt;
5035 tree phi;
5036
5037 switch (TREE_CODE (use->stmt))
5038 {
5039 case PHI_NODE:
5040 tgt = PHI_RESULT (use->stmt);
5041 break;
5042 case MODIFY_EXPR:
5043 tgt = TREE_OPERAND (use->stmt, 0);
5044 break;
5045 default:
5046 gcc_unreachable ();
5047 }
5048
5049 exit = single_dom_exit (data->current_loop);
5050
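/* If the loop has a single dominating exit, express the value of TGT at that
   exit and use it in the phi nodes behind the exit, inserting the necessary
   computations on the exit edge. */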
5051 if (exit)
5052 {
5053 if (!cand->iv)
5054 {
5055 bool ok = may_replace_final_value (data, use, &value);
5056 gcc_assert (ok);
5057 }
5058 else
5059 value = get_computation_at (data->current_loop,
5060 use, cand, last_stmt (exit->src));
5061
5062 value = unshare_expr (value);
5063 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5064
5065 /* If we will preserve the iv anyway and we would need to perform
5066 some computation to replace the final value, do nothing. */
5067 if (stmts && name_info (data, tgt)->preserve_biv)
5068 return;
5069
5070 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5071 {
5072 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5073
5074 if (USE_FROM_PTR (use_p) == tgt)
5075 SET_USE (use_p, op);
5076 }
5077
5078 if (stmts)
5079 compute_phi_arg_on_exit (exit, stmts, op);
5080
5081 /* Enable removal of the statement. We cannot remove it directly,
5082 since we may still need the aliasing information attached to the
5083 ssa name defined by it. */
5084 name_info (data, tgt)->iv->have_use_for = false;
5085 return;
5086 }
5087
5088 /* If the variable is going to be preserved anyway, there is nothing to
5089 do. */
5090 if (name_info (data, tgt)->preserve_biv)
5091 return;
5092
5093 /* Otherwise we just need to compute the iv. */
5094 rewrite_use_nonlinear_expr (data, use, cand);
5095 }
5096
5097 /* Rewrites USE using candidate CAND. */
5098
5099 static void
5100 rewrite_use (struct ivopts_data *data,
5101 struct iv_use *use, struct iv_cand *cand)
5102 {
5103 switch (use->type)
5104 {
5105 case USE_NONLINEAR_EXPR:
5106 rewrite_use_nonlinear_expr (data, use, cand);
5107 break;
5108
5109 case USE_OUTER:
5110 rewrite_use_outer (data, use, cand);
5111 break;
5112
5113 case USE_ADDRESS:
5114 rewrite_use_address (data, use, cand);
5115 break;
5116
5117 case USE_COMPARE:
5118 rewrite_use_compare (data, use, cand);
5119 break;
5120
5121 default:
5122 gcc_unreachable ();
5123 }
5124 modify_stmt (use->stmt);
5125 }
5126
5127 /* Rewrite the uses using the selected induction variables. */
5128
5129 static void
5130 rewrite_uses (struct ivopts_data *data)
5131 {
5132 unsigned i;
5133 struct iv_cand *cand;
5134 struct iv_use *use;
5135
5136 for (i = 0; i < n_iv_uses (data); i++)
5137 {
5138 use = iv_use (data, i);
5139 cand = use->selected;
5140 gcc_assert (cand);
5141
5142 rewrite_use (data, use, cand);
5143 }
5144 }
5145
5146 /* Removes the ivs that are not used after rewriting. */
5147
5148 static void
5149 remove_unused_ivs (struct ivopts_data *data)
5150 {
5151 unsigned j;
5152 bitmap_iterator bi;
5153
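/* The definition of an iv can be removed if the iv has a nonzero step, is
   not used as an invariant, has no recorded use and does not have to be
   preserved. */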
5154 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5155 {
5156 struct version_info *info;
5157
5158 info = ver_info (data, j);
5159 if (info->iv
5160 && !zero_p (info->iv->step)
5161 && !info->inv_id
5162 && !info->iv->have_use_for
5163 && !info->preserve_biv)
5164 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5165 }
5166 }
5167
5168 /* Frees data allocated by the optimization of a single loop. */
5169
5170 static void
5171 free_loop_data (struct ivopts_data *data)
5172 {
5173 unsigned i, j;
5174 bitmap_iterator bi;
5175
5176 htab_empty (data->niters);
5177
5178 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5179 {
5180 struct version_info *info;
5181
5182 info = ver_info (data, i);
5183 if (info->iv)
5184 free (info->iv);
5185 info->iv = NULL;
5186 info->has_nonlin_use = false;
5187 info->preserve_biv = false;
5188 info->inv_id = 0;
5189 }
5190 bitmap_clear (data->relevant);
5191 bitmap_clear (data->important_candidates);
5192
5193 for (i = 0; i < n_iv_uses (data); i++)
5194 {
5195 struct iv_use *use = iv_use (data, i);
5196
5197 free (use->iv);
5198 BITMAP_FREE (use->related_cands);
5199 for (j = 0; j < use->n_map_members; j++)
5200 if (use->cost_map[j].depends_on)
5201 BITMAP_FREE (use->cost_map[j].depends_on);
5202 free (use->cost_map);
5203 free (use);
5204 }
5205 VARRAY_POP_ALL (data->iv_uses);
5206
5207 for (i = 0; i < n_iv_cands (data); i++)
5208 {
5209 struct iv_cand *cand = iv_cand (data, i);
5210
5211 if (cand->iv)
5212 free (cand->iv);
5213 free (cand);
5214 }
5215 VARRAY_POP_ALL (data->iv_candidates);
5216
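/* If new ssa names appeared while processing this loop, grow version_info
   (with some slack) so that it can be indexed by any ssa name version. */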
5217 if (data->version_info_size < num_ssa_names)
5218 {
5219 data->version_info_size = 2 * num_ssa_names;
5220 free (data->version_info);
5221 data->version_info = xcalloc (data->version_info_size,
5222 sizeof (struct version_info));
5223 }
5224
5225 data->max_inv_id = 0;
5226
5227 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5228 {
5229 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5230
5231 SET_DECL_RTL (obj, NULL_RTX);
5232 }
5233 VARRAY_POP_ALL (decl_rtl_to_reset);
5234 }
5235
5236 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5237 loop tree. */
5238
5239 static void
5240 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5241 {
5242 unsigned i;
5243
5244 for (i = 1; i < loops->num; i++)
5245 if (loops->parray[i])
5246 {
5247 free (loops->parray[i]->aux);
5248 loops->parray[i]->aux = NULL;
5249 }
5250
5251 free_loop_data (data);
5252 free (data->version_info);
5253 BITMAP_FREE (data->relevant);
5254 BITMAP_FREE (data->important_candidates);
5255 htab_delete (data->niters);
5256
5257 VARRAY_FREE (decl_rtl_to_reset);
5258 VARRAY_FREE (data->iv_uses);
5259 VARRAY_FREE (data->iv_candidates);
5260 }
5261
5262 /* Optimizes the LOOP. Returns true if anything changed. */
5263
5264 static bool
5265 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5266 {
5267 bool changed = false;
5268 struct iv_ca *iv_ca;
5269 edge exit;
5270
5271 data->current_loop = loop;
5272
5273 if (dump_file && (dump_flags & TDF_DETAILS))
5274 {
5275 fprintf (dump_file, "Processing loop %d\n", loop->num);
5276
5277 exit = single_dom_exit (loop);
5278 if (exit)
5279 {
5280 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5281 exit->src->index, exit->dest->index);
5282 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5283 fprintf (dump_file, "\n");
5284 }
5285
5286 fprintf (dump_file, "\n");
5287 }
5288
5289 /* For each ssa name determines whether it behaves as an induction variable
5290 in some loop. */
5291 if (!find_induction_variables (data))
5292 goto finish;
5293
5294 /* Finds interesting uses (item 1). */
5295 find_interesting_uses (data);
5296 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5297 goto finish;
5298
5299 /* Finds candidates for the induction variables (item 2). */
5300 find_iv_candidates (data);
5301
5302 /* Calculates the costs (item 3, part 1). */
5303 determine_use_iv_costs (data);
5304 determine_iv_costs (data);
5305 determine_set_costs (data);
5306
5307 /* Find the optimal set of induction variables (item 3, part 2). */
5308 iv_ca = find_optimal_iv_set (data);
5309 if (!iv_ca)
5310 goto finish;
5311 changed = true;
5312
5313 /* Create the new induction variables (item 4, part 1). */
5314 create_new_ivs (data, iv_ca);
5315 iv_ca_free (&iv_ca);
5316
5317 /* Rewrite the uses (item 4, part 2). */
5318 rewrite_uses (data);
5319
5320 /* Remove the ivs that are unused after rewriting. */
5321 remove_unused_ivs (data);
5322
5323 /* We have changed the structure of induction variables; it might happen
5324 that definitions in the scev database refer to some of them that were
5325 eliminated. */
5326 scev_reset ();
5327
5328 finish:
5329 free_loop_data (data);
5330
5331 return changed;
5332 }
5333
5334 /* Main entry point. Optimizes induction variables in LOOPS. */
5335
5336 void
5337 tree_ssa_iv_optimize (struct loops *loops)
5338 {
5339 struct loop *loop;
5340 struct ivopts_data data;
5341
5342 tree_ssa_iv_optimize_init (loops, &data);
5343
5344 /* Optimize the loops starting with the innermost ones. */
5345 loop = loops->tree_root;
5346 while (loop->inner)
5347 loop = loop->inner;
5348
5349 #ifdef ENABLE_CHECKING
5350 verify_loop_closed_ssa ();
5351 verify_stmts ();
5352 #endif
5353
5354 /* Scan the loops, inner ones first. */
5355 while (loop != loops->tree_root)
5356 {
5357 if (dump_file && (dump_flags & TDF_DETAILS))
5358 flow_loop_dump (loop, dump_file, NULL, 1);
5359
5360 tree_ssa_iv_optimize_loop (&data, loop);
5361
5362 if (loop->next)
5363 {
5364 loop = loop->next;
5365 while (loop->inner)
5366 loop = loop->inner;
5367 }
5368 else
5369 loop = loop->outer;
5370 }
5371
5372 #ifdef ENABLE_CHECKING
5373 verify_loop_closed_ssa ();
5374 verify_stmts ();
5375 #endif
5376
5377 tree_ssa_iv_optimize_finalize (loops, &data);
5378 }