/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
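/* As a purely illustrative sketch (this example is not taken from the GCC
   sources or testsuite), strength reduction turns an address computation
   that needs a multiplication on every iteration into one that only needs
   an addition:

     before:                                 after (roughly):
       for (i = 0; i < n; i++)                 for (p = &a[0]; p != &a[n]; p++)
         a[i] = 0;   // address = a + 4*i        *p = 0;   // pointer iv, step 4

   The integer iv "i", if it was used only for addressing, is then dead and
   can be eliminated entirely.  */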
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
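/* An illustrative example (not part of the original comments): in a loop
   like "for (i = 0; i < n; i++) sum += a[i];" the counter "i" is described
   by an iv with base 0 and step 1, while the address &a[i] (with 4-byte
   elements) gives a derived iv with base &a[0], step 4 and base_object "a".  */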
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
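/* For illustration only (not from the original comments): with this
   representation a cost of {3, 0} is ranked cheaper than {3, 2}, because
   compare_costs below compares the runtime cost first and uses complexity
   only to break ties, while infinite_cost dominates every finite cost.  */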
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
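/* Note (not part of the original comments): assuming the usual mapping of
   these PARAM_* identifiers to command-line parameters, the bounds above can
   be tuned without rebuilding the compiler, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
             --param iv-max-considered-uses=250 foo.c

   which is handy when experimenting with the candidate selection heuristics.  */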
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
473 /* Dumps information about the USE to FILE. */
475 extern void dump_use (FILE *, struct iv_use
*);
477 dump_use (FILE *file
, struct iv_use
*use
)
479 fprintf (file
, "use %d\n", use
->id
);
483 case USE_NONLINEAR_EXPR
:
484 fprintf (file
, " generic\n");
488 fprintf (file
, " address\n");
492 fprintf (file
, " compare\n");
499 fprintf (file
, " in statement ");
500 print_gimple_stmt (file
, use
->stmt
, 0, 0);
501 fprintf (file
, "\n");
503 fprintf (file
, " at position ");
505 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
506 fprintf (file
, "\n");
508 dump_iv (file
, use
->iv
);
510 if (use
->related_cands
)
512 fprintf (file
, " related candidates ");
513 dump_bitmap (file
, use
->related_cands
);
517 /* Dumps information about the uses to FILE. */
519 extern void dump_uses (FILE *, struct ivopts_data
*);
521 dump_uses (FILE *file
, struct ivopts_data
*data
)
526 for (i
= 0; i
< n_iv_uses (data
); i
++)
528 use
= iv_use (data
, i
);
530 dump_use (file
, use
);
531 fprintf (file
, "\n");
535 /* Dumps information about induction variable candidate CAND to FILE. */
537 extern void dump_cand (FILE *, struct iv_cand
*);
539 dump_cand (FILE *file
, struct iv_cand
*cand
)
541 struct iv
*iv
= cand
->iv
;
543 fprintf (file
, "candidate %d%s\n",
544 cand
->id
, cand
->important
? " (important)" : "");
546 if (cand
->depends_on
)
548 fprintf (file
, " depends on ");
549 dump_bitmap (file
, cand
->depends_on
);
554 fprintf (file
, " final value replacement\n");
558 if (cand
->var_before
)
560 fprintf (file
, " var_before ");
561 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
562 fprintf (file
, "\n");
566 fprintf (file
, " var_after ");
567 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
568 fprintf (file
, "\n");
574 fprintf (file
, " incremented before exit test\n");
578 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
582 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
586 fprintf (file
, " incremented at end\n");
590 fprintf (file
, " original biv\n");
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
617 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
619 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
623 if (sbb
== loop
->latch
)
629 return stmt
== last_stmt (bb
);
632 /* Returns true if STMT if after the place where the original induction
633 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
634 if the positions are identical. */
637 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
639 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
640 basic_block stmt_bb
= gimple_bb (stmt
);
642 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
645 if (stmt_bb
!= cand_bb
)
649 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
651 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
654 /* Returns true if STMT if after the place where the induction variable
655 CAND is incremented in LOOP. */
658 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
666 return stmt_after_ip_normal_pos (loop
, stmt
);
670 return stmt_after_inc_pos (cand
, stmt
, false);
673 return stmt_after_inc_pos (cand
, stmt
, true);
680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
683 abnormal_ssa_name_p (tree exp
)
688 if (TREE_CODE (exp
) != SSA_NAME
)
691 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
695 abnormal phi node. Callback for for_each_index. */
698 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
699 void *data ATTRIBUTE_UNUSED
)
701 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
703 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
705 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
709 return !abnormal_ssa_name_p (*index
);
712 /* Returns true if EXPR contains a ssa name that occurs in an
713 abnormal phi node. */
716 contains_abnormal_ssa_name_p (tree expr
)
719 enum tree_code_class codeclass
;
724 code
= TREE_CODE (expr
);
725 codeclass
= TREE_CODE_CLASS (code
);
727 if (code
== SSA_NAME
)
728 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
730 if (code
== INTEGER_CST
731 || is_gimple_min_invariant (expr
))
734 if (code
== ADDR_EXPR
)
735 return !for_each_index (&TREE_OPERAND (expr
, 0),
736 idx_contains_abnormal_ssa_name_p
,
739 if (code
== COND_EXPR
)
740 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
741 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
742 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
748 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
753 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
765 /* Returns tree describing number of iterations determined from
766 EXIT of DATA->current_loop, or NULL if something goes wrong. */
769 niter_for_exit (struct ivopts_data
*data
, edge exit
,
770 struct tree_niter_desc
**desc_p
)
772 struct tree_niter_desc
* desc
= NULL
;
778 data
->niters
= pointer_map_create ();
782 slot
= pointer_map_contains (data
->niters
, exit
);
786 /* Try to determine number of iterations. We must know it
787 unconditionally (i.e., without possibility of # of iterations
788 being zero). Also, we cannot safely work with ssa names that
789 appear in phi nodes on abnormal edges, so that we do not create
790 overlapping life ranges for them (PR 27283). */
791 desc
= XNEW (struct tree_niter_desc
);
792 if (number_of_iterations_exit (data
->current_loop
,
794 && integer_zerop (desc
->may_be_zero
)
795 && !contains_abnormal_ssa_name_p (desc
->niter
))
801 slot
= pointer_map_insert (data
->niters
, exit
);
805 niter
= ((struct tree_niter_desc
*) *slot
)->niter
;
808 *desc_p
= (struct tree_niter_desc
*) *slot
;
812 /* Returns tree describing number of iterations determined from
813 single dominating exit of DATA->current_loop, or NULL if something
817 niter_for_single_dom_exit (struct ivopts_data
*data
)
819 edge exit
= single_dom_exit (data
->current_loop
);
824 return niter_for_exit (data
, exit
, NULL
);
827 /* Hash table equality function for expressions. */
830 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
832 const struct iv_inv_expr_ent
*expr1
=
833 (const struct iv_inv_expr_ent
*)ent1
;
834 const struct iv_inv_expr_ent
*expr2
=
835 (const struct iv_inv_expr_ent
*)ent2
;
837 return operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
840 /* Hash function for loop invariant expressions. */
843 htab_inv_expr_hash (const void *ent
)
845 const struct iv_inv_expr_ent
*expr
=
846 (const struct iv_inv_expr_ent
*)ent
;
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
870 /* Returns a memory object to that EXPR points. In case we are able to
871 determine that it does not point to any such object, NULL is returned. */
874 determine_base_object (tree expr
)
876 enum tree_code code
= TREE_CODE (expr
);
879 /* If this is a pointer casted to any type, we need to determine
880 the base object for the pointer; so handle conversions before
881 throwing away non-pointer expressions. */
882 if (CONVERT_EXPR_P (expr
))
883 return determine_base_object (TREE_OPERAND (expr
, 0));
885 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
894 obj
= TREE_OPERAND (expr
, 0);
895 base
= get_base_address (obj
);
900 if (TREE_CODE (base
) == MEM_REF
)
901 return determine_base_object (TREE_OPERAND (base
, 0));
903 return fold_convert (ptr_type_node
,
904 build_fold_addr_expr (base
));
906 case POINTER_PLUS_EXPR
:
907 return determine_base_object (TREE_OPERAND (expr
, 0));
911 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
915 return fold_convert (ptr_type_node
, expr
);
919 /* Allocates an induction variable with given initial value BASE and step STEP
923 alloc_iv (tree base
, tree step
)
925 struct iv
*iv
= XCNEW (struct iv
);
926 gcc_assert (step
!= NULL_TREE
);
929 iv
->base_object
= determine_base_object (base
);
932 iv
->have_use_for
= false;
934 iv
->ssa_name
= NULL_TREE
;
939 /* Sets STEP and BASE for induction variable IV. */
942 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
944 struct version_info
*info
= name_info (data
, iv
);
946 gcc_assert (!info
->iv
);
948 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
949 info
->iv
= alloc_iv (base
, step
);
950 info
->iv
->ssa_name
= iv
;
953 /* Finds induction variable declaration for VAR. */
956 get_iv (struct ivopts_data
*data
, tree var
)
959 tree type
= TREE_TYPE (var
);
961 if (!POINTER_TYPE_P (type
)
962 && !INTEGRAL_TYPE_P (type
))
965 if (!name_info (data
, var
)->iv
)
967 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
970 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
971 set_iv (data
, var
, var
, build_int_cst (type
, 0));
974 return name_info (data
, var
)->iv
;
977 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
978 not define a simple affine biv with nonzero step. */
981 determine_biv_step (gimple phi
)
983 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
984 tree name
= PHI_RESULT (phi
);
987 if (!is_gimple_reg (name
))
990 if (!simple_iv (loop
, loop
, name
, &iv
, true))
993 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
996 /* Finds basic ivs. */
999 find_bivs (struct ivopts_data
*data
)
1002 tree step
, type
, base
;
1004 struct loop
*loop
= data
->current_loop
;
1005 gimple_stmt_iterator psi
;
1007 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1009 phi
= gsi_stmt (psi
);
1011 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1014 step
= determine_biv_step (phi
);
1018 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1019 base
= expand_simple_operations (base
);
1020 if (contains_abnormal_ssa_name_p (base
)
1021 || contains_abnormal_ssa_name_p (step
))
1024 type
= TREE_TYPE (PHI_RESULT (phi
));
1025 base
= fold_convert (type
, base
);
1028 if (POINTER_TYPE_P (type
))
1029 step
= fold_convert (sizetype
, step
);
1031 step
= fold_convert (type
, step
);
1034 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1041 /* Marks basic ivs. */
1044 mark_bivs (struct ivopts_data
*data
)
1048 struct iv
*iv
, *incr_iv
;
1049 struct loop
*loop
= data
->current_loop
;
1050 basic_block incr_bb
;
1051 gimple_stmt_iterator psi
;
1053 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1055 phi
= gsi_stmt (psi
);
1057 iv
= get_iv (data
, PHI_RESULT (phi
));
1061 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1062 incr_iv
= get_iv (data
, var
);
1066 /* If the increment is in the subloop, ignore it. */
1067 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1068 if (incr_bb
->loop_father
!= data
->current_loop
1069 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1073 incr_iv
->biv_p
= true;
1077 /* Checks whether STMT defines a linear induction variable and stores its
1078 parameters to IV. */
1081 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1084 struct loop
*loop
= data
->current_loop
;
1086 iv
->base
= NULL_TREE
;
1087 iv
->step
= NULL_TREE
;
1089 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1092 lhs
= gimple_assign_lhs (stmt
);
1093 if (TREE_CODE (lhs
) != SSA_NAME
)
1096 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1098 iv
->base
= expand_simple_operations (iv
->base
);
1100 if (contains_abnormal_ssa_name_p (iv
->base
)
1101 || contains_abnormal_ssa_name_p (iv
->step
))
1107 /* Finds general ivs in statement STMT. */
1110 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1114 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1117 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1120 /* Finds general ivs in basic block BB. */
1123 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1125 gimple_stmt_iterator bsi
;
1127 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1128 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1131 /* Finds general ivs. */
1134 find_givs (struct ivopts_data
*data
)
1136 struct loop
*loop
= data
->current_loop
;
1137 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1140 for (i
= 0; i
< loop
->num_nodes
; i
++)
1141 find_givs_in_bb (data
, body
[i
]);
1145 /* For each ssa name defined in LOOP determines whether it is an induction
1146 variable and if so, its initial value and step. */
1149 find_induction_variables (struct ivopts_data
*data
)
1154 if (!find_bivs (data
))
1160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1162 tree niter
= niter_for_single_dom_exit (data
);
1166 fprintf (dump_file
, " number of iterations ");
1167 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1168 fprintf (dump_file
, "\n\n");
1171 fprintf (dump_file
, "Induction variables:\n\n");
1173 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1175 if (ver_info (data
, i
)->iv
)
1176 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1183 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1185 static struct iv_use
*
1186 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1187 gimple stmt
, enum use_type use_type
)
1189 struct iv_use
*use
= XCNEW (struct iv_use
);
1191 use
->id
= n_iv_uses (data
);
1192 use
->type
= use_type
;
1196 use
->related_cands
= BITMAP_ALLOC (NULL
);
1198 /* To avoid showing ssa name in the dumps, if it was not reset by the
1200 iv
->ssa_name
= NULL_TREE
;
1202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1203 dump_use (dump_file
, use
);
1205 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1210 /* Checks whether OP is a loop-level invariant and if so, records it.
1211 NONLINEAR_USE is true if the invariant is used in a way we do not
1212 handle specially. */
1215 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1218 struct version_info
*info
;
1220 if (TREE_CODE (op
) != SSA_NAME
1221 || !is_gimple_reg (op
))
1224 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1226 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1229 info
= name_info (data
, op
);
1231 info
->has_nonlin_use
|= nonlinear_use
;
1233 info
->inv_id
= ++data
->max_inv_id
;
1234 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1237 /* Checks whether the use OP is interesting and if so, records it. */
1239 static struct iv_use
*
1240 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1247 if (TREE_CODE (op
) != SSA_NAME
)
1250 iv
= get_iv (data
, op
);
1254 if (iv
->have_use_for
)
1256 use
= iv_use (data
, iv
->use_id
);
1258 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1262 if (integer_zerop (iv
->step
))
1264 record_invariant (data
, op
, true);
1267 iv
->have_use_for
= true;
1269 civ
= XNEW (struct iv
);
1272 stmt
= SSA_NAME_DEF_STMT (op
);
1273 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1274 || is_gimple_assign (stmt
));
1276 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1277 iv
->use_id
= use
->id
;
1282 /* Given a condition in statement STMT, checks whether it is a compare
1283 of an induction variable and an invariant. If this is the case,
1284 CONTROL_VAR is set to location of the iv, BOUND to the location of
1285 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1286 induction variable descriptions, and true is returned. If this is not
1287 the case, CONTROL_VAR and BOUND are set to the arguments of the
1288 condition and false is returned. */
1291 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1292 tree
**control_var
, tree
**bound
,
1293 struct iv
**iv_var
, struct iv
**iv_bound
)
1295 /* The objects returned when COND has constant operands. */
1296 static struct iv const_iv
;
1298 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1299 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1302 if (gimple_code (stmt
) == GIMPLE_COND
)
1304 op0
= gimple_cond_lhs_ptr (stmt
);
1305 op1
= gimple_cond_rhs_ptr (stmt
);
1309 op0
= gimple_assign_rhs1_ptr (stmt
);
1310 op1
= gimple_assign_rhs2_ptr (stmt
);
1313 zero
= integer_zero_node
;
1314 const_iv
.step
= integer_zero_node
;
1316 if (TREE_CODE (*op0
) == SSA_NAME
)
1317 iv0
= get_iv (data
, *op0
);
1318 if (TREE_CODE (*op1
) == SSA_NAME
)
1319 iv1
= get_iv (data
, *op1
);
1321 /* Exactly one of the compared values must be an iv, and the other one must
1326 if (integer_zerop (iv0
->step
))
1328 /* Control variable may be on the other side. */
1329 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1330 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1332 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1336 *control_var
= op0
;;
1347 /* Checks whether the condition in STMT is interesting and if so,
1351 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1353 tree
*var_p
, *bound_p
;
1354 struct iv
*var_iv
, *civ
;
1356 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1358 find_interesting_uses_op (data
, *var_p
);
1359 find_interesting_uses_op (data
, *bound_p
);
1363 civ
= XNEW (struct iv
);
1365 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1368 /* Returns true if expression EXPR is obviously invariant in LOOP,
1369 i.e. if all its operands are defined outside of the LOOP. LOOP
1370 should not be the function body. */
1373 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1378 gcc_assert (loop_depth (loop
) > 0);
1380 if (is_gimple_min_invariant (expr
))
1383 if (TREE_CODE (expr
) == SSA_NAME
)
1385 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1387 && flow_bb_inside_loop_p (loop
, def_bb
))
1396 len
= TREE_OPERAND_LENGTH (expr
);
1397 for (i
= 0; i
< len
; i
++)
1398 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1404 /* Returns true if statement STMT is obviously invariant in LOOP,
1405 i.e. if all its operands on the RHS are defined outside of the LOOP.
1406 LOOP should not be the function body. */
1409 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1414 gcc_assert (loop_depth (loop
) > 0);
1416 lhs
= gimple_get_lhs (stmt
);
1417 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1419 tree op
= gimple_op (stmt
, i
);
1420 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1427 /* Cumulates the steps of indices into DATA and replaces their values with the
1428 initial ones. Returns false when the value of the index cannot be determined.
1429 Callback for for_each_index. */
1431 struct ifs_ivopts_data
1433 struct ivopts_data
*ivopts_data
;
1439 idx_find_step (tree base
, tree
*idx
, void *data
)
1441 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1443 tree step
, iv_base
, iv_step
, lbound
, off
;
1444 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1446 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
)
1449 /* If base is a component ref, require that the offset of the reference
1451 if (TREE_CODE (base
) == COMPONENT_REF
)
1453 off
= component_ref_field_offset (base
);
1454 return expr_invariant_in_loop_p (loop
, off
);
1457 /* If base is array, first check whether we will be able to move the
1458 reference out of the loop (in order to take its address in strength
1459 reduction). In order for this to work we need both lower bound
1460 and step to be loop invariants. */
1461 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1463 /* Moreover, for a range, the size needs to be invariant as well. */
1464 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1465 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1468 step
= array_ref_element_size (base
);
1469 lbound
= array_ref_low_bound (base
);
1471 if (!expr_invariant_in_loop_p (loop
, step
)
1472 || !expr_invariant_in_loop_p (loop
, lbound
))
1476 if (TREE_CODE (*idx
) != SSA_NAME
)
1479 iv
= get_iv (dta
->ivopts_data
, *idx
);
1483 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1484 *&x[0], which is not folded and does not trigger the
1485 ARRAY_REF path below. */
1488 if (integer_zerop (iv
->step
))
1491 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1493 step
= array_ref_element_size (base
);
1495 /* We only handle addresses whose step is an integer constant. */
1496 if (TREE_CODE (step
) != INTEGER_CST
)
1500 /* The step for pointer arithmetics already is 1 byte. */
1501 step
= size_one_node
;
1505 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1506 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1509 /* The index might wrap. */
1513 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1514 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1519 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1520 object is passed to it in DATA. */
1523 idx_record_use (tree base
, tree
*idx
,
1526 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1527 find_interesting_uses_op (data
, *idx
);
1528 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1530 find_interesting_uses_op (data
, array_ref_element_size (base
));
1531 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537 store cst to MUL and return true. Otherwise return false.
1538 The returned value is always sign-extended, regardless of the
1539 signedness of TOP and BOT. */
1542 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1545 enum tree_code code
;
1546 double_int res
, p0
, p1
;
1547 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1552 if (operand_equal_p (top
, bot
, 0))
1554 *mul
= double_int_one
;
1558 code
= TREE_CODE (top
);
1562 mby
= TREE_OPERAND (top
, 1);
1563 if (TREE_CODE (mby
) != INTEGER_CST
)
1566 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1569 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1575 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1576 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1579 if (code
== MINUS_EXPR
)
1580 p1
= double_int_neg (p1
);
1581 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1585 if (TREE_CODE (bot
) != INTEGER_CST
)
1588 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1589 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1590 if (double_int_zero_p (p1
))
1592 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1594 return double_int_zero_p (res
);
1601 /* Returns true if memory reference REF with step STEP may be unaligned. */
1604 may_be_unaligned_p (tree ref
, tree step
)
1608 HOST_WIDE_INT bitsize
;
1609 HOST_WIDE_INT bitpos
;
1611 enum machine_mode mode
;
1612 int unsignedp
, volatilep
;
1613 unsigned base_align
;
1615 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1616 thus they are not misaligned. */
1617 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1620 /* The test below is basically copy of what expr.c:normal_inner_ref
1621 does to check whether the object must be loaded by parts when
1622 STRICT_ALIGNMENT is true. */
1623 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1624 &unsignedp
, &volatilep
, true);
1625 base_type
= TREE_TYPE (base
);
1626 base_align
= TYPE_ALIGN (base_type
);
1628 if (mode
!= BLKmode
)
1630 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1632 if (base_align
< mode_align
1633 || (bitpos
% mode_align
) != 0
1634 || (bitpos
% BITS_PER_UNIT
) != 0)
1638 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1641 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1648 /* Return true if EXPR may be non-addressable. */
1651 may_be_nonaddressable_p (tree expr
)
1653 switch (TREE_CODE (expr
))
1655 case TARGET_MEM_REF
:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1664 case VIEW_CONVERT_EXPR
:
1665 /* This kind of view-conversions may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1673 /* ... fall through ... */
1676 case ARRAY_RANGE_REF
:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1689 /* Finds addresses in *OP_P inside STMT. */
1692 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1694 tree base
= *op_p
, step
= size_zero_node
;
1696 struct ifs_ivopts_data ifs_ivopts_data
;
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt
))
1703 /* Ignore bitfields for now. Not really something terribly complicated
1705 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1708 base
= unshare_expr (base
);
1710 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1712 tree type
= build_pointer_type (TREE_TYPE (base
));
1716 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1718 civ
= get_iv (data
, TMR_BASE (base
));
1722 TMR_BASE (base
) = civ
->base
;
1725 if (TMR_INDEX2 (base
)
1726 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1728 civ
= get_iv (data
, TMR_INDEX2 (base
));
1732 TMR_INDEX2 (base
) = civ
->base
;
1735 if (TMR_INDEX (base
)
1736 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1738 civ
= get_iv (data
, TMR_INDEX (base
));
1742 TMR_INDEX (base
) = civ
->base
;
1747 if (TMR_STEP (base
))
1748 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1750 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1754 if (integer_zerop (step
))
1756 base
= tree_mem_ref_addr (type
, base
);
1760 ifs_ivopts_data
.ivopts_data
= data
;
1761 ifs_ivopts_data
.stmt
= stmt
;
1762 ifs_ivopts_data
.step
= size_zero_node
;
1763 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1764 || integer_zerop (ifs_ivopts_data
.step
))
1766 step
= ifs_ivopts_data
.step
;
1768 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1770 /* Check that the base expression is addressable. This needs
1771 to be done after substituting bases of IVs into it. */
1772 if (may_be_nonaddressable_p (base
))
1775 /* Moreover, on strict alignment platforms, check that it is
1776 sufficiently aligned. */
1777 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1780 base
= build_fold_addr_expr (base
);
1782 /* Substituting bases of IVs into the base expression might
1783 have caused folding opportunities. */
1784 if (TREE_CODE (base
) == ADDR_EXPR
)
1786 tree
*ref
= &TREE_OPERAND (base
, 0);
1787 while (handled_component_p (*ref
))
1788 ref
= &TREE_OPERAND (*ref
, 0);
1789 if (TREE_CODE (*ref
) == MEM_REF
)
1791 tree tem
= TREE_OPERAND (*ref
, 0);
1793 if (tem
!= TREE_OPERAND (*ref
, 0))
1794 tem
= fold_build2 (MEM_REF
, TREE_TYPE (*ref
),
1795 tem
, TREE_OPERAND (*ref
, 1));
1797 tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1798 tem
, TREE_OPERAND (*ref
, 1));
1805 civ
= alloc_iv (base
, step
);
1806 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1810 for_each_index (op_p
, idx_record_use
, data
);
1813 /* Finds and records invariants used in STMT. */
1816 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1819 use_operand_p use_p
;
1822 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1824 op
= USE_FROM_PTR (use_p
);
1825 record_invariant (data
, op
, false);
1829 /* Finds interesting uses of induction variables in the statement STMT. */
1832 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1835 tree op
, *lhs
, *rhs
;
1837 use_operand_p use_p
;
1838 enum tree_code code
;
1840 find_invariants_stmt (data
, stmt
);
1842 if (gimple_code (stmt
) == GIMPLE_COND
)
1844 find_interesting_uses_cond (data
, stmt
);
1848 if (is_gimple_assign (stmt
))
1850 lhs
= gimple_assign_lhs_ptr (stmt
);
1851 rhs
= gimple_assign_rhs1_ptr (stmt
);
1853 if (TREE_CODE (*lhs
) == SSA_NAME
)
1855 /* If the statement defines an induction variable, the uses are not
1856 interesting by themselves. */
1858 iv
= get_iv (data
, *lhs
);
1860 if (iv
&& !integer_zerop (iv
->step
))
1864 code
= gimple_assign_rhs_code (stmt
);
1865 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1866 && (REFERENCE_CLASS_P (*rhs
)
1867 || is_gimple_val (*rhs
)))
1869 if (REFERENCE_CLASS_P (*rhs
))
1870 find_interesting_uses_address (data
, stmt
, rhs
);
1872 find_interesting_uses_op (data
, *rhs
);
1874 if (REFERENCE_CLASS_P (*lhs
))
1875 find_interesting_uses_address (data
, stmt
, lhs
);
1878 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1880 find_interesting_uses_cond (data
, stmt
);
1884 /* TODO -- we should also handle address uses of type
1886 memory = call (whatever);
1893 if (gimple_code (stmt
) == GIMPLE_PHI
1894 && gimple_bb (stmt
) == data
->current_loop
->header
)
1896 iv
= get_iv (data
, PHI_RESULT (stmt
));
1898 if (iv
&& !integer_zerop (iv
->step
))
1902 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1904 op
= USE_FROM_PTR (use_p
);
1906 if (TREE_CODE (op
) != SSA_NAME
)
1909 iv
= get_iv (data
, op
);
1913 find_interesting_uses_op (data
, op
);
1917 /* Finds interesting uses of induction variables outside of loops
1918 on loop exit edge EXIT. */
1921 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1924 gimple_stmt_iterator psi
;
1927 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1929 phi
= gsi_stmt (psi
);
1930 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1931 if (is_gimple_reg (def
))
1932 find_interesting_uses_op (data
, def
);
1936 /* Finds uses of the induction variables that are interesting. */
1939 find_interesting_uses (struct ivopts_data
*data
)
1942 gimple_stmt_iterator bsi
;
1943 basic_block
*body
= get_loop_body (data
->current_loop
);
1945 struct version_info
*info
;
1948 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1949 fprintf (dump_file
, "Uses:\n\n");
1951 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1956 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1957 if (e
->dest
!= EXIT_BLOCK_PTR
1958 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1959 find_interesting_uses_outside (data
, e
);
1961 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1962 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1963 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1964 if (!is_gimple_debug (gsi_stmt (bsi
)))
1965 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1972 fprintf (dump_file
, "\n");
1974 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1976 info
= ver_info (data
, i
);
1979 fprintf (dump_file
, " ");
1980 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1981 fprintf (dump_file
, " is invariant (%d)%s\n",
1982 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1986 fprintf (dump_file
, "\n");
1992 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1993 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1994 we are at the top-level of the processed address. */
1997 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1998 unsigned HOST_WIDE_INT
*offset
)
2000 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2001 enum tree_code code
;
2002 tree type
, orig_type
= TREE_TYPE (expr
);
2003 unsigned HOST_WIDE_INT off0
, off1
, st
;
2004 tree orig_expr
= expr
;
2008 type
= TREE_TYPE (expr
);
2009 code
= TREE_CODE (expr
);
2015 if (!cst_and_fits_in_hwi (expr
)
2016 || integer_zerop (expr
))
2019 *offset
= int_cst_value (expr
);
2020 return build_int_cst (orig_type
, 0);
2022 case POINTER_PLUS_EXPR
:
2025 op0
= TREE_OPERAND (expr
, 0);
2026 op1
= TREE_OPERAND (expr
, 1);
2028 op0
= strip_offset_1 (op0
, false, false, &off0
);
2029 op1
= strip_offset_1 (op1
, false, false, &off1
);
2031 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2032 if (op0
== TREE_OPERAND (expr
, 0)
2033 && op1
== TREE_OPERAND (expr
, 1))
2036 if (integer_zerop (op1
))
2038 else if (integer_zerop (op0
))
2040 if (code
== MINUS_EXPR
)
2041 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2046 expr
= fold_build2 (code
, type
, op0
, op1
);
2048 return fold_convert (orig_type
, expr
);
2051 op1
= TREE_OPERAND (expr
, 1);
2052 if (!cst_and_fits_in_hwi (op1
))
2055 op0
= TREE_OPERAND (expr
, 0);
2056 op0
= strip_offset_1 (op0
, false, false, &off0
);
2057 if (op0
== TREE_OPERAND (expr
, 0))
2060 *offset
= off0
* int_cst_value (op1
);
2061 if (integer_zerop (op0
))
2064 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2066 return fold_convert (orig_type
, expr
);
2069 case ARRAY_RANGE_REF
:
2073 step
= array_ref_element_size (expr
);
2074 if (!cst_and_fits_in_hwi (step
))
2077 st
= int_cst_value (step
);
2078 op1
= TREE_OPERAND (expr
, 1);
2079 op1
= strip_offset_1 (op1
, false, false, &off1
);
2080 *offset
= off1
* st
;
2083 && integer_zerop (op1
))
2085 /* Strip the component reference completely. */
2086 op0
= TREE_OPERAND (expr
, 0);
2087 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2097 tmp
= component_ref_field_offset (expr
);
2099 && cst_and_fits_in_hwi (tmp
))
2101 /* Strip the component reference completely. */
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2104 *offset
= off0
+ int_cst_value (tmp
);
2110 op0
= TREE_OPERAND (expr
, 0);
2111 op0
= strip_offset_1 (op0
, true, true, &off0
);
2114 if (op0
== TREE_OPERAND (expr
, 0))
2117 expr
= build_fold_addr_expr (op0
);
2118 return fold_convert (orig_type
, expr
);
2121 /* ??? Offset operand? */
2122 inside_addr
= false;
2129 /* Default handling of expressions for that we want to recurse into
2130 the first operand. */
2131 op0
= TREE_OPERAND (expr
, 0);
2132 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2135 if (op0
== TREE_OPERAND (expr
, 0)
2136 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2139 expr
= copy_node (expr
);
2140 TREE_OPERAND (expr
, 0) = op0
;
2142 TREE_OPERAND (expr
, 1) = op1
;
2144 /* Inside address, we might strip the top level component references,
2145 thus changing type of the expression. Handling of ADDR_EXPR
2147 expr
= fold_convert (orig_type
, expr
);
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
2160 /* Returns variant of TYPE that can be used as base for different uses.
2161 We return unsigned type with the same precision, which avoids problems
2165 generic_type_for (tree type
)
2167 if (POINTER_TYPE_P (type
))
2168 return unsigned_type_for (type
);
2170 if (TYPE_UNSIGNED (type
))
2173 return unsigned_type_for (type
);
2176 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2177 the bitmap to that we should store it. */
2179 static struct ivopts_data
*fd_ivopts_data
;
2181 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2183 bitmap
*depends_on
= (bitmap
*) data
;
2184 struct version_info
*info
;
2186 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2188 info
= name_info (fd_ivopts_data
, *expr_p
);
2190 if (!info
->inv_id
|| info
->has_nonlin_use
)
2194 *depends_on
= BITMAP_ALLOC (NULL
);
2195 bitmap_set_bit (*depends_on
, info
->inv_id
);
2200 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2201 position to POS. If USE is not NULL, the candidate is set as related to
2202 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2203 replacement of the final value of the iv by a direct computation. */
2205 static struct iv_cand
*
2206 add_candidate_1 (struct ivopts_data
*data
,
2207 tree base
, tree step
, bool important
, enum iv_position pos
,
2208 struct iv_use
*use
, gimple incremented_at
)
2211 struct iv_cand
*cand
= NULL
;
2212 tree type
, orig_type
;
2216 orig_type
= TREE_TYPE (base
);
2217 type
= generic_type_for (orig_type
);
2218 if (type
!= orig_type
)
2220 base
= fold_convert (type
, base
);
2221 step
= fold_convert (type
, step
);
2225 for (i
= 0; i
< n_iv_cands (data
); i
++)
2227 cand
= iv_cand (data
, i
);
2229 if (cand
->pos
!= pos
)
2232 if (cand
->incremented_at
!= incremented_at
2233 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2234 && cand
->ainc_use
!= use
))
2248 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2249 && operand_equal_p (step
, cand
->iv
->step
, 0)
2250 && (TYPE_PRECISION (TREE_TYPE (base
))
2251 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2255 if (i
== n_iv_cands (data
))
2257 cand
= XCNEW (struct iv_cand
);
2263 cand
->iv
= alloc_iv (base
, step
);
2266 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2268 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2269 cand
->var_after
= cand
->var_before
;
2271 cand
->important
= important
;
2272 cand
->incremented_at
= incremented_at
;
2273 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2276 && TREE_CODE (step
) != INTEGER_CST
)
2278 fd_ivopts_data
= data
;
2279 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2282 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2283 cand
->ainc_use
= use
;
2285 cand
->ainc_use
= NULL
;
2287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2288 dump_cand (dump_file
, cand
);
2291 if (important
&& !cand
->important
)
2293 cand
->important
= true;
2294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2295 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2300 bitmap_set_bit (use
->related_cands
, i
);
2301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2302 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2309 /* Returns true if incrementing the induction variable at the end of the LOOP
2312 The purpose is to avoid splitting latch edge with a biv increment, thus
2313 creating a jump, possibly confusing other optimization passes and leaving
2314 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2315 is not available (so we do not have a better alternative), or if the latch
2316 edge is already nonempty. */
2319 allow_ip_end_pos_p (struct loop
*loop
)
2321 if (!ip_normal_pos (loop
))
2324 if (!empty_block_p (ip_end_pos (loop
)))
2330 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2331 Important field is set to IMPORTANT. */
2334 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2335 bool important
, struct iv_use
*use
)
2337 basic_block use_bb
= gimple_bb (use
->stmt
);
2338 enum machine_mode mem_mode
;
2339 unsigned HOST_WIDE_INT cstepi
;
2341 /* If we insert the increment in any position other than the standard
2342 ones, we must ensure that it is incremented once per iteration.
2343 It must not be in an inner nested loop, or one side of an if
2345 if (use_bb
->loop_father
!= data
->current_loop
2346 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2347 || stmt_could_throw_p (use
->stmt
)
2348 || !cst_and_fits_in_hwi (step
))
2351 cstepi
= int_cst_value (step
);
2353 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2354 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2355 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2357 enum tree_code code
= MINUS_EXPR
;
2359 tree new_step
= step
;
2361 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2363 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2364 code
= POINTER_PLUS_EXPR
;
2367 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2368 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2369 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2372 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2373 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2375 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2380 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2381 position to POS. If USE is not NULL, the candidate is set as related to
2382 it. The candidate computation is scheduled on all available positions. */
2385 add_candidate (struct ivopts_data
*data
,
2386 tree base
, tree step
, bool important
, struct iv_use
*use
)
2388 if (ip_normal_pos (data
->current_loop
))
2389 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2390 if (ip_end_pos (data
->current_loop
)
2391 && allow_ip_end_pos_p (data
->current_loop
))
2392 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2394 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2395 add_autoinc_candidates (data
, base
, step
, important
, use
);
2398 /* Add a standard "0 + 1 * iteration" iv candidate for a
2399 type with SIZE bits. */
2402 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2405 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2406 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2410 /* Adds standard iv candidates. */
2413 add_standard_iv_candidates (struct ivopts_data
*data
)
2415 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2417 /* The same for a double-integer type if it is still fast enough. */
2418 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2419 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2423 /* Adds candidates bases on the old induction variable IV. */
2426 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2430 struct iv_cand
*cand
;
2432 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2434 /* The same, but with initial value zero. */
2435 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2436 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2438 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2439 iv
->step
, true, NULL
);
2441 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2442 if (gimple_code (phi
) == GIMPLE_PHI
)
2444 /* Additionally record the possibility of leaving the original iv
2446 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2447 cand
= add_candidate_1 (data
,
2448 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2449 SSA_NAME_DEF_STMT (def
));
2450 cand
->var_before
= iv
->ssa_name
;
2451 cand
->var_after
= def
;
2455 /* Adds candidates based on the old induction variables. */
2458 add_old_ivs_candidates (struct ivopts_data
*data
)
2464 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2466 iv
= ver_info (data
, i
)->iv
;
2467 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2468 add_old_iv_candidates (data
, iv
);
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the nearest power of two, so that computing the
             modulo when probing the map is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
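
/* Worked example (illustrative only): with five related candidates the
   rounding loop in alloc_use_cost_map grows SIZE to 8, so a slot index can
   be computed as "cand->id & 7" instead of "cand->id % 5".  For the cost
   ordering, compare_costs ({4, 1}, {4, 2}) is negative: equal runtime
   costs are ordered by complexity, everything else by runtime cost.  */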
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
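
/* Probe order example (illustrative): with n_map_members == 8 and
   cand->id == 13, the scan starts at slot 13 & 7 == 5, walks slots 5..7,
   then wraps around to 0..4.  set_use_iv_cost and get_use_iv_cost follow
   the same order, so a stored pair is always found again.  */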
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;
      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
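
/* Worked example: for a 32-bit integer constant BITNO is 31 and the bit
   comes from TREE_INT_CST_LOW, so tree_int_cst_sign_bit returns 1 for -1
   and 0 for 5.  TREE_INT_CST_HIGH is consulted only when TYPE_PRECISION
   exceeds HOST_BITS_PER_WIDE_INT.  */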
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
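
/* Illustration: with avg_loop_niter returning 5 and a setup cost of 10,
   the speed path above charges 10 / 5 == 2 per iteration; when optimizing
   for size the full 10 is kept, since setup code is emitted exactly once
   no matter how often the loop rolls.  */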
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
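
/* Cache behaviour sketch (illustrative): the first call for a given
   (mode, constant) pair expands a multiplication into a fresh insn
   sequence and measures it; later calls hit the hash table, keyed by
   57 * mode + cst % 877 and disambiguated by mbc_entry_eq, so a repeated
   multiply_by_cost (4, mode, speed) is answered without re-expanding.  */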
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
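
/* Usage sketch: the first query for a given (address space, mode) pair
   probes every multiplier in [-MAX_RATIO, MAX_RATIO] against the target's
   legitimate-address predicate; subsequent queries are a single bit test
   in the cached sbitmap.  On many targets only a handful of ratios such
   as 1, 2, 4 and 8 end up set, but the exact set is target-dependent.  */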
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
                           data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_DECREMENT)
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_PRE_INCREMENT)
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_INCREMENT)
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (address_cost_data, address_cost_data_list,
                   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
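
/* Reading the cached table (illustrative): the four indices of
   data->costs[sym][var][off][rat] say whether the address contains a
   symbol, a variable part, a constant offset and a scaled index.  For
   instance costs[0][1][1][0] is the cost of an address of the form
   "var + cst", and the complexity returned by new_cost above counts how
   many of the four parts are present.  */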
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
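
/* Example (illustrative): for e1 == &a[4] and e2 == &a[1] with 4-byte
   elements, strip_offset peels the constant parts, *OFFSET grows by
   16 - 4 == 12, and the stripped bases compare equal, so no runtime cost
   is charged for the difference.  */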
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
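
/* Worked example: an unsigned 32-bit iv with step 8 has num_ending_zeros
   == 3, so the period is a mask of 32 - 3 == 29 low bits, i.e. 2^29 - 1;
   with step 1 the period is the full 2^32 - 1.  */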
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit, &desc);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (nit, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, nit))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (loop_only_exit_p (loop, exit))
            {
              if (!estimated_loop_iterations (loop, true, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  int inv_expr_id = -1;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  /* A missing cost pair is treated as infinitely expensive.  */
  if (!a)
    return false;
  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;
  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
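/* Illustrative note (not from the source): the total cost of a candidate
   assignment IVS therefore decomposes as

     cost (IVS) = sum of the selected use costs      (ivs->cand_use_cost)
                + sum of the candidates' own costs   (ivs->cand_cost)
                + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   e.g. with hypothetical figures -- two uses costing 2 and 3, one candidate
   costing 1, and a register-pressure term of 4 -- the set cost would be
   2 + 3 + 1 + 4 = 10.  */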
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    ivs->upto++;

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;
  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
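/* Illustrative, self-contained sketch (not part of GCC): the iv_ca_delta
   machinery above is an undo list -- each node records the old and the new
   value of one slot, so a tentative change can be applied forward and then
   rolled back.  The toy program below shows the same pattern on a plain
   int array; all names in it are hypothetical.

#include <stdio.h>
#include <stdlib.h>

struct delta
{
  int slot;               // index of the changed cell
  int old_val, new_val;   // values before and after the change
  struct delta *next;     // next recorded change
};

// Record a change of cell SLOT from OLD_VAL to NEW_VAL before NEXT.
static struct delta *
delta_add (int slot, int old_val, int new_val, struct delta *next)
{
  struct delta *d = malloc (sizeof (struct delta));
  d->slot = slot;
  d->old_val = old_val;
  d->new_val = new_val;
  d->next = next;
  return d;
}

// Apply the recorded changes to VALS; if FORWARD is zero, revert them.
static void
delta_commit (int *vals, struct delta *list, int forward)
{
  struct delta *d;
  for (d = list; d; d = d->next)
    vals[d->slot] = forward ? d->new_val : d->old_val;
}

int
main (void)
{
  int vals[3] = { 1, 2, 3 };
  struct delta *list = delta_add (0, vals[0], 10, NULL);
  list = delta_add (2, vals[2], 30, list);

  delta_commit (vals, list, 1);  // try the change: prints 10 2 30
  printf ("%d %d %d\n", vals[0], vals[1], vals[2]);
  delta_commit (vals, list, 0);  // roll it back:   prints 1 2 3
  printf ("%d %d %d\n", vals[0], vals[1], vals[2]);

  while (list)
    {
      struct delta *next = list->next;
      free (list);
      list = next;
    }
  return 0;
}
*/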
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  if (TREE_CODE (new_ref) == TARGET_MEM_REF)
    new_ptr_base = TMR_BASE (new_ref);
  else if (TREE_CODE (new_ref) == MEM_REF)
    new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if (!base)
        ;
      else if ((INDIRECT_REF_P (base)
                || TREE_CODE (base) == MEM_REF)
               && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
        duplicate_ssa_name_ptr_info
          (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
      else if (TREE_CODE (base) == VAR_DECL
               || TREE_CODE (base) == PARM_DECL
               || TREE_CODE (base) == RESULT_DECL)
        {
          struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
          pt_solution_set_var (&pi->pt, base);
        }
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   directly propagating t over to (1) will introduce an overlapping live range,
   thus increasing register pressure.  This peephole transforms it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
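/* Illustrative sketch (not from GCC): the successful branch above turns a
   test on the counter into a test on the chosen candidate, e.g.

     before:  for (i = 0; i < n; i++)  ... a[i] ...      // exit test: i < n
     after:   bound = a + n;                             // computed once in the preheader
              for (p = a; p < bound; p++)  ... *p ...    // exit test: p < bound

   where p is a hypothetical pointer candidate selected for the loop; when
   no suitable bound exists, the fallback path above simply recomputes the
   original comparison value from the candidate.  */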
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}