vec.h (FOR_EACH_VEC_ELT): Define.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "output.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "tree-pass.h"
80 #include "ggc.h"
81 #include "insn-config.h"
82 #include "recog.h"
83 #include "pointer-set.h"
84 #include "hashtab.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
87 #include "cfgloop.h"
88 #include "params.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
91 #include "target.h"
92 #include "tree-inline.h"
93 #include "tree-ssa-propagate.h"
94
95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
96 cost of different addressing modes. This should be moved to a TBD
97 interface between the GIMPLE and RTL worlds. */
98 #include "expr.h"
99
100 /* The infinite cost. */
101 #define INFTY 10000000
102
103 #define AVG_LOOP_NITER(LOOP) 5
104
105 /* Returns the expected number of loop iterations for LOOP.
106 The average trip count is computed from profile data if it
107 exists. */
108
109 static inline HOST_WIDE_INT
110 avg_loop_niter (struct loop *loop)
111 {
112 HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
113 if (niter == -1)
114 return AVG_LOOP_NITER (loop);
115
116 return niter;
117 }
118
119 /* Representation of the induction variable. */
120 struct iv
121 {
122 tree base; /* Initial value of the iv. */
123 tree base_object; /* A memory object to that the induction variable points. */
124 tree step; /* Step of the iv (constant only). */
125 tree ssa_name; /* The ssa name with the value. */
126 bool biv_p; /* Is it a biv? */
127 bool have_use_for; /* Do we already have a use for it? */
128 unsigned use_id; /* The identifier in the use if it is the case. */
129 };
130
131 /* Per-ssa version information (induction variable descriptions, etc.). */
132 struct version_info
133 {
134 tree name; /* The ssa name. */
135 struct iv *iv; /* Induction variable description. */
136 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
137 an expression that is not an induction variable. */
138 bool preserve_biv; /* For the original biv, whether to preserve it. */
139 unsigned inv_id; /* Id of an invariant. */
140 };
141
142 /* Types of uses. */
143 enum use_type
144 {
145 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
146 USE_ADDRESS, /* Use in an address. */
147 USE_COMPARE /* Use is a compare. */
148 };
149
150 /* Cost of a computation. */
151 typedef struct
152 {
153 int cost; /* The runtime cost. */
154 unsigned complexity; /* The estimate of the complexity of the code for
155 the computation (in no concrete units --
156 complexity field should be larger for more
157 complex expressions and addressing modes). */
158 } comp_cost;
159
160 static const comp_cost zero_cost = {0, 0};
161 static const comp_cost infinite_cost = {INFTY, INFTY};
162
163 /* The candidate - cost pair. */
164 struct cost_pair
165 {
166 struct iv_cand *cand; /* The candidate. */
167 comp_cost cost; /* The cost. */
168 bitmap depends_on; /* The list of invariants that have to be
169 preserved. */
170 tree value; /* For final value elimination, the expression for
171 the final value of the iv. For iv elimination,
172 the new bound to compare with. */
173 int inv_expr_id; /* Loop invariant expression id. */
174 };
175
176 /* Use. */
177 struct iv_use
178 {
179 unsigned id; /* The id of the use. */
180 enum use_type type; /* Type of the use. */
181 struct iv *iv; /* The induction variable it is based on. */
182 gimple stmt; /* Statement in that it occurs. */
183 tree *op_p; /* The place where it occurs. */
184 bitmap related_cands; /* The set of "related" iv candidates, plus the common
185 important ones. */
186
187 unsigned n_map_members; /* Number of candidates in the cost_map list. */
188 struct cost_pair *cost_map;
189 /* The costs wrto the iv candidates. */
190
191 struct iv_cand *selected;
192 /* The selected candidate. */
193 };
194
195 /* The position where the iv is computed. */
196 enum iv_position
197 {
198 IP_NORMAL, /* At the end, just before the exit condition. */
199 IP_END, /* At the end of the latch block. */
200 IP_BEFORE_USE, /* Immediately before a specific use. */
201 IP_AFTER_USE, /* Immediately after a specific use. */
202 IP_ORIGINAL /* The original biv. */
203 };
204
205 /* The induction variable candidate. */
206 struct iv_cand
207 {
208 unsigned id; /* The number of the candidate. */
209 bool important; /* Whether this is an "important" candidate, i.e. such
210 that it should be considered by all uses. */
211 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
212 gimple incremented_at;/* For original biv, the statement where it is
213 incremented. */
214 tree var_before; /* The variable used for it before increment. */
215 tree var_after; /* The variable used for it after increment. */
216 struct iv *iv; /* The value of the candidate. NULL for
217 "pseudocandidate" used to indicate the possibility
218 to replace the final value of an iv by direct
219 computation of the value. */
220 unsigned cost; /* Cost of the candidate. */
221 unsigned cost_step; /* Cost of the candidate's increment operation. */
222 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
223 where it is incremented. */
224 bitmap depends_on; /* The list of invariants that are used in step of the
225 biv. */
226 };
227
228 /* Loop invariant expression hashtable entry. */
229 struct iv_inv_expr_ent
230 {
231 tree expr;
232 int id;
233 hashval_t hash;
234 };
235
236 /* The data used by the induction variable optimizations. */
237
238 typedef struct iv_use *iv_use_p;
239 DEF_VEC_P(iv_use_p);
240 DEF_VEC_ALLOC_P(iv_use_p,heap);
241
242 typedef struct iv_cand *iv_cand_p;
243 DEF_VEC_P(iv_cand_p);
244 DEF_VEC_ALLOC_P(iv_cand_p,heap);
245
246 struct ivopts_data
247 {
248 /* The currently optimized loop. */
249 struct loop *current_loop;
250
251 /* Numbers of iterations for all exits of the current loop. */
252 struct pointer_map_t *niters;
253
254 /* Number of registers used in it. */
255 unsigned regs_used;
256
257 /* The size of version_info array allocated. */
258 unsigned version_info_size;
259
260 /* The array of information for the ssa names. */
261 struct version_info *version_info;
262
263 /* The hashtable of loop invariant expressions created
264 by ivopt. */
265 htab_t inv_expr_tab;
266
267 /* Loop invariant expression id. */
268 int inv_expr_id;
269
270 /* The bitmap of indices in version_info whose value was changed. */
271 bitmap relevant;
272
273 /* The uses of induction variables. */
274 VEC(iv_use_p,heap) *iv_uses;
275
276 /* The candidates. */
277 VEC(iv_cand_p,heap) *iv_candidates;
278
279 /* A bitmap of important candidates. */
280 bitmap important_candidates;
281
282 /* The maximum invariant id. */
283 unsigned max_inv_id;
284
285 /* Whether to consider just related and important candidates when replacing a
286 use. */
287 bool consider_all_candidates;
288
289 /* Are we optimizing for speed? */
290 bool speed;
291
292 /* Whether the loop body includes any function calls. */
293 bool body_includes_call;
294 };
295
296 /* An assignment of iv candidates to uses. */
297
298 struct iv_ca
299 {
300 /* The number of uses covered by the assignment. */
301 unsigned upto;
302
303 /* Number of uses that cannot be expressed by the candidates in the set. */
304 unsigned bad_uses;
305
306 /* Candidate assigned to a use, together with the related costs. */
307 struct cost_pair **cand_for_use;
308
309 /* Number of times each candidate is used. */
310 unsigned *n_cand_uses;
311
312 /* The candidates used. */
313 bitmap cands;
314
315 /* The number of candidates in the set. */
316 unsigned n_cands;
317
318 /* Total number of registers needed. */
319 unsigned n_regs;
320
321 /* Total cost of expressing uses. */
322 comp_cost cand_use_cost;
323
324 /* Total cost of candidates. */
325 unsigned cand_cost;
326
327 /* Number of times each invariant is used. */
328 unsigned *n_invariant_uses;
329
330 /* Total cost of the assignment. */
331 comp_cost cost;
332 };
333
334 /* Difference of two iv candidate assignments. */
335
336 struct iv_ca_delta
337 {
338 /* Changed use. */
339 struct iv_use *use;
340
341 /* An old assignment (for rollback purposes). */
342 struct cost_pair *old_cp;
343
344 /* A new assignment. */
345 struct cost_pair *new_cp;
346
347 /* Next change in the list. */
348 struct iv_ca_delta *next_change;
349 };
350
351 /* Bound on number of candidates below that all candidates are considered. */
352
353 #define CONSIDER_ALL_CANDIDATES_BOUND \
354 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
355
356 /* If there are more iv occurrences, we just give up (it is quite unlikely that
357 optimizing such a loop would help, and it would take ages). */
358
359 #define MAX_CONSIDERED_USES \
360 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
361
362 /* If there are at most this number of ivs in the set, try removing unnecessary
363 ivs from the set always. */
364
365 #define ALWAYS_PRUNE_CAND_SET_BOUND \
366 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
367
368 /* The list of trees for that the decl_rtl field must be reset is stored
369 here. */
370
371 static VEC(tree,heap) *decl_rtl_to_reset;
372
373 /* Number of uses recorded in DATA. */
374
375 static inline unsigned
376 n_iv_uses (struct ivopts_data *data)
377 {
378 return VEC_length (iv_use_p, data->iv_uses);
379 }
380
381 /* Ith use recorded in DATA. */
382
383 static inline struct iv_use *
384 iv_use (struct ivopts_data *data, unsigned i)
385 {
386 return VEC_index (iv_use_p, data->iv_uses, i);
387 }
388
389 /* Number of candidates recorded in DATA. */
390
391 static inline unsigned
392 n_iv_cands (struct ivopts_data *data)
393 {
394 return VEC_length (iv_cand_p, data->iv_candidates);
395 }
396
397 /* Ith candidate recorded in DATA. */
398
399 static inline struct iv_cand *
400 iv_cand (struct ivopts_data *data, unsigned i)
401 {
402 return VEC_index (iv_cand_p, data->iv_candidates, i);
403 }
404
405 /* The single loop exit if it dominates the latch, NULL otherwise. */
406
407 edge
408 single_dom_exit (struct loop *loop)
409 {
410 edge exit = single_exit (loop);
411
412 if (!exit)
413 return NULL;
414
415 if (!just_once_each_iteration_p (loop, exit->src))
416 return NULL;
417
418 return exit;
419 }
420
421 /* Dumps information about the induction variable IV to FILE. */
422
423 extern void dump_iv (FILE *, struct iv *);
424 void
425 dump_iv (FILE *file, struct iv *iv)
426 {
427 if (iv->ssa_name)
428 {
429 fprintf (file, "ssa name ");
430 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
431 fprintf (file, "\n");
432 }
433
434 fprintf (file, " type ");
435 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
436 fprintf (file, "\n");
437
438 if (iv->step)
439 {
440 fprintf (file, " base ");
441 print_generic_expr (file, iv->base, TDF_SLIM);
442 fprintf (file, "\n");
443
444 fprintf (file, " step ");
445 print_generic_expr (file, iv->step, TDF_SLIM);
446 fprintf (file, "\n");
447 }
448 else
449 {
450 fprintf (file, " invariant ");
451 print_generic_expr (file, iv->base, TDF_SLIM);
452 fprintf (file, "\n");
453 }
454
455 if (iv->base_object)
456 {
457 fprintf (file, " base object ");
458 print_generic_expr (file, iv->base_object, TDF_SLIM);
459 fprintf (file, "\n");
460 }
461
462 if (iv->biv_p)
463 fprintf (file, " is a biv\n");
464 }
465
466 /* Dumps information about the USE to FILE. */
467
468 extern void dump_use (FILE *, struct iv_use *);
469 void
470 dump_use (FILE *file, struct iv_use *use)
471 {
472 fprintf (file, "use %d\n", use->id);
473
474 switch (use->type)
475 {
476 case USE_NONLINEAR_EXPR:
477 fprintf (file, " generic\n");
478 break;
479
480 case USE_ADDRESS:
481 fprintf (file, " address\n");
482 break;
483
484 case USE_COMPARE:
485 fprintf (file, " compare\n");
486 break;
487
488 default:
489 gcc_unreachable ();
490 }
491
492 fprintf (file, " in statement ");
493 print_gimple_stmt (file, use->stmt, 0, 0);
494 fprintf (file, "\n");
495
496 fprintf (file, " at position ");
497 if (use->op_p)
498 print_generic_expr (file, *use->op_p, TDF_SLIM);
499 fprintf (file, "\n");
500
501 dump_iv (file, use->iv);
502
503 if (use->related_cands)
504 {
505 fprintf (file, " related candidates ");
506 dump_bitmap (file, use->related_cands);
507 }
508 }
509
510 /* Dumps information about the uses to FILE. */
511
512 extern void dump_uses (FILE *, struct ivopts_data *);
513 void
514 dump_uses (FILE *file, struct ivopts_data *data)
515 {
516 unsigned i;
517 struct iv_use *use;
518
519 for (i = 0; i < n_iv_uses (data); i++)
520 {
521 use = iv_use (data, i);
522
523 dump_use (file, use);
524 fprintf (file, "\n");
525 }
526 }
527
528 /* Dumps information about induction variable candidate CAND to FILE. */
529
530 extern void dump_cand (FILE *, struct iv_cand *);
531 void
532 dump_cand (FILE *file, struct iv_cand *cand)
533 {
534 struct iv *iv = cand->iv;
535
536 fprintf (file, "candidate %d%s\n",
537 cand->id, cand->important ? " (important)" : "");
538
539 if (cand->depends_on)
540 {
541 fprintf (file, " depends on ");
542 dump_bitmap (file, cand->depends_on);
543 }
544
545 if (!iv)
546 {
547 fprintf (file, " final value replacement\n");
548 return;
549 }
550
551 if (cand->var_before)
552 {
553 fprintf (file, " var_before ");
554 print_generic_expr (file, cand->var_before, TDF_SLIM);
555 fprintf (file, "\n");
556 }
557 if (cand->var_after)
558 {
559 fprintf (file, " var_after ");
560 print_generic_expr (file, cand->var_after, TDF_SLIM);
561 fprintf (file, "\n");
562 }
563
564 switch (cand->pos)
565 {
566 case IP_NORMAL:
567 fprintf (file, " incremented before exit test\n");
568 break;
569
570 case IP_BEFORE_USE:
571 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
572 break;
573
574 case IP_AFTER_USE:
575 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
576 break;
577
578 case IP_END:
579 fprintf (file, " incremented at end\n");
580 break;
581
582 case IP_ORIGINAL:
583 fprintf (file, " original biv\n");
584 break;
585 }
586
587 dump_iv (file, iv);
588 }
589
590 /* Returns the info for ssa version VER. */
591
592 static inline struct version_info *
593 ver_info (struct ivopts_data *data, unsigned ver)
594 {
595 return data->version_info + ver;
596 }
597
598 /* Returns the info for ssa name NAME. */
599
600 static inline struct version_info *
601 name_info (struct ivopts_data *data, tree name)
602 {
603 return ver_info (data, SSA_NAME_VERSION (name));
604 }
605
606 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
607 emitted in LOOP. */
608
609 static bool
610 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
611 {
612 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
613
614 gcc_assert (bb);
615
616 if (sbb == loop->latch)
617 return true;
618
619 if (sbb != bb)
620 return false;
621
622 return stmt == last_stmt (bb);
623 }
624
625 /* Returns true if STMT if after the place where the original induction
626 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
627 if the positions are identical. */
628
629 static bool
630 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
631 {
632 basic_block cand_bb = gimple_bb (cand->incremented_at);
633 basic_block stmt_bb = gimple_bb (stmt);
634
635 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
636 return false;
637
638 if (stmt_bb != cand_bb)
639 return true;
640
641 if (true_if_equal
642 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
643 return true;
644 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
645 }
646
647 /* Returns true if STMT if after the place where the induction variable
648 CAND is incremented in LOOP. */
649
650 static bool
651 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
652 {
653 switch (cand->pos)
654 {
655 case IP_END:
656 return false;
657
658 case IP_NORMAL:
659 return stmt_after_ip_normal_pos (loop, stmt);
660
661 case IP_ORIGINAL:
662 case IP_AFTER_USE:
663 return stmt_after_inc_pos (cand, stmt, false);
664
665 case IP_BEFORE_USE:
666 return stmt_after_inc_pos (cand, stmt, true);
667
668 default:
669 gcc_unreachable ();
670 }
671 }
672
673 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
674
675 static bool
676 abnormal_ssa_name_p (tree exp)
677 {
678 if (!exp)
679 return false;
680
681 if (TREE_CODE (exp) != SSA_NAME)
682 return false;
683
684 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
685 }
686
687 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
688 abnormal phi node. Callback for for_each_index. */
689
690 static bool
691 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
692 void *data ATTRIBUTE_UNUSED)
693 {
694 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
695 {
696 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
697 return false;
698 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
699 return false;
700 }
701
702 return !abnormal_ssa_name_p (*index);
703 }
704
705 /* Returns true if EXPR contains a ssa name that occurs in an
706 abnormal phi node. */
707
708 bool
709 contains_abnormal_ssa_name_p (tree expr)
710 {
711 enum tree_code code;
712 enum tree_code_class codeclass;
713
714 if (!expr)
715 return false;
716
717 code = TREE_CODE (expr);
718 codeclass = TREE_CODE_CLASS (code);
719
720 if (code == SSA_NAME)
721 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
722
723 if (code == INTEGER_CST
724 || is_gimple_min_invariant (expr))
725 return false;
726
727 if (code == ADDR_EXPR)
728 return !for_each_index (&TREE_OPERAND (expr, 0),
729 idx_contains_abnormal_ssa_name_p,
730 NULL);
731
732 if (code == COND_EXPR)
733 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
734 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
735 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
736
737 switch (codeclass)
738 {
739 case tcc_binary:
740 case tcc_comparison:
741 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
742 return true;
743
744 /* Fallthru. */
745 case tcc_unary:
746 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
747 return true;
748
749 break;
750
751 default:
752 gcc_unreachable ();
753 }
754
755 return false;
756 }
757
758 /* Returns tree describing number of iterations determined from
759 EXIT of DATA->current_loop, or NULL if something goes wrong. */
760
761 static tree
762 niter_for_exit (struct ivopts_data *data, edge exit,
763 struct tree_niter_desc **desc_p)
764 {
765 struct tree_niter_desc* desc = NULL;
766 tree niter;
767 void **slot;
768
769 if (!data->niters)
770 {
771 data->niters = pointer_map_create ();
772 slot = NULL;
773 }
774 else
775 slot = pointer_map_contains (data->niters, exit);
776
777 if (!slot)
778 {
779 /* Try to determine number of iterations. We must know it
780 unconditionally (i.e., without possibility of # of iterations
781 being zero). Also, we cannot safely work with ssa names that
782 appear in phi nodes on abnormal edges, so that we do not create
783 overlapping life ranges for them (PR 27283). */
784 desc = XNEW (struct tree_niter_desc);
785 if (number_of_iterations_exit (data->current_loop,
786 exit, desc, true)
787 && integer_zerop (desc->may_be_zero)
788 && !contains_abnormal_ssa_name_p (desc->niter))
789 niter = desc->niter;
790 else
791 niter = NULL_TREE;
792
793 desc->niter = niter;
794 slot = pointer_map_insert (data->niters, exit);
795 *slot = desc;
796 }
797 else
798 niter = ((struct tree_niter_desc *) *slot)->niter;
799
800 if (desc_p)
801 *desc_p = (struct tree_niter_desc *) *slot;
802 return niter;
803 }
804
805 /* Returns tree describing number of iterations determined from
806 single dominating exit of DATA->current_loop, or NULL if something
807 goes wrong. */
808
809 static tree
810 niter_for_single_dom_exit (struct ivopts_data *data)
811 {
812 edge exit = single_dom_exit (data->current_loop);
813
814 if (!exit)
815 return NULL;
816
817 return niter_for_exit (data, exit, NULL);
818 }
819
820 /* Hash table equality function for expressions. */
821
822 static int
823 htab_inv_expr_eq (const void *ent1, const void *ent2)
824 {
825 const struct iv_inv_expr_ent *expr1 =
826 (const struct iv_inv_expr_ent *)ent1;
827 const struct iv_inv_expr_ent *expr2 =
828 (const struct iv_inv_expr_ent *)ent2;
829
830 return operand_equal_p (expr1->expr, expr2->expr, 0);
831 }
832
833 /* Hash function for loop invariant expressions. */
834
835 static hashval_t
836 htab_inv_expr_hash (const void *ent)
837 {
838 const struct iv_inv_expr_ent *expr =
839 (const struct iv_inv_expr_ent *)ent;
840 return expr->hash;
841 }
842
843 /* Initializes data structures used by the iv optimization pass, stored
844 in DATA. */
845
846 static void
847 tree_ssa_iv_optimize_init (struct ivopts_data *data)
848 {
849 data->version_info_size = 2 * num_ssa_names;
850 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
851 data->relevant = BITMAP_ALLOC (NULL);
852 data->important_candidates = BITMAP_ALLOC (NULL);
853 data->max_inv_id = 0;
854 data->niters = NULL;
855 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
856 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
857 data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
858 htab_inv_expr_eq, free);
859 data->inv_expr_id = 0;
860 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
861 }
862
863 /* Returns a memory object to that EXPR points. In case we are able to
864 determine that it does not point to any such object, NULL is returned. */
865
866 static tree
867 determine_base_object (tree expr)
868 {
869 enum tree_code code = TREE_CODE (expr);
870 tree base, obj;
871
872 /* If this is a pointer casted to any type, we need to determine
873 the base object for the pointer; so handle conversions before
874 throwing away non-pointer expressions. */
875 if (CONVERT_EXPR_P (expr))
876 return determine_base_object (TREE_OPERAND (expr, 0));
877
878 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
879 return NULL_TREE;
880
881 switch (code)
882 {
883 case INTEGER_CST:
884 return NULL_TREE;
885
886 case ADDR_EXPR:
887 obj = TREE_OPERAND (expr, 0);
888 base = get_base_address (obj);
889
890 if (!base)
891 return expr;
892
893 if (TREE_CODE (base) == MEM_REF)
894 return determine_base_object (TREE_OPERAND (base, 0));
895
896 return fold_convert (ptr_type_node,
897 build_fold_addr_expr (base));
898
899 case POINTER_PLUS_EXPR:
900 return determine_base_object (TREE_OPERAND (expr, 0));
901
902 case PLUS_EXPR:
903 case MINUS_EXPR:
904 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
905 gcc_unreachable ();
906
907 default:
908 return fold_convert (ptr_type_node, expr);
909 }
910 }
911
912 /* Allocates an induction variable with given initial value BASE and step STEP
913 for loop LOOP. */
914
915 static struct iv *
916 alloc_iv (tree base, tree step)
917 {
918 struct iv *iv = XCNEW (struct iv);
919 gcc_assert (step != NULL_TREE);
920
921 iv->base = base;
922 iv->base_object = determine_base_object (base);
923 iv->step = step;
924 iv->biv_p = false;
925 iv->have_use_for = false;
926 iv->use_id = 0;
927 iv->ssa_name = NULL_TREE;
928
929 return iv;
930 }
931
932 /* Sets STEP and BASE for induction variable IV. */
933
934 static void
935 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
936 {
937 struct version_info *info = name_info (data, iv);
938
939 gcc_assert (!info->iv);
940
941 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
942 info->iv = alloc_iv (base, step);
943 info->iv->ssa_name = iv;
944 }
945
946 /* Finds induction variable declaration for VAR. */
947
948 static struct iv *
949 get_iv (struct ivopts_data *data, tree var)
950 {
951 basic_block bb;
952 tree type = TREE_TYPE (var);
953
954 if (!POINTER_TYPE_P (type)
955 && !INTEGRAL_TYPE_P (type))
956 return NULL;
957
958 if (!name_info (data, var)->iv)
959 {
960 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
961
962 if (!bb
963 || !flow_bb_inside_loop_p (data->current_loop, bb))
964 set_iv (data, var, var, build_int_cst (type, 0));
965 }
966
967 return name_info (data, var)->iv;
968 }
969
970 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
971 not define a simple affine biv with nonzero step. */
972
973 static tree
974 determine_biv_step (gimple phi)
975 {
976 struct loop *loop = gimple_bb (phi)->loop_father;
977 tree name = PHI_RESULT (phi);
978 affine_iv iv;
979
980 if (!is_gimple_reg (name))
981 return NULL_TREE;
982
983 if (!simple_iv (loop, loop, name, &iv, true))
984 return NULL_TREE;
985
986 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
987 }
988
989 /* Finds basic ivs. */
990
991 static bool
992 find_bivs (struct ivopts_data *data)
993 {
994 gimple phi;
995 tree step, type, base;
996 bool found = false;
997 struct loop *loop = data->current_loop;
998 gimple_stmt_iterator psi;
999
1000 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1001 {
1002 phi = gsi_stmt (psi);
1003
1004 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1005 continue;
1006
1007 step = determine_biv_step (phi);
1008 if (!step)
1009 continue;
1010
1011 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1012 base = expand_simple_operations (base);
1013 if (contains_abnormal_ssa_name_p (base)
1014 || contains_abnormal_ssa_name_p (step))
1015 continue;
1016
1017 type = TREE_TYPE (PHI_RESULT (phi));
1018 base = fold_convert (type, base);
1019 if (step)
1020 {
1021 if (POINTER_TYPE_P (type))
1022 step = fold_convert (sizetype, step);
1023 else
1024 step = fold_convert (type, step);
1025 }
1026
1027 set_iv (data, PHI_RESULT (phi), base, step);
1028 found = true;
1029 }
1030
1031 return found;
1032 }
1033
1034 /* Marks basic ivs. */
1035
1036 static void
1037 mark_bivs (struct ivopts_data *data)
1038 {
1039 gimple phi;
1040 tree var;
1041 struct iv *iv, *incr_iv;
1042 struct loop *loop = data->current_loop;
1043 basic_block incr_bb;
1044 gimple_stmt_iterator psi;
1045
1046 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1047 {
1048 phi = gsi_stmt (psi);
1049
1050 iv = get_iv (data, PHI_RESULT (phi));
1051 if (!iv)
1052 continue;
1053
1054 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1055 incr_iv = get_iv (data, var);
1056 if (!incr_iv)
1057 continue;
1058
1059 /* If the increment is in the subloop, ignore it. */
1060 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1061 if (incr_bb->loop_father != data->current_loop
1062 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1063 continue;
1064
1065 iv->biv_p = true;
1066 incr_iv->biv_p = true;
1067 }
1068 }
1069
1070 /* Checks whether STMT defines a linear induction variable and stores its
1071 parameters to IV. */
1072
1073 static bool
1074 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1075 {
1076 tree lhs;
1077 struct loop *loop = data->current_loop;
1078
1079 iv->base = NULL_TREE;
1080 iv->step = NULL_TREE;
1081
1082 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1083 return false;
1084
1085 lhs = gimple_assign_lhs (stmt);
1086 if (TREE_CODE (lhs) != SSA_NAME)
1087 return false;
1088
1089 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1090 return false;
1091 iv->base = expand_simple_operations (iv->base);
1092
1093 if (contains_abnormal_ssa_name_p (iv->base)
1094 || contains_abnormal_ssa_name_p (iv->step))
1095 return false;
1096
1097 return true;
1098 }
1099
1100 /* Finds general ivs in statement STMT. */
1101
1102 static void
1103 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1104 {
1105 affine_iv iv;
1106
1107 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1108 return;
1109
1110 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1111 }
1112
1113 /* Finds general ivs in basic block BB. */
1114
1115 static void
1116 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1117 {
1118 gimple_stmt_iterator bsi;
1119
1120 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1121 find_givs_in_stmt (data, gsi_stmt (bsi));
1122 }
1123
1124 /* Finds general ivs. */
1125
1126 static void
1127 find_givs (struct ivopts_data *data)
1128 {
1129 struct loop *loop = data->current_loop;
1130 basic_block *body = get_loop_body_in_dom_order (loop);
1131 unsigned i;
1132
1133 for (i = 0; i < loop->num_nodes; i++)
1134 find_givs_in_bb (data, body[i]);
1135 free (body);
1136 }
1137
1138 /* For each ssa name defined in LOOP determines whether it is an induction
1139 variable and if so, its initial value and step. */
1140
1141 static bool
1142 find_induction_variables (struct ivopts_data *data)
1143 {
1144 unsigned i;
1145 bitmap_iterator bi;
1146
1147 if (!find_bivs (data))
1148 return false;
1149
1150 find_givs (data);
1151 mark_bivs (data);
1152
1153 if (dump_file && (dump_flags & TDF_DETAILS))
1154 {
1155 tree niter = niter_for_single_dom_exit (data);
1156
1157 if (niter)
1158 {
1159 fprintf (dump_file, " number of iterations ");
1160 print_generic_expr (dump_file, niter, TDF_SLIM);
1161 fprintf (dump_file, "\n\n");
1162 };
1163
1164 fprintf (dump_file, "Induction variables:\n\n");
1165
1166 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1167 {
1168 if (ver_info (data, i)->iv)
1169 dump_iv (dump_file, ver_info (data, i)->iv);
1170 }
1171 }
1172
1173 return true;
1174 }
1175
1176 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1177
1178 static struct iv_use *
1179 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1180 gimple stmt, enum use_type use_type)
1181 {
1182 struct iv_use *use = XCNEW (struct iv_use);
1183
1184 use->id = n_iv_uses (data);
1185 use->type = use_type;
1186 use->iv = iv;
1187 use->stmt = stmt;
1188 use->op_p = use_p;
1189 use->related_cands = BITMAP_ALLOC (NULL);
1190
1191 /* To avoid showing ssa name in the dumps, if it was not reset by the
1192 caller. */
1193 iv->ssa_name = NULL_TREE;
1194
1195 if (dump_file && (dump_flags & TDF_DETAILS))
1196 dump_use (dump_file, use);
1197
1198 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1199
1200 return use;
1201 }
1202
1203 /* Checks whether OP is a loop-level invariant and if so, records it.
1204 NONLINEAR_USE is true if the invariant is used in a way we do not
1205 handle specially. */
1206
1207 static void
1208 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1209 {
1210 basic_block bb;
1211 struct version_info *info;
1212
1213 if (TREE_CODE (op) != SSA_NAME
1214 || !is_gimple_reg (op))
1215 return;
1216
1217 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1218 if (bb
1219 && flow_bb_inside_loop_p (data->current_loop, bb))
1220 return;
1221
1222 info = name_info (data, op);
1223 info->name = op;
1224 info->has_nonlin_use |= nonlinear_use;
1225 if (!info->inv_id)
1226 info->inv_id = ++data->max_inv_id;
1227 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1228 }
1229
1230 /* Checks whether the use OP is interesting and if so, records it. */
1231
1232 static struct iv_use *
1233 find_interesting_uses_op (struct ivopts_data *data, tree op)
1234 {
1235 struct iv *iv;
1236 struct iv *civ;
1237 gimple stmt;
1238 struct iv_use *use;
1239
1240 if (TREE_CODE (op) != SSA_NAME)
1241 return NULL;
1242
1243 iv = get_iv (data, op);
1244 if (!iv)
1245 return NULL;
1246
1247 if (iv->have_use_for)
1248 {
1249 use = iv_use (data, iv->use_id);
1250
1251 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1252 return use;
1253 }
1254
1255 if (integer_zerop (iv->step))
1256 {
1257 record_invariant (data, op, true);
1258 return NULL;
1259 }
1260 iv->have_use_for = true;
1261
1262 civ = XNEW (struct iv);
1263 *civ = *iv;
1264
1265 stmt = SSA_NAME_DEF_STMT (op);
1266 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1267 || is_gimple_assign (stmt));
1268
1269 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1270 iv->use_id = use->id;
1271
1272 return use;
1273 }
1274
1275 /* Given a condition in statement STMT, checks whether it is a compare
1276 of an induction variable and an invariant. If this is the case,
1277 CONTROL_VAR is set to location of the iv, BOUND to the location of
1278 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1279 induction variable descriptions, and true is returned. If this is not
1280 the case, CONTROL_VAR and BOUND are set to the arguments of the
1281 condition and false is returned. */
1282
1283 static bool
1284 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1285 tree **control_var, tree **bound,
1286 struct iv **iv_var, struct iv **iv_bound)
1287 {
1288 /* The objects returned when COND has constant operands. */
1289 static struct iv const_iv;
1290 static tree zero;
1291 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1292 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1293 bool ret = false;
1294
1295 if (gimple_code (stmt) == GIMPLE_COND)
1296 {
1297 op0 = gimple_cond_lhs_ptr (stmt);
1298 op1 = gimple_cond_rhs_ptr (stmt);
1299 }
1300 else
1301 {
1302 op0 = gimple_assign_rhs1_ptr (stmt);
1303 op1 = gimple_assign_rhs2_ptr (stmt);
1304 }
1305
1306 zero = integer_zero_node;
1307 const_iv.step = integer_zero_node;
1308
1309 if (TREE_CODE (*op0) == SSA_NAME)
1310 iv0 = get_iv (data, *op0);
1311 if (TREE_CODE (*op1) == SSA_NAME)
1312 iv1 = get_iv (data, *op1);
1313
1314 /* Exactly one of the compared values must be an iv, and the other one must
1315 be an invariant. */
1316 if (!iv0 || !iv1)
1317 goto end;
1318
1319 if (integer_zerop (iv0->step))
1320 {
1321 /* Control variable may be on the other side. */
1322 tmp_op = op0; op0 = op1; op1 = tmp_op;
1323 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1324 }
1325 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1326
1327 end:
1328 if (control_var)
1329 *control_var = op0;;
1330 if (iv_var)
1331 *iv_var = iv0;;
1332 if (bound)
1333 *bound = op1;
1334 if (iv_bound)
1335 *iv_bound = iv1;
1336
1337 return ret;
1338 }
1339
1340 /* Checks whether the condition in STMT is interesting and if so,
1341 records it. */
1342
1343 static void
1344 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1345 {
1346 tree *var_p, *bound_p;
1347 struct iv *var_iv, *civ;
1348
1349 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1350 {
1351 find_interesting_uses_op (data, *var_p);
1352 find_interesting_uses_op (data, *bound_p);
1353 return;
1354 }
1355
1356 civ = XNEW (struct iv);
1357 *civ = *var_iv;
1358 record_use (data, NULL, civ, stmt, USE_COMPARE);
1359 }
1360
1361 /* Returns true if expression EXPR is obviously invariant in LOOP,
1362 i.e. if all its operands are defined outside of the LOOP. LOOP
1363 should not be the function body. */
1364
1365 bool
1366 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1367 {
1368 basic_block def_bb;
1369 unsigned i, len;
1370
1371 gcc_assert (loop_depth (loop) > 0);
1372
1373 if (is_gimple_min_invariant (expr))
1374 return true;
1375
1376 if (TREE_CODE (expr) == SSA_NAME)
1377 {
1378 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1379 if (def_bb
1380 && flow_bb_inside_loop_p (loop, def_bb))
1381 return false;
1382
1383 return true;
1384 }
1385
1386 if (!EXPR_P (expr))
1387 return false;
1388
1389 len = TREE_OPERAND_LENGTH (expr);
1390 for (i = 0; i < len; i++)
1391 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1392 return false;
1393
1394 return true;
1395 }
1396
1397 /* Returns true if statement STMT is obviously invariant in LOOP,
1398 i.e. if all its operands on the RHS are defined outside of the LOOP.
1399 LOOP should not be the function body. */
1400
1401 bool
1402 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1403 {
1404 unsigned i;
1405 tree lhs;
1406
1407 gcc_assert (loop_depth (loop) > 0);
1408
1409 lhs = gimple_get_lhs (stmt);
1410 for (i = 0; i < gimple_num_ops (stmt); i++)
1411 {
1412 tree op = gimple_op (stmt, i);
1413 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1414 return false;
1415 }
1416
1417 return true;
1418 }
1419
1420 /* Cumulates the steps of indices into DATA and replaces their values with the
1421 initial ones. Returns false when the value of the index cannot be determined.
1422 Callback for for_each_index. */
1423
1424 struct ifs_ivopts_data
1425 {
1426 struct ivopts_data *ivopts_data;
1427 gimple stmt;
1428 tree step;
1429 };
1430
1431 static bool
1432 idx_find_step (tree base, tree *idx, void *data)
1433 {
1434 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1435 struct iv *iv;
1436 tree step, iv_base, iv_step, lbound, off;
1437 struct loop *loop = dta->ivopts_data->current_loop;
1438
1439 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
1440 return false;
1441
1442 /* If base is a component ref, require that the offset of the reference
1443 be invariant. */
1444 if (TREE_CODE (base) == COMPONENT_REF)
1445 {
1446 off = component_ref_field_offset (base);
1447 return expr_invariant_in_loop_p (loop, off);
1448 }
1449
1450 /* If base is array, first check whether we will be able to move the
1451 reference out of the loop (in order to take its address in strength
1452 reduction). In order for this to work we need both lower bound
1453 and step to be loop invariants. */
1454 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1455 {
1456 /* Moreover, for a range, the size needs to be invariant as well. */
1457 if (TREE_CODE (base) == ARRAY_RANGE_REF
1458 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1459 return false;
1460
1461 step = array_ref_element_size (base);
1462 lbound = array_ref_low_bound (base);
1463
1464 if (!expr_invariant_in_loop_p (loop, step)
1465 || !expr_invariant_in_loop_p (loop, lbound))
1466 return false;
1467 }
1468
1469 if (TREE_CODE (*idx) != SSA_NAME)
1470 return true;
1471
1472 iv = get_iv (dta->ivopts_data, *idx);
1473 if (!iv)
1474 return false;
1475
1476 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1477 *&x[0], which is not folded and does not trigger the
1478 ARRAY_REF path below. */
1479 *idx = iv->base;
1480
1481 if (integer_zerop (iv->step))
1482 return true;
1483
1484 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1485 {
1486 step = array_ref_element_size (base);
1487
1488 /* We only handle addresses whose step is an integer constant. */
1489 if (TREE_CODE (step) != INTEGER_CST)
1490 return false;
1491 }
1492 else
1493 /* The step for pointer arithmetics already is 1 byte. */
1494 step = size_one_node;
1495
1496 iv_base = iv->base;
1497 iv_step = iv->step;
1498 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1499 sizetype, &iv_base, &iv_step, dta->stmt,
1500 false))
1501 {
1502 /* The index might wrap. */
1503 return false;
1504 }
1505
1506 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1507 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1508
1509 return true;
1510 }
1511
1512 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1513 object is passed to it in DATA. */
1514
1515 static bool
1516 idx_record_use (tree base, tree *idx,
1517 void *vdata)
1518 {
1519 struct ivopts_data *data = (struct ivopts_data *) vdata;
1520 find_interesting_uses_op (data, *idx);
1521 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1522 {
1523 find_interesting_uses_op (data, array_ref_element_size (base));
1524 find_interesting_uses_op (data, array_ref_low_bound (base));
1525 }
1526 return true;
1527 }
1528
1529 /* If we can prove that TOP = cst * BOT for some constant cst,
1530 store cst to MUL and return true. Otherwise return false.
1531 The returned value is always sign-extended, regardless of the
1532 signedness of TOP and BOT. */
1533
1534 static bool
1535 constant_multiple_of (tree top, tree bot, double_int *mul)
1536 {
1537 tree mby;
1538 enum tree_code code;
1539 double_int res, p0, p1;
1540 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1541
1542 STRIP_NOPS (top);
1543 STRIP_NOPS (bot);
1544
1545 if (operand_equal_p (top, bot, 0))
1546 {
1547 *mul = double_int_one;
1548 return true;
1549 }
1550
1551 code = TREE_CODE (top);
1552 switch (code)
1553 {
1554 case MULT_EXPR:
1555 mby = TREE_OPERAND (top, 1);
1556 if (TREE_CODE (mby) != INTEGER_CST)
1557 return false;
1558
1559 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1560 return false;
1561
1562 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1563 precision);
1564 return true;
1565
1566 case PLUS_EXPR:
1567 case MINUS_EXPR:
1568 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1569 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1570 return false;
1571
1572 if (code == MINUS_EXPR)
1573 p1 = double_int_neg (p1);
1574 *mul = double_int_sext (double_int_add (p0, p1), precision);
1575 return true;
1576
1577 case INTEGER_CST:
1578 if (TREE_CODE (bot) != INTEGER_CST)
1579 return false;
1580
1581 p0 = double_int_sext (tree_to_double_int (top), precision);
1582 p1 = double_int_sext (tree_to_double_int (bot), precision);
1583 if (double_int_zero_p (p1))
1584 return false;
1585 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1586 precision);
1587 return double_int_zero_p (res);
1588
1589 default:
1590 return false;
1591 }
1592 }
1593
1594 /* Returns true if memory reference REF with step STEP may be unaligned. */
1595
1596 static bool
1597 may_be_unaligned_p (tree ref, tree step)
1598 {
1599 tree base;
1600 tree base_type;
1601 HOST_WIDE_INT bitsize;
1602 HOST_WIDE_INT bitpos;
1603 tree toffset;
1604 enum machine_mode mode;
1605 int unsignedp, volatilep;
1606 unsigned base_align;
1607
1608 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1609 thus they are not misaligned. */
1610 if (TREE_CODE (ref) == TARGET_MEM_REF)
1611 return false;
1612
1613 /* The test below is basically copy of what expr.c:normal_inner_ref
1614 does to check whether the object must be loaded by parts when
1615 STRICT_ALIGNMENT is true. */
1616 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1617 &unsignedp, &volatilep, true);
1618 base_type = TREE_TYPE (base);
1619 base_align = TYPE_ALIGN (base_type);
1620
1621 if (mode != BLKmode)
1622 {
1623 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1624
1625 if (base_align < mode_align
1626 || (bitpos % mode_align) != 0
1627 || (bitpos % BITS_PER_UNIT) != 0)
1628 return true;
1629
1630 if (toffset
1631 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1632 return true;
1633
1634 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1635 return true;
1636 }
1637
1638 return false;
1639 }
1640
1641 /* Return true if EXPR may be non-addressable. */
1642
1643 static bool
1644 may_be_nonaddressable_p (tree expr)
1645 {
1646 switch (TREE_CODE (expr))
1647 {
1648 case TARGET_MEM_REF:
1649 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1650 target, thus they are always addressable. */
1651 return false;
1652
1653 case COMPONENT_REF:
1654 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1655 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1656
1657 case VIEW_CONVERT_EXPR:
1658 /* This kind of view-conversions may wrap non-addressable objects
1659 and make them look addressable. After some processing the
1660 non-addressability may be uncovered again, causing ADDR_EXPRs
1661 of inappropriate objects to be built. */
1662 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1663 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1664 return true;
1665
1666 /* ... fall through ... */
1667
1668 case ARRAY_REF:
1669 case ARRAY_RANGE_REF:
1670 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1671
1672 CASE_CONVERT:
1673 return true;
1674
1675 default:
1676 break;
1677 }
1678
1679 return false;
1680 }
1681
1682 /* Finds addresses in *OP_P inside STMT. */
1683
1684 static void
1685 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1686 {
1687 tree base = *op_p, step = size_zero_node;
1688 struct iv *civ;
1689 struct ifs_ivopts_data ifs_ivopts_data;
1690
1691 /* Do not play with volatile memory references. A bit too conservative,
1692 perhaps, but safe. */
1693 if (gimple_has_volatile_ops (stmt))
1694 goto fail;
1695
1696 /* Ignore bitfields for now. Not really something terribly complicated
1697 to handle. TODO. */
1698 if (TREE_CODE (base) == BIT_FIELD_REF)
1699 goto fail;
1700
1701 base = unshare_expr (base);
1702
1703 if (TREE_CODE (base) == TARGET_MEM_REF)
1704 {
1705 tree type = build_pointer_type (TREE_TYPE (base));
1706 tree astep;
1707
1708 if (TMR_BASE (base)
1709 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1710 {
1711 civ = get_iv (data, TMR_BASE (base));
1712 if (!civ)
1713 goto fail;
1714
1715 TMR_BASE (base) = civ->base;
1716 step = civ->step;
1717 }
1718 if (TMR_INDEX (base)
1719 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1720 {
1721 civ = get_iv (data, TMR_INDEX (base));
1722 if (!civ)
1723 goto fail;
1724
1725 TMR_INDEX (base) = civ->base;
1726 astep = civ->step;
1727
1728 if (astep)
1729 {
1730 if (TMR_STEP (base))
1731 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1732
1733 step = fold_build2 (PLUS_EXPR, type, step, astep);
1734 }
1735 }
1736
1737 if (integer_zerop (step))
1738 goto fail;
1739 base = tree_mem_ref_addr (type, base);
1740 }
1741 else
1742 {
1743 ifs_ivopts_data.ivopts_data = data;
1744 ifs_ivopts_data.stmt = stmt;
1745 ifs_ivopts_data.step = size_zero_node;
1746 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1747 || integer_zerop (ifs_ivopts_data.step))
1748 goto fail;
1749 step = ifs_ivopts_data.step;
1750
1751 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1752
1753 /* Check that the base expression is addressable. This needs
1754 to be done after substituting bases of IVs into it. */
1755 if (may_be_nonaddressable_p (base))
1756 goto fail;
1757
1758 /* Moreover, on strict alignment platforms, check that it is
1759 sufficiently aligned. */
1760 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1761 goto fail;
1762
1763 base = build_fold_addr_expr (base);
1764
1765 /* Substituting bases of IVs into the base expression might
1766 have caused folding opportunities. */
1767 if (TREE_CODE (base) == ADDR_EXPR)
1768 {
1769 tree *ref = &TREE_OPERAND (base, 0);
1770 while (handled_component_p (*ref))
1771 ref = &TREE_OPERAND (*ref, 0);
1772 if (TREE_CODE (*ref) == MEM_REF)
1773 {
1774 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1775 TREE_OPERAND (*ref, 0),
1776 TREE_OPERAND (*ref, 1));
1777 if (tem)
1778 *ref = tem;
1779 }
1780 }
1781 }
1782
1783 civ = alloc_iv (base, step);
1784 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1785 return;
1786
1787 fail:
1788 for_each_index (op_p, idx_record_use, data);
1789 }
1790
1791 /* Finds and records invariants used in STMT. */
1792
1793 static void
1794 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1795 {
1796 ssa_op_iter iter;
1797 use_operand_p use_p;
1798 tree op;
1799
1800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1801 {
1802 op = USE_FROM_PTR (use_p);
1803 record_invariant (data, op, false);
1804 }
1805 }
1806
1807 /* Finds interesting uses of induction variables in the statement STMT. */
1808
1809 static void
1810 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1811 {
1812 struct iv *iv;
1813 tree op, *lhs, *rhs;
1814 ssa_op_iter iter;
1815 use_operand_p use_p;
1816 enum tree_code code;
1817
1818 find_invariants_stmt (data, stmt);
1819
1820 if (gimple_code (stmt) == GIMPLE_COND)
1821 {
1822 find_interesting_uses_cond (data, stmt);
1823 return;
1824 }
1825
1826 if (is_gimple_assign (stmt))
1827 {
1828 lhs = gimple_assign_lhs_ptr (stmt);
1829 rhs = gimple_assign_rhs1_ptr (stmt);
1830
1831 if (TREE_CODE (*lhs) == SSA_NAME)
1832 {
1833 /* If the statement defines an induction variable, the uses are not
1834 interesting by themselves. */
1835
1836 iv = get_iv (data, *lhs);
1837
1838 if (iv && !integer_zerop (iv->step))
1839 return;
1840 }
1841
1842 code = gimple_assign_rhs_code (stmt);
1843 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1844 && (REFERENCE_CLASS_P (*rhs)
1845 || is_gimple_val (*rhs)))
1846 {
1847 if (REFERENCE_CLASS_P (*rhs))
1848 find_interesting_uses_address (data, stmt, rhs);
1849 else
1850 find_interesting_uses_op (data, *rhs);
1851
1852 if (REFERENCE_CLASS_P (*lhs))
1853 find_interesting_uses_address (data, stmt, lhs);
1854 return;
1855 }
1856 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1857 {
1858 find_interesting_uses_cond (data, stmt);
1859 return;
1860 }
1861
1862 /* TODO -- we should also handle address uses of type
1863
1864 memory = call (whatever);
1865
1866 and
1867
1868 call (memory). */
1869 }
1870
1871 if (gimple_code (stmt) == GIMPLE_PHI
1872 && gimple_bb (stmt) == data->current_loop->header)
1873 {
1874 iv = get_iv (data, PHI_RESULT (stmt));
1875
1876 if (iv && !integer_zerop (iv->step))
1877 return;
1878 }
1879
1880 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1881 {
1882 op = USE_FROM_PTR (use_p);
1883
1884 if (TREE_CODE (op) != SSA_NAME)
1885 continue;
1886
1887 iv = get_iv (data, op);
1888 if (!iv)
1889 continue;
1890
1891 find_interesting_uses_op (data, op);
1892 }
1893 }
1894
1895 /* Finds interesting uses of induction variables outside of loops
1896 on loop exit edge EXIT. */
1897
1898 static void
1899 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1900 {
1901 gimple phi;
1902 gimple_stmt_iterator psi;
1903 tree def;
1904
1905 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1906 {
1907 phi = gsi_stmt (psi);
1908 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1909 if (is_gimple_reg (def))
1910 find_interesting_uses_op (data, def);
1911 }
1912 }
1913
1914 /* Finds uses of the induction variables that are interesting. */
1915
1916 static void
1917 find_interesting_uses (struct ivopts_data *data)
1918 {
1919 basic_block bb;
1920 gimple_stmt_iterator bsi;
1921 basic_block *body = get_loop_body (data->current_loop);
1922 unsigned i;
1923 struct version_info *info;
1924 edge e;
1925
1926 if (dump_file && (dump_flags & TDF_DETAILS))
1927 fprintf (dump_file, "Uses:\n\n");
1928
1929 for (i = 0; i < data->current_loop->num_nodes; i++)
1930 {
1931 edge_iterator ei;
1932 bb = body[i];
1933
1934 FOR_EACH_EDGE (e, ei, bb->succs)
1935 if (e->dest != EXIT_BLOCK_PTR
1936 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1937 find_interesting_uses_outside (data, e);
1938
1939 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1940 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1941 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1942 if (!is_gimple_debug (gsi_stmt (bsi)))
1943 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1944 }
1945
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1947 {
1948 bitmap_iterator bi;
1949
1950 fprintf (dump_file, "\n");
1951
1952 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1953 {
1954 info = ver_info (data, i);
1955 if (info->inv_id)
1956 {
1957 fprintf (dump_file, " ");
1958 print_generic_expr (dump_file, info->name, TDF_SLIM);
1959 fprintf (dump_file, " is invariant (%d)%s\n",
1960 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1961 }
1962 }
1963
1964 fprintf (dump_file, "\n");
1965 }
1966
1967 free (body);
1968 }
1969
1970 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1971 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1972 we are at the top-level of the processed address. */
1973
1974 static tree
1975 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1976 unsigned HOST_WIDE_INT *offset)
1977 {
1978 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1979 enum tree_code code;
1980 tree type, orig_type = TREE_TYPE (expr);
1981 unsigned HOST_WIDE_INT off0, off1, st;
1982 tree orig_expr = expr;
1983
1984 STRIP_NOPS (expr);
1985
1986 type = TREE_TYPE (expr);
1987 code = TREE_CODE (expr);
1988 *offset = 0;
1989
1990 switch (code)
1991 {
1992 case INTEGER_CST:
1993 if (!cst_and_fits_in_hwi (expr)
1994 || integer_zerop (expr))
1995 return orig_expr;
1996
1997 *offset = int_cst_value (expr);
1998 return build_int_cst (orig_type, 0);
1999
2000 case POINTER_PLUS_EXPR:
2001 case PLUS_EXPR:
2002 case MINUS_EXPR:
2003 op0 = TREE_OPERAND (expr, 0);
2004 op1 = TREE_OPERAND (expr, 1);
2005
2006 op0 = strip_offset_1 (op0, false, false, &off0);
2007 op1 = strip_offset_1 (op1, false, false, &off1);
2008
2009 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2010 if (op0 == TREE_OPERAND (expr, 0)
2011 && op1 == TREE_OPERAND (expr, 1))
2012 return orig_expr;
2013
2014 if (integer_zerop (op1))
2015 expr = op0;
2016 else if (integer_zerop (op0))
2017 {
2018 if (code == MINUS_EXPR)
2019 expr = fold_build1 (NEGATE_EXPR, type, op1);
2020 else
2021 expr = op1;
2022 }
2023 else
2024 expr = fold_build2 (code, type, op0, op1);
2025
2026 return fold_convert (orig_type, expr);
2027
2028 case MULT_EXPR:
2029 op1 = TREE_OPERAND (expr, 1);
2030 if (!cst_and_fits_in_hwi (op1))
2031 return orig_expr;
2032
2033 op0 = TREE_OPERAND (expr, 0);
2034 op0 = strip_offset_1 (op0, false, false, &off0);
2035 if (op0 == TREE_OPERAND (expr, 0))
2036 return orig_expr;
2037
2038 *offset = off0 * int_cst_value (op1);
2039 if (integer_zerop (op0))
2040 expr = op0;
2041 else
2042 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2043
2044 return fold_convert (orig_type, expr);
2045
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (!inside_addr)
2049 return orig_expr;
2050
2051 step = array_ref_element_size (expr);
2052 if (!cst_and_fits_in_hwi (step))
2053 break;
2054
2055 st = int_cst_value (step);
2056 op1 = TREE_OPERAND (expr, 1);
2057 op1 = strip_offset_1 (op1, false, false, &off1);
2058 *offset = off1 * st;
2059
2060 if (top_compref
2061 && integer_zerop (op1))
2062 {
2063 /* Strip the component reference completely. */
2064 op0 = TREE_OPERAND (expr, 0);
2065 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2066 *offset += off0;
2067 return op0;
2068 }
2069 break;
2070
2071 case COMPONENT_REF:
2072 if (!inside_addr)
2073 return orig_expr;
2074
2075 tmp = component_ref_field_offset (expr);
2076 if (top_compref
2077 && cst_and_fits_in_hwi (tmp))
2078 {
2079 /* Strip the component reference completely. */
2080 op0 = TREE_OPERAND (expr, 0);
2081 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2082 *offset = off0 + int_cst_value (tmp);
2083 return op0;
2084 }
2085 break;
2086
2087 case ADDR_EXPR:
2088 op0 = TREE_OPERAND (expr, 0);
2089 op0 = strip_offset_1 (op0, true, true, &off0);
2090 *offset += off0;
2091
2092 if (op0 == TREE_OPERAND (expr, 0))
2093 return orig_expr;
2094
2095 expr = build_fold_addr_expr (op0);
2096 return fold_convert (orig_type, expr);
2097
2098 case MEM_REF:
2099 /* ??? Offset operand? */
2100 inside_addr = false;
2101 break;
2102
2103 default:
2104 return orig_expr;
2105 }
2106
2107 /* Default handling of expressions for which we want to recurse into
2108 the first operand. */
2109 op0 = TREE_OPERAND (expr, 0);
2110 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2111 *offset += off0;
2112
2113 if (op0 == TREE_OPERAND (expr, 0)
2114 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2115 return orig_expr;
2116
2117 expr = copy_node (expr);
2118 TREE_OPERAND (expr, 0) = op0;
2119 if (op1)
2120 TREE_OPERAND (expr, 1) = op1;
2121
2122 /* Inside an address, we might strip the top-level component references,
2123 thus changing the type of the expression. The handling of ADDR_EXPR
2124 will fix that. */
2125 expr = fold_convert (orig_type, expr);
2126
2127 return expr;
2128 }
2129
2130 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2131
2132 static tree
2133 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2134 {
2135 return strip_offset_1 (expr, false, false, offset);
2136 }
2137
2138 /* Returns a variant of TYPE that can be used as a base for different uses.
2139 We return an unsigned type with the same precision, which avoids problems
2140 with overflows. */
2141
2142 static tree
2143 generic_type_for (tree type)
2144 {
2145 if (POINTER_TYPE_P (type))
2146 return unsigned_type_for (type);
2147
2148 if (TYPE_UNSIGNED (type))
2149 return type;
2150
2151 return unsigned_type_for (type);
2152 }
2153
2154 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2155 the bitmap in which we should store it. */
2156
2157 static struct ivopts_data *fd_ivopts_data;
2158 static tree
2159 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2160 {
2161 bitmap *depends_on = (bitmap *) data;
2162 struct version_info *info;
2163
2164 if (TREE_CODE (*expr_p) != SSA_NAME)
2165 return NULL_TREE;
2166 info = name_info (fd_ivopts_data, *expr_p);
2167
2168 if (!info->inv_id || info->has_nonlin_use)
2169 return NULL_TREE;
2170
2171 if (!*depends_on)
2172 *depends_on = BITMAP_ALLOC (NULL);
2173 bitmap_set_bit (*depends_on, info->inv_id);
2174
2175 return NULL_TREE;
2176 }
2177
2178 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2179 and the position to POS. If USE is not NULL, the candidate is set as related to
2180 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2181 replacement of the final value of the iv by a direct computation. */
2182
2183 static struct iv_cand *
2184 add_candidate_1 (struct ivopts_data *data,
2185 tree base, tree step, bool important, enum iv_position pos,
2186 struct iv_use *use, gimple incremented_at)
2187 {
2188 unsigned i;
2189 struct iv_cand *cand = NULL;
2190 tree type, orig_type;
2191
2192 if (base)
2193 {
2194 orig_type = TREE_TYPE (base);
2195 type = generic_type_for (orig_type);
2196 if (type != orig_type)
2197 {
2198 base = fold_convert (type, base);
2199 step = fold_convert (type, step);
2200 }
2201 }
2202
2203 for (i = 0; i < n_iv_cands (data); i++)
2204 {
2205 cand = iv_cand (data, i);
2206
2207 if (cand->pos != pos)
2208 continue;
2209
2210 if (cand->incremented_at != incremented_at
2211 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2212 && cand->ainc_use != use))
2213 continue;
2214
2215 if (!cand->iv)
2216 {
2217 if (!base && !step)
2218 break;
2219
2220 continue;
2221 }
2222
2223 if (!base && !step)
2224 continue;
2225
2226 if (operand_equal_p (base, cand->iv->base, 0)
2227 && operand_equal_p (step, cand->iv->step, 0)
2228 && (TYPE_PRECISION (TREE_TYPE (base))
2229 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2230 break;
2231 }
2232
2233 if (i == n_iv_cands (data))
2234 {
2235 cand = XCNEW (struct iv_cand);
2236 cand->id = i;
2237
2238 if (!base && !step)
2239 cand->iv = NULL;
2240 else
2241 cand->iv = alloc_iv (base, step);
2242
2243 cand->pos = pos;
2244 if (pos != IP_ORIGINAL && cand->iv)
2245 {
2246 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2247 cand->var_after = cand->var_before;
2248 }
2249 cand->important = important;
2250 cand->incremented_at = incremented_at;
2251 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2252
2253 if (step
2254 && TREE_CODE (step) != INTEGER_CST)
2255 {
2256 fd_ivopts_data = data;
2257 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2258 }
2259
2260 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2261 cand->ainc_use = use;
2262 else
2263 cand->ainc_use = NULL;
2264
2265 if (dump_file && (dump_flags & TDF_DETAILS))
2266 dump_cand (dump_file, cand);
2267 }
2268
2269 if (important && !cand->important)
2270 {
2271 cand->important = true;
2272 if (dump_file && (dump_flags & TDF_DETAILS))
2273 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2274 }
2275
2276 if (use)
2277 {
2278 bitmap_set_bit (use->related_cands, i);
2279 if (dump_file && (dump_flags & TDF_DETAILS))
2280 fprintf (dump_file, "Candidate %d is related to use %d\n",
2281 cand->id, use->id);
2282 }
2283
2284 return cand;
2285 }
2286
2287 /* Returns true if incrementing the induction variable at the end of the LOOP
2288 is allowed.
2289
2290 The purpose is to avoid splitting the latch edge with a biv increment, thus
2291 creating a jump, possibly confusing other optimization passes and leaving
2292 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2293 is not available (so we do not have a better alternative), or if the latch
2294 edge is already nonempty. */
2295
2296 static bool
2297 allow_ip_end_pos_p (struct loop *loop)
2298 {
2299 if (!ip_normal_pos (loop))
2300 return true;
2301
2302 if (!empty_block_p (ip_end_pos (loop)))
2303 return true;
2304
2305 return false;
2306 }
2307
2308 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2309 The important field is set to IMPORTANT. */
2310
2311 static void
2312 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2313 bool important, struct iv_use *use)
2314 {
2315 basic_block use_bb = gimple_bb (use->stmt);
2316 enum machine_mode mem_mode;
2317 unsigned HOST_WIDE_INT cstepi;
2318
2319 /* If we insert the increment in any position other than the standard
2320 ones, we must ensure that it is incremented once per iteration.
2321 It must not be in an inner nested loop, or one side of an if
2322 statement. */
2323 if (use_bb->loop_father != data->current_loop
2324 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2325 || stmt_could_throw_p (use->stmt)
2326 || !cst_and_fits_in_hwi (step))
2327 return;
2328
2329 cstepi = int_cst_value (step);
2330
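/* E.g. (illustrative): for a use "*p" with a 4-byte access and step 4 on a
   target with pre-increment addressing, we add a candidate with base p - 4
   that is incremented just before the use, so the increment can be folded
   into the address as a pre-increment. */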
2331 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2332 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2333 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2334 {
2335 enum tree_code code = MINUS_EXPR;
2336 tree new_base;
2337 tree new_step = step;
2338
2339 if (POINTER_TYPE_P (TREE_TYPE (base)))
2340 {
2341 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2342 code = POINTER_PLUS_EXPR;
2343 }
2344 else
2345 new_step = fold_convert (TREE_TYPE (base), new_step);
2346 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2347 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2348 use->stmt);
2349 }
2350 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2351 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2352 {
2353 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2354 use->stmt);
2355 }
2356 }
2357
2358 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2359 and the position to POS. If USE is not NULL, the candidate is set as related to
2360 it. The candidate computation is scheduled on all available positions. */
2361
2362 static void
2363 add_candidate (struct ivopts_data *data,
2364 tree base, tree step, bool important, struct iv_use *use)
2365 {
2366 if (ip_normal_pos (data->current_loop))
2367 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2368 if (ip_end_pos (data->current_loop)
2369 && allow_ip_end_pos_p (data->current_loop))
2370 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2371
2372 if (use != NULL && use->type == USE_ADDRESS)
2373 add_autoinc_candidates (data, base, step, important, use);
2374 }
2375
2376 /* Add a standard "0 + 1 * iteration" iv candidate for a
2377 type with SIZE bits. */
2378
2379 static void
2380 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2381 unsigned int size)
2382 {
2383 tree type = lang_hooks.types.type_for_size (size, true);
2384 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2385 true, NULL);
2386 }
2387
2388 /* Adds standard iv candidates. */
2389
2390 static void
2391 add_standard_iv_candidates (struct ivopts_data *data)
2392 {
2393 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2394
2395 /* The same for a double-integer type if it is still fast enough. */
2396 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2397 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2398 }
2399
2400
2401 /* Adds candidates based on the old induction variable IV. */
2402
2403 static void
2404 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2405 {
2406 gimple phi;
2407 tree def;
2408 struct iv_cand *cand;
2409
2410 add_candidate (data, iv->base, iv->step, true, NULL);
2411
2412 /* The same, but with initial value zero. */
2413 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2414 add_candidate (data, size_int (0), iv->step, true, NULL);
2415 else
2416 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2417 iv->step, true, NULL);
2418
2419 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2420 if (gimple_code (phi) == GIMPLE_PHI)
2421 {
2422 /* Additionally record the possibility of leaving the original iv
2423 untouched. */
2424 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2425 cand = add_candidate_1 (data,
2426 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2427 SSA_NAME_DEF_STMT (def));
2428 cand->var_before = iv->ssa_name;
2429 cand->var_after = def;
2430 }
2431 }
2432
2433 /* Adds candidates based on the old induction variables. */
2434
2435 static void
2436 add_old_ivs_candidates (struct ivopts_data *data)
2437 {
2438 unsigned i;
2439 struct iv *iv;
2440 bitmap_iterator bi;
2441
2442 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2443 {
2444 iv = ver_info (data, i)->iv;
2445 if (iv && iv->biv_p && !integer_zerop (iv->step))
2446 add_old_iv_candidates (data, iv);
2447 }
2448 }
2449
2450 /* Adds candidates based on the value of the induction variable IV and USE. */
2451
2452 static void
2453 add_iv_value_candidates (struct ivopts_data *data,
2454 struct iv *iv, struct iv_use *use)
2455 {
2456 unsigned HOST_WIDE_INT offset;
2457 tree base;
2458 tree basetype;
2459
2460 add_candidate (data, iv->base, iv->step, false, use);
2461
2462 /* The same, but with initial value zero. Make such variable important,
2463 since it is generic enough so that possibly many uses may be based
2464 on it. */
2465 basetype = TREE_TYPE (iv->base);
2466 if (POINTER_TYPE_P (basetype))
2467 basetype = sizetype;
2468 add_candidate (data, build_int_cst (basetype, 0),
2469 iv->step, true, use);
2470
2471 /* Third, try removing the constant offset. Make sure to even
2472 add a candidate for &a[0] vs. (T *)&a. */
2473 base = strip_offset (iv->base, &offset);
2474 if (offset
2475 || base != iv->base)
2476 add_candidate (data, base, iv->step, false, use);
2477 }
2478
2479 /* Adds candidates based on the uses. */
2480
2481 static void
2482 add_derived_ivs_candidates (struct ivopts_data *data)
2483 {
2484 unsigned i;
2485
2486 for (i = 0; i < n_iv_uses (data); i++)
2487 {
2488 struct iv_use *use = iv_use (data, i);
2489
2490 if (!use)
2491 continue;
2492
2493 switch (use->type)
2494 {
2495 case USE_NONLINEAR_EXPR:
2496 case USE_COMPARE:
2497 case USE_ADDRESS:
2498 /* Just add the ivs based on the value of the iv used here. */
2499 add_iv_value_candidates (data, use->iv, use);
2500 break;
2501
2502 default:
2503 gcc_unreachable ();
2504 }
2505 }
2506 }
2507
2508 /* Record important candidates and add them to related_cands bitmaps
2509 if needed. */
2510
2511 static void
2512 record_important_candidates (struct ivopts_data *data)
2513 {
2514 unsigned i;
2515 struct iv_use *use;
2516
2517 for (i = 0; i < n_iv_cands (data); i++)
2518 {
2519 struct iv_cand *cand = iv_cand (data, i);
2520
2521 if (cand->important)
2522 bitmap_set_bit (data->important_candidates, i);
2523 }
2524
2525 data->consider_all_candidates = (n_iv_cands (data)
2526 <= CONSIDER_ALL_CANDIDATES_BOUND);
2527
2528 if (data->consider_all_candidates)
2529 {
2530 /* We will not need "related_cands" bitmaps in this case,
2531 so release them to decrease peak memory consumption. */
2532 for (i = 0; i < n_iv_uses (data); i++)
2533 {
2534 use = iv_use (data, i);
2535 BITMAP_FREE (use->related_cands);
2536 }
2537 }
2538 else
2539 {
2540 /* Add important candidates to the related_cands bitmaps. */
2541 for (i = 0; i < n_iv_uses (data); i++)
2542 bitmap_ior_into (iv_use (data, i)->related_cands,
2543 data->important_candidates);
2544 }
2545 }
2546
2547 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2548 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2549 we allocate a simple list for every use. */
2550
2551 static void
2552 alloc_use_cost_map (struct ivopts_data *data)
2553 {
2554 unsigned i, size, s, j;
2555
2556 for (i = 0; i < n_iv_uses (data); i++)
2557 {
2558 struct iv_use *use = iv_use (data, i);
2559 bitmap_iterator bi;
2560
2561 if (data->consider_all_candidates)
2562 size = n_iv_cands (data);
2563 else
2564 {
2565 s = 0;
2566 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2567 {
2568 s++;
2569 }
2570
2571 /* Round up to a power of two, so that computing the modulo by it is fast. */
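/* E.g. (illustrative): five related candidates round SIZE up to 8. */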
2572 for (size = 1; size < s; size <<= 1)
2573 continue;
2574 }
2575
2576 use->n_map_members = size;
2577 use->cost_map = XCNEWVEC (struct cost_pair, size);
2578 }
2579 }
2580
2581 /* Returns a description of the computation cost of an expression whose
2582 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2583
2584 static comp_cost
2585 new_cost (unsigned runtime, unsigned complexity)
2586 {
2587 comp_cost cost;
2588
2589 cost.cost = runtime;
2590 cost.complexity = complexity;
2591
2592 return cost;
2593 }
2594
2595 /* Adds costs COST1 and COST2. */
2596
2597 static comp_cost
2598 add_costs (comp_cost cost1, comp_cost cost2)
2599 {
2600 cost1.cost += cost2.cost;
2601 cost1.complexity += cost2.complexity;
2602
2603 return cost1;
2604 }
2605 /* Subtracts cost COST2 from COST1. */
2606
2607 static comp_cost
2608 sub_costs (comp_cost cost1, comp_cost cost2)
2609 {
2610 cost1.cost -= cost2.cost;
2611 cost1.complexity -= cost2.complexity;
2612
2613 return cost1;
2614 }
2615
2616 /* Returns a negative number if COST1 < COST2, a positive number if
2617 COST1 > COST2, and 0 if COST1 = COST2. */
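/* E.g. (illustrative): {cost 3, complexity 5} compares below {cost 4,
   complexity 0}; complexity only breaks ties in runtime cost. */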
2618
2619 static int
2620 compare_costs (comp_cost cost1, comp_cost cost2)
2621 {
2622 if (cost1.cost == cost2.cost)
2623 return cost1.complexity - cost2.complexity;
2624
2625 return cost1.cost - cost2.cost;
2626 }
2627
2628 /* Returns true if COST is infinite. */
2629
2630 static bool
2631 infinite_cost_p (comp_cost cost)
2632 {
2633 return cost.cost == INFTY;
2634 }
2635
2636 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2637 depends on invariants DEPENDS_ON and that the value used in expressing it
2638 is VALUE. */
2639
2640 static void
2641 set_use_iv_cost (struct ivopts_data *data,
2642 struct iv_use *use, struct iv_cand *cand,
2643 comp_cost cost, bitmap depends_on, tree value,
2644 int inv_expr_id)
2645 {
2646 unsigned i, s;
2647
2648 if (infinite_cost_p (cost))
2649 {
2650 BITMAP_FREE (depends_on);
2651 return;
2652 }
2653
2654 if (data->consider_all_candidates)
2655 {
2656 use->cost_map[cand->id].cand = cand;
2657 use->cost_map[cand->id].cost = cost;
2658 use->cost_map[cand->id].depends_on = depends_on;
2659 use->cost_map[cand->id].value = value;
2660 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2661 return;
2662 }
2663
2664 /* n_map_members is a power of two, so this computes modulo. */
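/* E.g. (illustrative): with n_map_members == 8, candidate id 13 starts at
   slot 13 & 7 == 5, probes slots 5..7 and then wraps around to 0..4 until
   a free entry is found (open addressing with linear probing). */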
2665 s = cand->id & (use->n_map_members - 1);
2666 for (i = s; i < use->n_map_members; i++)
2667 if (!use->cost_map[i].cand)
2668 goto found;
2669 for (i = 0; i < s; i++)
2670 if (!use->cost_map[i].cand)
2671 goto found;
2672
2673 gcc_unreachable ();
2674
2675 found:
2676 use->cost_map[i].cand = cand;
2677 use->cost_map[i].cost = cost;
2678 use->cost_map[i].depends_on = depends_on;
2679 use->cost_map[i].value = value;
2680 use->cost_map[i].inv_expr_id = inv_expr_id;
2681 }
2682
2683 /* Gets cost of (USE, CANDIDATE) pair. */
2684
2685 static struct cost_pair *
2686 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2687 struct iv_cand *cand)
2688 {
2689 unsigned i, s;
2690 struct cost_pair *ret;
2691
2692 if (!cand)
2693 return NULL;
2694
2695 if (data->consider_all_candidates)
2696 {
2697 ret = use->cost_map + cand->id;
2698 if (!ret->cand)
2699 return NULL;
2700
2701 return ret;
2702 }
2703
2704 /* n_map_members is a power of two, so this computes modulo. */
2705 s = cand->id & (use->n_map_members - 1);
2706 for (i = s; i < use->n_map_members; i++)
2707 if (use->cost_map[i].cand == cand)
2708 return use->cost_map + i;
2709
2710 for (i = 0; i < s; i++)
2711 if (use->cost_map[i].cand == cand)
2712 return use->cost_map + i;
2713
2714 return NULL;
2715 }
2716
2717 /* Returns an estimate of the cost of computing SEQ. */
2718
2719 static unsigned
2720 seq_cost (rtx seq, bool speed)
2721 {
2722 unsigned cost = 0;
2723 rtx set;
2724
2725 for (; seq; seq = NEXT_INSN (seq))
2726 {
2727 set = single_set (seq);
2728 if (set)
2729 cost += rtx_cost (set, SET, speed);
2730 else
2731 cost++;
2732 }
2733
2734 return cost;
2735 }
2736
2737 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2738 static rtx
2739 produce_memory_decl_rtl (tree obj, int *regno)
2740 {
2741 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2742 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2743 rtx x;
2744
2745 gcc_assert (obj);
2746 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2747 {
2748 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2749 x = gen_rtx_SYMBOL_REF (address_mode, name);
2750 SET_SYMBOL_REF_DECL (x, obj);
2751 x = gen_rtx_MEM (DECL_MODE (obj), x);
2752 set_mem_addr_space (x, as);
2753 targetm.encode_section_info (obj, x, true);
2754 }
2755 else
2756 {
2757 x = gen_raw_REG (address_mode, (*regno)++);
2758 x = gen_rtx_MEM (DECL_MODE (obj), x);
2759 set_mem_addr_space (x, as);
2760 }
2761
2762 return x;
2763 }
2764
2765 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2766 walk_tree. DATA contains the actual fake register number. */
2767
2768 static tree
2769 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2770 {
2771 tree obj = NULL_TREE;
2772 rtx x = NULL_RTX;
2773 int *regno = (int *) data;
2774
2775 switch (TREE_CODE (*expr_p))
2776 {
2777 case ADDR_EXPR:
2778 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2779 handled_component_p (*expr_p);
2780 expr_p = &TREE_OPERAND (*expr_p, 0))
2781 continue;
2782 obj = *expr_p;
2783 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2784 x = produce_memory_decl_rtl (obj, regno);
2785 break;
2786
2787 case SSA_NAME:
2788 *ws = 0;
2789 obj = SSA_NAME_VAR (*expr_p);
2790 if (!DECL_RTL_SET_P (obj))
2791 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2792 break;
2793
2794 case VAR_DECL:
2795 case PARM_DECL:
2796 case RESULT_DECL:
2797 *ws = 0;
2798 obj = *expr_p;
2799
2800 if (DECL_RTL_SET_P (obj))
2801 break;
2802
2803 if (DECL_MODE (obj) == BLKmode)
2804 x = produce_memory_decl_rtl (obj, regno);
2805 else
2806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2807
2808 break;
2809
2810 default:
2811 break;
2812 }
2813
2814 if (x)
2815 {
2816 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2817 SET_DECL_RTL (obj, x);
2818 }
2819
2820 return NULL_TREE;
2821 }
2822
2823 /* Determines cost of the computation of EXPR. */
2824
2825 static unsigned
2826 computation_cost (tree expr, bool speed)
2827 {
2828 rtx seq, rslt;
2829 tree type = TREE_TYPE (expr);
2830 unsigned cost;
2831 /* Avoid using hard regs in ways which may be unsupported. */
2832 int regno = LAST_VIRTUAL_REGISTER + 1;
2833 struct cgraph_node *node = cgraph_node (current_function_decl);
2834 enum node_frequency real_frequency = node->frequency;
2835
2836 node->frequency = NODE_FREQUENCY_NORMAL;
2837 crtl->maybe_hot_insn_p = speed;
2838 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2839 start_sequence ();
2840 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2841 seq = get_insns ();
2842 end_sequence ();
2843 default_rtl_profile ();
2844 node->frequency = real_frequency;
2845
2846 cost = seq_cost (seq, speed);
2847 if (MEM_P (rslt))
2848 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2849 TYPE_ADDR_SPACE (type), speed);
2850
2851 return cost;
2852 }
2853
2854 /* Returns variable containing the value of candidate CAND at statement AT. */
2855
2856 static tree
2857 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2858 {
2859 if (stmt_after_increment (loop, cand, stmt))
2860 return cand->var_after;
2861 else
2862 return cand->var_before;
2863 }
2864
2865 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2866 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
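/* E.g. (illustrative): for a 16-bit type, BITNO is 15, so the constant
   0x8000 yields 1 while 0x7fff yields 0. */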
2867
2868 int
2869 tree_int_cst_sign_bit (const_tree t)
2870 {
2871 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2872 unsigned HOST_WIDE_INT w;
2873
2874 if (bitno < HOST_BITS_PER_WIDE_INT)
2875 w = TREE_INT_CST_LOW (t);
2876 else
2877 {
2878 w = TREE_INT_CST_HIGH (t);
2879 bitno -= HOST_BITS_PER_WIDE_INT;
2880 }
2881
2882 return (w >> bitno) & 1;
2883 }
2884
2885 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2886 same precision that is at least as wide as the precision of TYPE, stores
2887 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2888 type of A and B. */
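/* E.g. (illustrative): if A is (unsigned int) la and B is (unsigned int) lb
   where LA and LB have type long, *A becomes LA, *B becomes LB, and "long"
   is returned, so the difference can be folded in the wider type. */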
2889
2890 static tree
2891 determine_common_wider_type (tree *a, tree *b)
2892 {
2893 tree wider_type = NULL;
2894 tree suba, subb;
2895 tree atype = TREE_TYPE (*a);
2896
2897 if (CONVERT_EXPR_P (*a))
2898 {
2899 suba = TREE_OPERAND (*a, 0);
2900 wider_type = TREE_TYPE (suba);
2901 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2902 return atype;
2903 }
2904 else
2905 return atype;
2906
2907 if (CONVERT_EXPR_P (*b))
2908 {
2909 subb = TREE_OPERAND (*b, 0);
2910 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2911 return atype;
2912 }
2913 else
2914 return atype;
2915
2916 *a = suba;
2917 *b = subb;
2918 return wider_type;
2919 }
2920
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in a decomposed
2923 form into AFF. Returns false if USE cannot be expressed using CAND. */
2924
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, gimple at,
2928 struct affine_tree_combination *aff)
2929 {
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step, cstep_common;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree common_type, var;
2936 tree uutype;
2937 aff_tree cbase_aff, var_aff;
2938 double_int rat;
2939
2940 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2941 {
2942 /* We do not have a precision to express the values of the use. */
2943 return false;
2944 }
2945
2946 var = var_at_stmt (loop, cand, at);
2947 uutype = unsigned_type_for (utype);
2948
2949 /* If the conversion is not noop, perform it. */
2950 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2951 {
2952 cstep = fold_convert (uutype, cstep);
2953 cbase = fold_convert (uutype, cbase);
2954 var = fold_convert (uutype, var);
2955 }
2956
2957 if (!constant_multiple_of (ustep, cstep, &rat))
2958 return false;
2959
2960 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2961 type, we achieve better folding by computing their difference in this
2962 wider type, and cast the result to UUTYPE. We do not need to worry about
2963 overflows, as all the arithmetic will in the end be performed in UUTYPE
2964 anyway. */
2965 common_type = determine_common_wider_type (&ubase, &cbase);
2966
2967 /* use = ubase - ratio * cbase + ratio * var. */
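/* E.g. (illustrative): for a use {base 4, step 2} and a candidate {base 0,
   step 1}, RAT is 2 and the use value is rebuilt as 4 - 2 * 0 + 2 * var. */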
2968 tree_to_aff_combination (ubase, common_type, aff);
2969 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2970 tree_to_aff_combination (var, uutype, &var_aff);
2971
2972 /* We need to shift the value if we are after the increment. */
2973 if (stmt_after_increment (loop, cand, at))
2974 {
2975 aff_tree cstep_aff;
2976
2977 if (common_type != uutype)
2978 cstep_common = fold_convert (common_type, cstep);
2979 else
2980 cstep_common = cstep;
2981
2982 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2983 aff_combination_add (&cbase_aff, &cstep_aff);
2984 }
2985
2986 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2987 aff_combination_add (aff, &cbase_aff);
2988 if (common_type != uutype)
2989 aff_combination_convert (aff, uutype);
2990
2991 aff_combination_scale (&var_aff, rat);
2992 aff_combination_add (aff, &var_aff);
2993
2994 return true;
2995 }
2996
2997 /* Determines the expression by which USE is expressed from induction variable
2998 CAND at statement AT in LOOP. The computation is unshared. */
2999
3000 static tree
3001 get_computation_at (struct loop *loop,
3002 struct iv_use *use, struct iv_cand *cand, gimple at)
3003 {
3004 aff_tree aff;
3005 tree type = TREE_TYPE (use->iv->base);
3006
3007 if (!get_computation_aff (loop, use, cand, at, &aff))
3008 return NULL_TREE;
3009 unshare_aff_combination (&aff);
3010 return fold_convert (type, aff_combination_to_tree (&aff));
3011 }
3012
3013 /* Determines the expression by which USE is expressed from induction variable
3014 CAND in LOOP. The computation is unshared. */
3015
3016 static tree
3017 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3018 {
3019 return get_computation_at (loop, use, cand, use->stmt);
3020 }
3021
3022 /* Adjust the cost COST for being in loop setup rather than loop body.
3023 If we're optimizing for space, the loop setup overhead is constant;
3024 if we're optimizing for speed, amortize it over the per-iteration cost. */
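/* E.g. (illustrative): a setup cost of 40 in a loop expected to run 10
   iterations on average is accounted as 4 per iteration when optimizing
   for speed. */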
3025 static unsigned
3026 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3027 {
3028 if (cost == INFTY)
3029 return cost;
3030 else if (optimize_loop_for_speed_p (data->current_loop))
3031 return cost / avg_loop_niter (data->current_loop);
3032 else
3033 return cost;
3034 }
3035
3036 /* Returns cost of addition in MODE. */
3037
3038 static unsigned
3039 add_cost (enum machine_mode mode, bool speed)
3040 {
3041 static unsigned costs[NUM_MACHINE_MODES];
3042 rtx seq;
3043 unsigned cost;
3044
3045 if (costs[mode])
3046 return costs[mode];
3047
3048 start_sequence ();
3049 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3050 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3052 NULL_RTX);
3053 seq = get_insns ();
3054 end_sequence ();
3055
3056 cost = seq_cost (seq, speed);
3057 if (!cost)
3058 cost = 1;
3059
3060 costs[mode] = cost;
3061
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3063 fprintf (dump_file, "Addition in %s costs %d\n",
3064 GET_MODE_NAME (mode), cost);
3065 return cost;
3066 }
3067
3068 /* Entry in a hashtable of already known costs for multiplication. */
3069 struct mbc_entry
3070 {
3071 HOST_WIDE_INT cst; /* The constant to multiply by. */
3072 enum machine_mode mode; /* In mode. */
3073 unsigned cost; /* The cost. */
3074 };
3075
3076 /* Computes the hash value for ENTRY. */
3077
3078 static hashval_t
3079 mbc_entry_hash (const void *entry)
3080 {
3081 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3082
3083 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3084 }
3085
3086 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3087
3088 static int
3089 mbc_entry_eq (const void *entry1, const void *entry2)
3090 {
3091 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3092 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3093
3094 return (e1->mode == e2->mode
3095 && e1->cst == e2->cst);
3096 }
3097
3098 /* Returns cost of multiplication by constant CST in MODE. */
3099
3100 unsigned
3101 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3102 {
3103 static htab_t costs;
3104 struct mbc_entry **cached, act;
3105 rtx seq;
3106 unsigned cost;
3107
3108 if (!costs)
3109 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3110
3111 act.mode = mode;
3112 act.cst = cst;
3113 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3114 if (*cached)
3115 return (*cached)->cost;
3116
3117 *cached = XNEW (struct mbc_entry);
3118 (*cached)->mode = mode;
3119 (*cached)->cst = cst;
3120
3121 start_sequence ();
3122 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3123 gen_int_mode (cst, mode), NULL_RTX, 0);
3124 seq = get_insns ();
3125 end_sequence ();
3126
3127 cost = seq_cost (seq, speed);
3128
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3131 (int) cst, GET_MODE_NAME (mode), cost);
3132
3133 (*cached)->cost = cost;
3134
3135 return cost;
3136 }
3137
3138 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3139 validity for a memory reference accessing memory of mode MODE in
3140 address space AS. */
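/* E.g. (illustrative): on a target whose addressing modes allow
   base + index * scale with scale 1, 2, 4 or 8 (such as x86), exactly those
   ratios are recorded as valid multipliers. */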
3141
3142 DEF_VEC_P (sbitmap);
3143 DEF_VEC_ALLOC_P (sbitmap, heap);
3144
3145 bool
3146 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3147 addr_space_t as)
3148 {
3149 #define MAX_RATIO 128
3150 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3151 static VEC (sbitmap, heap) *valid_mult_list;
3152 sbitmap valid_mult;
3153
3154 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3155 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3156
3157 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3158 if (!valid_mult)
3159 {
3160 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3161 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3162 rtx addr;
3163 HOST_WIDE_INT i;
3164
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 sbitmap_zero (valid_mult);
3167 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3169 {
3170 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3171 if (memory_address_addr_space_p (mode, addr, as))
3172 SET_BIT (valid_mult, i + MAX_RATIO);
3173 }
3174
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 {
3177 fprintf (dump_file, " allowed multipliers:");
3178 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3179 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3180 fprintf (dump_file, " %d", (int) i);
3181 fprintf (dump_file, "\n");
3182 fprintf (dump_file, "\n");
3183 }
3184
3185 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3186 }
3187
3188 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3189 return false;
3190
3191 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3192 }
3193
3194 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3195 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3196 variable is omitted. Compute the cost for a memory reference that accesses
3197 a memory location of mode MEM_MODE in address space AS.
3198
3199 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3200 size of MEM_MODE / RATIO) is available. To make this determination, we
3201 look at the size of the increment to be made, which is given in CSTEP.
3202 CSTEP may be zero if the step is unknown.
3203 STMT_AFTER_INC is true iff the statement we're looking at is after the
3204 increment of the original biv.
3205
3206 TODO -- there must be some better way. This all is quite crude. */
3207
3208 typedef struct
3209 {
3210 HOST_WIDE_INT min_offset, max_offset;
3211 unsigned costs[2][2][2][2];
3212 } *address_cost_data;
3213
3214 DEF_VEC_P (address_cost_data);
3215 DEF_VEC_ALLOC_P (address_cost_data, heap);
3216
3217 static comp_cost
3218 get_address_cost (bool symbol_present, bool var_present,
3219 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3220 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3221 addr_space_t as, bool speed,
3222 bool stmt_after_inc, bool *may_autoinc)
3223 {
3224 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3225 static VEC(address_cost_data, heap) *address_cost_data_list;
3226 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3227 address_cost_data data;
3228 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3229 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3230 unsigned cost, acost, complexity;
3231 bool offset_p, ratio_p, autoinc;
3232 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3233 unsigned HOST_WIDE_INT mask;
3234 unsigned bits;
3235
3236 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3237 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3238 data_index + 1);
3239
3240 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3241 if (!data)
3242 {
3243 HOST_WIDE_INT i;
3244 HOST_WIDE_INT rat, off = 0;
3245 int old_cse_not_expected, width;
3246 unsigned sym_p, var_p, off_p, rat_p, add_c;
3247 rtx seq, addr, base;
3248 rtx reg0, reg1;
3249
3250 data = (address_cost_data) xcalloc (1, sizeof (*data));
3251
3252 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3253
3254 width = GET_MODE_BITSIZE (address_mode) - 1;
3255 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3256 width = HOST_BITS_PER_WIDE_INT - 1;
3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3258
3259 for (i = width; i >= 0; i--)
3260 {
3261 off = -((HOST_WIDE_INT) 1 << i);
3262 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3263 if (memory_address_addr_space_p (mem_mode, addr, as))
3264 break;
3265 }
3266 data->min_offset = (i == -1 ? 0 : off);
3267
3268 for (i = width; i >= 0; i--)
3269 {
3270 off = ((HOST_WIDE_INT) 1 << i) - 1;
3271 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3272 if (memory_address_addr_space_p (mem_mode, addr, as))
3273 break;
3274 }
3275 if (i == -1)
3276 off = 0;
3277 data->max_offset = off;
3278
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3280 {
3281 fprintf (dump_file, "get_address_cost:\n");
3282 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3283 GET_MODE_NAME (mem_mode),
3284 data->min_offset);
3285 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3286 GET_MODE_NAME (mem_mode),
3287 data->max_offset);
3288 }
3289
3290 rat = 1;
3291 for (i = 2; i <= MAX_RATIO; i++)
3292 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3293 {
3294 rat = i;
3295 break;
3296 }
3297
3298 /* Compute the cost of various addressing modes. */
3299 acost = 0;
3300 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3301 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3302
3303 if (HAVE_PRE_DECREMENT)
3304 {
3305 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3306 has_predec[mem_mode]
3307 = memory_address_addr_space_p (mem_mode, addr, as);
3308 }
3309 if (HAVE_POST_DECREMENT)
3310 {
3311 addr = gen_rtx_POST_DEC (address_mode, reg0);
3312 has_postdec[mem_mode]
3313 = memory_address_addr_space_p (mem_mode, addr, as);
3314 }
3315 if (HAVE_PRE_INCREMENT)
3316 {
3317 addr = gen_rtx_PRE_INC (address_mode, reg0);
3318 has_preinc[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3320 }
3321 if (HAVE_POST_INCREMENT)
3322 {
3323 addr = gen_rtx_POST_INC (address_mode, reg0);
3324 has_postinc[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3326 }
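/* Probe all 16 combinations of the symbol, var, offset and ratio parts
   being present; bit 0 of I selects the symbol, bit 1 the var, bit 2 the
   offset and bit 3 the ratio, matching the decoding below. */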
3327 for (i = 0; i < 16; i++)
3328 {
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3333
3334 addr = reg0;
3335 if (rat_p)
3336 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337 gen_int_mode (rat, address_mode));
3338
3339 if (var_p)
3340 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3341
3342 if (sym_p)
3343 {
3344 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3350
3351 if (off_p)
3352 base = gen_rtx_fmt_e (CONST, address_mode,
3353 gen_rtx_fmt_ee
3354 (PLUS, address_mode, base,
3355 gen_int_mode (off, address_mode)));
3356 }
3357 else if (off_p)
3358 base = gen_int_mode (off, address_mode);
3359 else
3360 base = NULL_RTX;
3361
3362 if (base)
3363 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3364
3365 start_sequence ();
3366 /* To avoid splitting addressing modes, pretend that no cse will
3367 follow. */
3368 old_cse_not_expected = cse_not_expected;
3369 cse_not_expected = true;
3370 addr = memory_address_addr_space (mem_mode, addr, as);
3371 cse_not_expected = old_cse_not_expected;
3372 seq = get_insns ();
3373 end_sequence ();
3374
3375 acost = seq_cost (seq, speed);
3376 acost += address_cost (addr, mem_mode, as, speed);
3377
3378 if (!acost)
3379 acost = 1;
3380 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3381 }
3382
3383 /* On some targets, it is quite expensive to load a symbol into a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in a register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3389
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c = add_cost (address_mode, speed);
3396 for (i = 0; i < 8; i++)
3397 {
3398 var_p = i & 1;
3399 off_p = (i >> 1) & 1;
3400 rat_p = (i >> 2) & 1;
3401
3402 acost = data->costs[0][1][off_p][rat_p] + 1;
3403 if (var_p)
3404 acost += add_c;
3405
3406 if (acost < data->costs[1][var_p][off_p][rat_p])
3407 data->costs[1][var_p][off_p][rat_p] = acost;
3408 }
3409
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3411 {
3412 fprintf (dump_file, "Address costs:\n");
3413
3414 for (i = 0; i < 16; i++)
3415 {
3416 sym_p = i & 1;
3417 var_p = (i >> 1) & 1;
3418 off_p = (i >> 2) & 1;
3419 rat_p = (i >> 3) & 1;
3420
3421 fprintf (dump_file, " ");
3422 if (sym_p)
3423 fprintf (dump_file, "sym + ");
3424 if (var_p)
3425 fprintf (dump_file, "var + ");
3426 if (off_p)
3427 fprintf (dump_file, "cst + ");
3428 if (rat_p)
3429 fprintf (dump_file, "rat * ");
3430
3431 acost = data->costs[sym_p][var_p][off_p][rat_p];
3432 fprintf (dump_file, "index costs %d\n", acost);
3433 }
3434 if (has_predec[mem_mode] || has_postdec[mem_mode]
3435 || has_preinc[mem_mode] || has_postinc[mem_mode])
3436 fprintf (dump_file, " May include autoinc/dec\n");
3437 fprintf (dump_file, "\n");
3438 }
3439
3440 VEC_replace (address_cost_data, address_cost_data_list,
3441 data_index, data);
3442 }
3443
3444 bits = GET_MODE_BITSIZE (address_mode);
3445 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3446 offset &= mask;
3447 if ((offset >> (bits - 1) & 1))
3448 offset |= ~mask;
3449 s_offset = offset;
3450
3451 autoinc = false;
3452 msize = GET_MODE_SIZE (mem_mode);
3453 autoinc_offset = offset;
3454 if (stmt_after_inc)
3455 autoinc_offset += ratio * cstep;
3456 if (symbol_present || var_present || ratio != 1)
3457 autoinc = false;
3458 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3459 && msize == cstep)
3460 || (has_postdec[mem_mode] && autoinc_offset == 0
3461 && msize == -cstep)
3462 || (has_preinc[mem_mode] && autoinc_offset == msize
3463 && msize == cstep)
3464 || (has_predec[mem_mode] && autoinc_offset == -msize
3465 && msize == -cstep))
3466 autoinc = true;
3467
3468 cost = 0;
3469 offset_p = (s_offset != 0
3470 && data->min_offset <= s_offset
3471 && s_offset <= data->max_offset);
3472 ratio_p = (ratio != 1
3473 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3474
3475 if (ratio != 1 && !ratio_p)
3476 cost += multiply_by_cost (ratio, address_mode, speed);
3477
3478 if (s_offset && !offset_p && !symbol_present)
3479 cost += add_cost (address_mode, speed);
3480
3481 if (may_autoinc)
3482 *may_autoinc = autoinc;
3483 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3484 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3485 return new_cost (cost + acost, complexity);
3486 }
3487
3488 /* Estimates cost of forcing expression EXPR into a variable. */
3489
3490 static comp_cost
3491 force_expr_to_var_cost (tree expr, bool speed)
3492 {
3493 static bool costs_initialized = false;
3494 static unsigned integer_cost [2];
3495 static unsigned symbol_cost [2];
3496 static unsigned address_cost [2];
3497 tree op0, op1;
3498 comp_cost cost0, cost1, cost;
3499 enum machine_mode mode;
3500
3501 if (!costs_initialized)
3502 {
3503 tree type = build_pointer_type (integer_type_node);
3504 tree var, addr;
3505 rtx x;
3506 int i;
3507
3508 var = create_tmp_var_raw (integer_type_node, "test_var");
3509 TREE_STATIC (var) = 1;
3510 x = produce_memory_decl_rtl (var, NULL);
3511 SET_DECL_RTL (var, x);
3512
3513 addr = build1 (ADDR_EXPR, type, var);
3514
3515
3516 for (i = 0; i < 2; i++)
3517 {
3518 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3519 2000), i);
3520
3521 symbol_cost[i] = computation_cost (addr, i) + 1;
3522
3523 address_cost[i]
3524 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3525 addr,
3526 build_int_cst (sizetype, 2000)), i) + 1;
3527 if (dump_file && (dump_flags & TDF_DETAILS))
3528 {
3529 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3530 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3531 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3532 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3533 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3534 fprintf (dump_file, "\n");
3535 }
3536 }
3537
3538 costs_initialized = true;
3539 }
3540
3541 STRIP_NOPS (expr);
3542
3543 if (SSA_VAR_P (expr))
3544 return zero_cost;
3545
3546 if (is_gimple_min_invariant (expr))
3547 {
3548 if (TREE_CODE (expr) == INTEGER_CST)
3549 return new_cost (integer_cost [speed], 0);
3550
3551 if (TREE_CODE (expr) == ADDR_EXPR)
3552 {
3553 tree obj = TREE_OPERAND (expr, 0);
3554
3555 if (TREE_CODE (obj) == VAR_DECL
3556 || TREE_CODE (obj) == PARM_DECL
3557 || TREE_CODE (obj) == RESULT_DECL)
3558 return new_cost (symbol_cost [speed], 0);
3559 }
3560
3561 return new_cost (address_cost [speed], 0);
3562 }
3563
3564 switch (TREE_CODE (expr))
3565 {
3566 case POINTER_PLUS_EXPR:
3567 case PLUS_EXPR:
3568 case MINUS_EXPR:
3569 case MULT_EXPR:
3570 op0 = TREE_OPERAND (expr, 0);
3571 op1 = TREE_OPERAND (expr, 1);
3572 STRIP_NOPS (op0);
3573 STRIP_NOPS (op1);
3574
3575 if (is_gimple_val (op0))
3576 cost0 = zero_cost;
3577 else
3578 cost0 = force_expr_to_var_cost (op0, speed);
3579
3580 if (is_gimple_val (op1))
3581 cost1 = zero_cost;
3582 else
3583 cost1 = force_expr_to_var_cost (op1, speed);
3584
3585 break;
3586
3587 case NEGATE_EXPR:
3588 op0 = TREE_OPERAND (expr, 0);
3589 STRIP_NOPS (op0);
3590 op1 = NULL_TREE;
3591
3592 if (is_gimple_val (op0))
3593 cost0 = zero_cost;
3594 else
3595 cost0 = force_expr_to_var_cost (op0, speed);
3596
3597 cost1 = zero_cost;
3598 break;
3599
3600 default:
3601 /* Just an arbitrary value, FIXME. */
3602 return new_cost (target_spill_cost[speed], 0);
3603 }
3604
3605 mode = TYPE_MODE (TREE_TYPE (expr));
3606 switch (TREE_CODE (expr))
3607 {
3608 case POINTER_PLUS_EXPR:
3609 case PLUS_EXPR:
3610 case MINUS_EXPR:
3611 case NEGATE_EXPR:
3612 cost = new_cost (add_cost (mode, speed), 0);
3613 break;
3614
3615 case MULT_EXPR:
3616 if (cst_and_fits_in_hwi (op0))
3617 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3618 else if (cst_and_fits_in_hwi (op1))
3619 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3620 else
3621 return new_cost (target_spill_cost [speed], 0);
3622 break;
3623
3624 default:
3625 gcc_unreachable ();
3626 }
3627
3628 cost = add_costs (cost, cost0);
3629 cost = add_costs (cost, cost1);
3630
3631 /* Bound the cost by target_spill_cost. The parts of complicated
3632 computations often are either loop invariant or at least can
3633 be shared between several iv uses, so letting this grow without
3634 limits would not give reasonable results. */
3635 if (cost.cost > (int) target_spill_cost [speed])
3636 cost.cost = target_spill_cost [speed];
3637
3638 return cost;
3639 }
3640
3641 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3642 invariants the computation depends on. */
3643
3644 static comp_cost
3645 force_var_cost (struct ivopts_data *data,
3646 tree expr, bitmap *depends_on)
3647 {
3648 if (depends_on)
3649 {
3650 fd_ivopts_data = data;
3651 walk_tree (&expr, find_depends, depends_on, NULL);
3652 }
3653
3654 return force_expr_to_var_cost (expr, data->speed);
3655 }
3656
3657 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3658 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3659 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3660 invariants the computation depends on. */
3661
3662 static comp_cost
3663 split_address_cost (struct ivopts_data *data,
3664 tree addr, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3666 {
3667 tree core;
3668 HOST_WIDE_INT bitsize;
3669 HOST_WIDE_INT bitpos;
3670 tree toffset;
3671 enum machine_mode mode;
3672 int unsignedp, volatilep;
3673
3674 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3675 &unsignedp, &volatilep, false);
3676
3677 if (toffset != 0
3678 || bitpos % BITS_PER_UNIT != 0
3679 || TREE_CODE (core) != VAR_DECL)
3680 {
3681 *symbol_present = false;
3682 *var_present = true;
3683 fd_ivopts_data = data;
3684 walk_tree (&addr, find_depends, depends_on, NULL);
3685 return new_cost (target_spill_cost[data->speed], 0);
3686 }
3687
3688 *offset += bitpos / BITS_PER_UNIT;
3689 if (TREE_STATIC (core)
3690 || DECL_EXTERNAL (core))
3691 {
3692 *symbol_present = true;
3693 *var_present = false;
3694 return zero_cost;
3695 }
3696
3697 *symbol_present = false;
3698 *var_present = true;
3699 return zero_cost;
3700 }
3701
3702 /* Estimates cost of expressing difference of addresses E1 - E2 as
3703 var + symbol + offset. The value of offset is added to OFFSET,
3704 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3705 part is missing. DEPENDS_ON is a set of the invariants the computation
3706 depends on. */
3707
3708 static comp_cost
3709 ptr_difference_cost (struct ivopts_data *data,
3710 tree e1, tree e2, bool *symbol_present, bool *var_present,
3711 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3712 {
3713 HOST_WIDE_INT diff = 0;
3714 aff_tree aff_e1, aff_e2;
3715 tree type;
3716
3717 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3718
3719 if (ptr_difference_const (e1, e2, &diff))
3720 {
3721 *offset += diff;
3722 *symbol_present = false;
3723 *var_present = false;
3724 return zero_cost;
3725 }
3726
3727 if (integer_zerop (e2))
3728 return split_address_cost (data, TREE_OPERAND (e1, 0),
3729 symbol_present, var_present, offset, depends_on);
3730
3731 *symbol_present = false;
3732 *var_present = true;
3733
3734 type = signed_type_for (TREE_TYPE (e1));
3735 tree_to_aff_combination (e1, type, &aff_e1);
3736 tree_to_aff_combination (e2, type, &aff_e2);
3737 aff_combination_scale (&aff_e2, double_int_minus_one);
3738 aff_combination_add (&aff_e1, &aff_e2);
3739
3740 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3741 }
3742
3743 /* Estimates cost of expressing difference E1 - E2 as
3744 var + symbol + offset. The value of offset is added to OFFSET,
3745 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3746 part is missing. DEPENDS_ON is a set of the invariants the computation
3747 depends on. */
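/* E.g. (illustrative): for E1 == p + 4 and E2 == p + 16, the constant
   offsets are stripped first, *OFFSET is adjusted by -12, and since the
   stripped operands are equal, no variable part remains and zero cost is
   returned. */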
3748
3749 static comp_cost
3750 difference_cost (struct ivopts_data *data,
3751 tree e1, tree e2, bool *symbol_present, bool *var_present,
3752 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3753 {
3754 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3755 unsigned HOST_WIDE_INT off1, off2;
3756 aff_tree aff_e1, aff_e2;
3757 tree type;
3758
3759 e1 = strip_offset (e1, &off1);
3760 e2 = strip_offset (e2, &off2);
3761 *offset += off1 - off2;
3762
3763 STRIP_NOPS (e1);
3764 STRIP_NOPS (e2);
3765
3766 if (TREE_CODE (e1) == ADDR_EXPR)
3767 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3768 offset, depends_on);
3769 *symbol_present = false;
3770
3771 if (operand_equal_p (e1, e2, 0))
3772 {
3773 *var_present = false;
3774 return zero_cost;
3775 }
3776
3777 *var_present = true;
3778
3779 if (integer_zerop (e2))
3780 return force_var_cost (data, e1, depends_on);
3781
3782 if (integer_zerop (e1))
3783 {
3784 comp_cost cost = force_var_cost (data, e2, depends_on);
3785 cost.cost += multiply_by_cost (-1, mode, data->speed);
3786 return cost;
3787 }
3788
3789 type = signed_type_for (TREE_TYPE (e1));
3790 tree_to_aff_combination (e1, type, &aff_e1);
3791 tree_to_aff_combination (e2, type, &aff_e2);
3792 aff_combination_scale (&aff_e2, double_int_minus_one);
3793 aff_combination_add (&aff_e1, &aff_e2);
3794
3795 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3796 }
3797
3798 /* Returns true if AFF1 and AFF2 are identical. */
3799
3800 static bool
3801 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3802 {
3803 unsigned i;
3804
3805 if (aff1->n != aff2->n)
3806 return false;
3807
3808 for (i = 0; i < aff1->n; i++)
3809 {
3810 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3811 return false;
3812
3813 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3814 return false;
3815 }
3816 return true;
3817 }
3818
3819 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3820 requires a new compiler-generated temporary. Returns -1 otherwise.
3821 ADDRESS_P is a flag indicating if the expression is for address
3822 computation. */
3823
3824 static int
3825 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3826 tree cbase, HOST_WIDE_INT ratio,
3827 bool address_p)
3828 {
3829 aff_tree ubase_aff, cbase_aff;
3830 tree expr, ub, cb;
3831 struct iv_inv_expr_ent ent;
3832 struct iv_inv_expr_ent **slot;
3833
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3838
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3842
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3847 {
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3850 }
3851
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3856 {
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3859 }
3860
3861 if (address_p)
3862 {
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3868
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3874 }
3875
3876 if (ratio == 1)
3877 {
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3880
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3883 {
3884 tree usym, csym;
3885
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3889 {
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3895 }
3896 if (TREE_CODE (csym) == ARRAY_REF)
3897 {
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3903 }
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3906 }
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3912 }
3913
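/* Compute UB - RATIO * CB in affine form and look the result up in the
   inv_expr hash table, allocating a fresh pseudo expr id if the expression
   has not been recorded yet. */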
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3916
3917 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 ent.expr = expr;
3921 ent.hash = iterative_hash_expr (expr, 0);
3922 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3923 &ent, INSERT);
3924 if (*slot)
3925 return (*slot)->id;
3926
3927 *slot = XNEW (struct iv_inv_expr_ent);
3928 (*slot)->expr = expr;
3929 (*slot)->hash = ent.hash;
3930 (*slot)->id = data->inv_expr_id++;
3931 return (*slot)->id;
3932 }
3933
3934
3935
3936 /* Determines the cost of the computation by which USE is expressed
3937 from induction variable CAND. If ADDRESS_P is true, we just need
3938 to create an address from it, otherwise we want to get it into a
3939 register. A set of invariants we depend on is stored in
3940 DEPENDS_ON. AT is the statement at which the value is computed.
3941 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3942 addressing is likely. */
3943
3944 static comp_cost
3945 get_computation_cost_at (struct ivopts_data *data,
3946 struct iv_use *use, struct iv_cand *cand,
3947 bool address_p, bitmap *depends_on, gimple at,
3948 bool *can_autoinc,
3949 int *inv_expr_id)
3950 {
3951 tree ubase = use->iv->base, ustep = use->iv->step;
3952 tree cbase, cstep;
3953 tree utype = TREE_TYPE (ubase), ctype;
3954 unsigned HOST_WIDE_INT cstepi, offset = 0;
3955 HOST_WIDE_INT ratio, aratio;
3956 bool var_present, symbol_present, stmt_is_after_inc;
3957 comp_cost cost;
3958 double_int rat;
3959 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3960
3961 *depends_on = NULL;
3962
3963 /* Only consider real candidates. */
3964 if (!cand->iv)
3965 return infinite_cost;
3966
3967 cbase = cand->iv->base;
3968 cstep = cand->iv->step;
3969 ctype = TREE_TYPE (cbase);
3970
3971 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3972 {
3973 /* We do not have a precision to express the values of the use. */
3974 return infinite_cost;
3975 }
3976
3977 if (address_p)
3978 {
3979 /* Do not try to express address of an object with computation based
3980 on address of a different object. This may cause problems in rtl
3981 level alias analysis (that does not expect this to be happening,
3982 as this is illegal in C), and would be unlikely to be useful
3983 anyway. */
3984 if (use->iv->base_object
3985 && cand->iv->base_object
3986 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3987 return infinite_cost;
3988 }
3989
3990 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3991 {
3992 /* TODO -- add direct handling of this case. */
3993 goto fallback;
3994 }
3995
3996 /* CSTEPI is removed from the offset in case the statement is after the
3997 increment.  If the step is not constant, we use zero instead.
3998 This is a bit imprecise (there is the extra addition), but
3999 redundancy elimination is likely to transform the code so that
4000 it uses the value of the variable before the increment anyway,
4001 so it is not all that unrealistic.  */
4002 if (cst_and_fits_in_hwi (cstep))
4003 cstepi = int_cst_value (cstep);
4004 else
4005 cstepi = 0;
4006
4007 if (!constant_multiple_of (ustep, cstep, &rat))
4008 return infinite_cost;
4009
4010 if (double_int_fits_in_shwi_p (rat))
4011 ratio = double_int_to_shwi (rat);
4012 else
4013 return infinite_cost;
4014
4015 STRIP_NOPS (cbase);
4016 ctype = TREE_TYPE (cbase);
4017
4018 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4019 or ratio == 1, it is better to handle this like
4020
4021 ubase - ratio * cbase + ratio * var
4022
4023 (this also holds in the case ratio == -1, TODO).  */
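/* Editor's worked example with illustrative constants: for ubase == 100,
   cbase == 4 and ratio == 2, the use 100 + 2 * (var - 4) folds to
   92 + 2 * var; the -8 from -ratio * cbase is absorbed into OFFSET in
   the constant-cbase branch below.  */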
4024
4025 if (cst_and_fits_in_hwi (cbase))
4026 {
4027 offset = - ratio * int_cst_value (cbase);
4028 cost = difference_cost (data,
4029 ubase, build_int_cst (utype, 0),
4030 &symbol_present, &var_present, &offset,
4031 depends_on);
4032 cost.cost /= avg_loop_niter (data->current_loop);
4033 }
4034 else if (ratio == 1)
4035 {
4036 cost = difference_cost (data,
4037 ubase, cbase,
4038 &symbol_present, &var_present, &offset,
4039 depends_on);
4040 cost.cost /= avg_loop_niter (data->current_loop);
4041 }
4042 else if (address_p
4043 && !POINTER_TYPE_P (ctype)
4044 && multiplier_allowed_in_address_p
4045 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4046 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4047 {
4048 cbase
4049 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4050 cost = difference_cost (data,
4051 ubase, cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4055 }
4056 else
4057 {
4058 cost = force_var_cost (data, cbase, depends_on);
4059 cost = add_costs (cost,
4060 difference_cost (data,
4061 ubase, build_int_cst (utype, 0),
4062 &symbol_present, &var_present,
4063 &offset, depends_on));
4064 cost.cost /= avg_loop_niter (data->current_loop);
4065 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4066 }
4067
4068 if (inv_expr_id)
4069 {
4070 *inv_expr_id =
4071 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4072 /* Clear depends on. */
4073 if (*inv_expr_id != -1 && depends_on && *depends_on)
4074 bitmap_clear (*depends_on);
4075 }
4076
4077 /* If we are after the increment, the value of the candidate is higher by
4078 one iteration. */
4079 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4080 if (stmt_is_after_inc)
4081 offset -= ratio * cstepi;
4082
4083 /* Now the computation is of the form symbol + var1 + const + ratio * var2.
4084 (symbol/var1/const parts may be omitted). If we are looking for an
4085 address, find the cost of addressing this. */
4086 if (address_p)
4087 return add_costs (cost,
4088 get_address_cost (symbol_present, var_present,
4089 offset, ratio, cstepi,
4090 TYPE_MODE (TREE_TYPE (utype)),
4091 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4092 speed, stmt_is_after_inc,
4093 can_autoinc));
4094
4095 /* Otherwise estimate the costs for computing the expression. */
4096 if (!symbol_present && !var_present && !offset)
4097 {
4098 if (ratio != 1)
4099 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4100 return cost;
4101 }
4102
4103 /* Symbol + offset should be compile-time computable, so consider that
4104 they are added once to the variable, if present.  */
4105 if (var_present && (symbol_present || offset))
4106 cost.cost += adjust_setup_cost (data,
4107 add_cost (TYPE_MODE (ctype), speed));
4108
4109 /* Having an offset does not affect the runtime cost if it is added to
4110 a symbol, but it increases complexity.  */
4111 if (offset)
4112 cost.complexity++;
4113
4114 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4115
4116 aratio = ratio > 0 ? ratio : -ratio;
4117 if (aratio != 1)
4118 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4119 return cost;
4120
4121 fallback:
4122 if (can_autoinc)
4123 *can_autoinc = false;
4124
4125 {
4126 /* Just get the expression, expand it and measure the cost. */
4127 tree comp = get_computation_at (data->current_loop, use, cand, at);
4128
4129 if (!comp)
4130 return infinite_cost;
4131
4132 if (address_p)
4133 comp = build_simple_mem_ref (comp);
4134
4135 return new_cost (computation_cost (comp, speed), 0);
4136 }
4137 }
4138
4139 /* Determines the cost of the computation by which USE is expressed
4140 from induction variable CAND.  If ADDRESS_P is true, we just need
4141 to create an address from it, otherwise we want to get it into a
4142 register.  A set of invariants we depend on is stored in
4143 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4144 autoinc addressing is likely.  */
4145
4146 static comp_cost
4147 get_computation_cost (struct ivopts_data *data,
4148 struct iv_use *use, struct iv_cand *cand,
4149 bool address_p, bitmap *depends_on,
4150 bool *can_autoinc, int *inv_expr_id)
4151 {
4152 return get_computation_cost_at (data,
4153 use, cand, address_p, depends_on, use->stmt,
4154 can_autoinc, inv_expr_id);
4155 }
4156
4157 /* Determines cost of basing replacement of USE on CAND in a generic
4158 expression. */
4159
4160 static bool
4161 determine_use_iv_cost_generic (struct ivopts_data *data,
4162 struct iv_use *use, struct iv_cand *cand)
4163 {
4164 bitmap depends_on;
4165 comp_cost cost;
4166 int inv_expr_id = -1;
4167
4168 /* The simple case first -- if we need to express the value of the preserved
4169 original biv, the cost is 0. This also prevents us from counting the
4170 cost of increment twice -- once at this use and once in the cost of
4171 the candidate. */
4172 if (cand->pos == IP_ORIGINAL
4173 && cand->incremented_at == use->stmt)
4174 {
4175 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4176 return true;
4177 }
4178
4179 cost = get_computation_cost (data, use, cand, false, &depends_on,
4180 NULL, &inv_expr_id);
4181
4182 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4183 inv_expr_id);
4184
4185 return !infinite_cost_p (cost);
4186 }
4187
4188 /* Determines cost of basing replacement of USE on CAND in an address. */
4189
4190 static bool
4191 determine_use_iv_cost_address (struct ivopts_data *data,
4192 struct iv_use *use, struct iv_cand *cand)
4193 {
4194 bitmap depends_on;
4195 bool can_autoinc;
4196 int inv_expr_id = -1;
4197 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4198 &can_autoinc, &inv_expr_id);
4199
4200 if (cand->ainc_use == use)
4201 {
4202 if (can_autoinc)
4203 cost.cost -= cand->cost_step;
4204 /* If we generated the candidate solely for exploiting autoincrement
4205 opportunities, and it turns out it can't be used, set the cost to
4206 infinity to make sure we ignore it. */
4207 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4208 cost = infinite_cost;
4209 }
4210 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4211 inv_expr_id);
4212
4213 return !infinite_cost_p (cost);
4214 }
4215
4216 /* Computes value of candidate CAND at position AT in iteration NITER, and
4217 stores it to VAL. */
4218
4219 static void
4220 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4221 aff_tree *val)
4222 {
4223 aff_tree step, delta, nit;
4224 struct iv *iv = cand->iv;
4225 tree type = TREE_TYPE (iv->base);
4226 tree steptype = type;
4227 if (POINTER_TYPE_P (type))
4228 steptype = sizetype;
4229
4230 tree_to_aff_combination (iv->step, steptype, &step);
4231 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4232 aff_combination_convert (&nit, steptype);
4233 aff_combination_mult (&nit, &step, &delta);
4234 if (stmt_after_increment (loop, cand, at))
4235 aff_combination_add (&delta, &step);
4236
4237 tree_to_aff_combination (iv->base, type, val);
4238 aff_combination_add (val, &delta);
4239 }
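/* Editor's sketch (not part of GCC): in plain machine arithmetic,
   cand_value_at computes VAL = BASE + (NITER + AFTER_INC) * STEP, where
   AFTER_INC is 1 iff AT follows the candidate's increment.  On wrapping
   unsigned words:  */

static unsigned long
cand_value_at_sketch (unsigned long base, unsigned long step,
		      unsigned long niter, int after_inc)
{
  return base + (niter + (after_inc ? 1UL : 0UL)) * step;
}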
4240
4241 /* Returns period of induction variable iv. */
4242
4243 static tree
4244 iv_period (struct iv *iv)
4245 {
4246 tree step = iv->step, period, type;
4247 tree pow2div;
4248
4249 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4250
4251 type = unsigned_type_for (TREE_TYPE (step));
4252 /* The period of the iv is lcm (step, type_range) / step - 1,
4253 i.e., N * type_range / step - 1.  Since the type range is a power
4254 of two, N == step >> num_of_ending_zeros_binary (step), so the
4255 final result is
4256
4257 (type_range >> num_of_ending_zeros_binary (step)) - 1
4258
4259 */
4260 pow2div = num_ending_zeros (step);
4261
4262 period = build_low_bits_mask (type,
4263 (TYPE_PRECISION (type)
4264 - tree_low_cst (pow2div, 1)));
4265
4266 return period;
4267 }
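/* Editor's sketch (not part of GCC): the same computation on a 32-bit
   unsigned step.  E.g. for step == 4 there are two trailing zero bits,
   so period == 0xFFFFFFFF >> 2 == 0x3FFFFFFF, i.e. 2^30 - 1.  */

static unsigned
iv_period_sketch (unsigned step)
{
  unsigned tz = 0;

  /* Count the trailing zero bits of the step.  */
  while (step != 0 && (step & 1) == 0)
    {
      step >>= 1;
      tz++;
    }

  /* (type_range >> tz) - 1, expressed as a low-bits mask.  */
  return ~0u >> tz;
}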
4268
4269 /* Returns the comparison operator used when eliminating the iv USE. */
4270
4271 static enum tree_code
4272 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4273 {
4274 struct loop *loop = data->current_loop;
4275 basic_block ex_bb;
4276 edge exit;
4277
4278 ex_bb = gimple_bb (use->stmt);
4279 exit = EDGE_SUCC (ex_bb, 0);
4280 if (flow_bb_inside_loop_p (loop, exit->dest))
4281 exit = EDGE_SUCC (ex_bb, 1);
4282
4283 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4284 }
4285
4286 /* Check whether it is possible to express the condition in USE by comparison
4287 of candidate CAND. If so, store the value compared with to BOUND. */
4288
4289 static bool
4290 may_eliminate_iv (struct ivopts_data *data,
4291 struct iv_use *use, struct iv_cand *cand, tree *bound)
4292 {
4293 basic_block ex_bb;
4294 edge exit;
4295 tree nit, period;
4296 struct loop *loop = data->current_loop;
4297 aff_tree bnd;
4298 struct tree_niter_desc *desc = NULL;
4299
4300 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4301 return false;
4302
4303 /* For now works only for exits that dominate the loop latch.
4304 TODO: extend to other conditions inside loop body. */
4305 ex_bb = gimple_bb (use->stmt);
4306 if (use->stmt != last_stmt (ex_bb)
4307 || gimple_code (use->stmt) != GIMPLE_COND
4308 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4309 return false;
4310
4311 exit = EDGE_SUCC (ex_bb, 0);
4312 if (flow_bb_inside_loop_p (loop, exit->dest))
4313 exit = EDGE_SUCC (ex_bb, 1);
4314 if (flow_bb_inside_loop_p (loop, exit->dest))
4315 return false;
4316
4317 nit = niter_for_exit (data, exit, &desc);
4318 if (!nit)
4319 return false;
4320
4321 /* Determine whether we can use the variable to test the exit condition.
4322 This is the case iff the period of the induction variable is greater
4323 than the number of iterations for which the exit condition is true. */
4324 period = iv_period (cand->iv);
4325
4326 /* If the number of iterations is constant, compare against it directly. */
4327 if (TREE_CODE (nit) == INTEGER_CST)
4328 {
4329 /* See cand_value_at. */
4330 if (stmt_after_increment (loop, cand, use->stmt))
4331 {
4332 if (!tree_int_cst_lt (nit, period))
4333 return false;
4334 }
4335 else
4336 {
4337 if (tree_int_cst_lt (period, nit))
4338 return false;
4339 }
4340 }
4341
4342 /* If not, and if this is the only possible exit of the loop, see whether
4343 we can get a conservative estimate on the number of iterations of the
4344 entire loop and compare against that instead. */
4345 else
4346 {
4347 double_int period_value, max_niter;
4348
4349 max_niter = desc->max;
4350 if (stmt_after_increment (loop, cand, use->stmt))
4351 max_niter = double_int_add (max_niter, double_int_one);
4352 period_value = tree_to_double_int (period);
4353 if (double_int_ucmp (max_niter, period_value) > 0)
4354 {
4355 /* See if we can take advantage of inferred loop bound information.  */
4356 if (loop_only_exit_p (loop, exit))
4357 {
4358 if (!estimated_loop_iterations (loop, true, &max_niter))
4359 return false;
4360 /* The loop bound is already adjusted by adding 1. */
4361 if (double_int_ucmp (max_niter, period_value) > 0)
4362 return false;
4363 }
4364 else
4365 return false;
4366 }
4367 }
4368
4369 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4370
4371 *bound = aff_combination_to_tree (&bnd);
4372 /* It is unlikely that computing the number of iterations using division
4373 would be more profitable than keeping the original induction variable. */
4374 if (expression_expensive_p (*bound))
4375 return false;
4376 return true;
4377 }
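/* Editor's sketch (not part of GCC): the feasibility test above for a
   constant iteration count.  The candidate can drive the exit test iff
   its period covers the iteration count; when the use follows the
   increment, the candidate advances one extra step, so the strict
   inequality is required.  */

static bool
may_eliminate_iv_sketch (unsigned long period, unsigned long niter,
			 bool after_inc)
{
  return after_inc ? niter < period : niter <= period;
}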
4378
4379
4380 /* Determines cost of basing replacement of USE on CAND in a condition. */
4381
4382 static bool
4383 determine_use_iv_cost_condition (struct ivopts_data *data,
4384 struct iv_use *use, struct iv_cand *cand)
4385 {
4386 tree bound = NULL_TREE;
4387 struct iv *cmp_iv;
4388 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4389 comp_cost elim_cost, express_cost, cost;
4390 bool ok;
4391 int inv_expr_id = -1;
4392 tree *control_var, *bound_cst;
4393
4394 /* Only consider real candidates. */
4395 if (!cand->iv)
4396 {
4397 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4398 return false;
4399 }
4400
4401 /* Try iv elimination. */
4402 if (may_eliminate_iv (data, use, cand, &bound))
4403 {
4404 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4405 /* The bound is a loop invariant, so it will be only computed
4406 once. */
4407 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4408 }
4409 else
4410 elim_cost = infinite_cost;
4411
4412 /* Try expressing the original giv. If it is compared with an invariant,
4413 note that we cannot get rid of it. */
4414 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4415 NULL, &cmp_iv);
4416 gcc_assert (ok);
4417
4418 /* When the condition is a comparison of the candidate IV against
4419 zero, prefer this IV.
4420
4421 TODO: The constant that we're subtracting from the cost should
4422 be target-dependent. This information should be added to the
4423 target costs for each backend. */
4424 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4425 && integer_zerop (*bound_cst)
4426 && (operand_equal_p (*control_var, cand->var_after, 0)
4427 || operand_equal_p (*control_var, cand->var_before, 0)))
4428 elim_cost.cost -= 1;
4429
4430 express_cost = get_computation_cost (data, use, cand, false,
4431 &depends_on_express, NULL,
4432 &inv_expr_id);
4433 fd_ivopts_data = data;
4434 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4435
4436 /* Choose the better approach, preferring the eliminated IV. */
4437 if (compare_costs (elim_cost, express_cost) <= 0)
4438 {
4439 cost = elim_cost;
4440 depends_on = depends_on_elim;
4441 depends_on_elim = NULL;
4442 }
4443 else
4444 {
4445 cost = express_cost;
4446 depends_on = depends_on_express;
4447 depends_on_express = NULL;
4448 bound = NULL_TREE;
4449 }
4450
4451 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4452
4453 if (depends_on_elim)
4454 BITMAP_FREE (depends_on_elim);
4455 if (depends_on_express)
4456 BITMAP_FREE (depends_on_express);
4457
4458 return !infinite_cost_p (cost);
4459 }
4460
4461 /* Determines cost of basing replacement of USE on CAND. Returns false
4462 if USE cannot be based on CAND. */
4463
4464 static bool
4465 determine_use_iv_cost (struct ivopts_data *data,
4466 struct iv_use *use, struct iv_cand *cand)
4467 {
4468 switch (use->type)
4469 {
4470 case USE_NONLINEAR_EXPR:
4471 return determine_use_iv_cost_generic (data, use, cand);
4472
4473 case USE_ADDRESS:
4474 return determine_use_iv_cost_address (data, use, cand);
4475
4476 case USE_COMPARE:
4477 return determine_use_iv_cost_condition (data, use, cand);
4478
4479 default:
4480 gcc_unreachable ();
4481 }
4482 }
4483
4484 /* Return true if get_computation_cost indicates that autoincrement is
4485 a possibility for the pair of USE and CAND, false otherwise. */
4486
4487 static bool
4488 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4489 struct iv_cand *cand)
4490 {
4491 bitmap depends_on;
4492 bool can_autoinc;
4493 comp_cost cost;
4494
4495 if (use->type != USE_ADDRESS)
4496 return false;
4497
4498 cost = get_computation_cost (data, use, cand, true, &depends_on,
4499 &can_autoinc, NULL);
4500
4501 BITMAP_FREE (depends_on);
4502
4503 return !infinite_cost_p (cost) && can_autoinc;
4504 }
4505
4506 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4507 use that allows autoincrement, and set their AINC_USE if possible. */
4508
4509 static void
4510 set_autoinc_for_original_candidates (struct ivopts_data *data)
4511 {
4512 unsigned i, j;
4513
4514 for (i = 0; i < n_iv_cands (data); i++)
4515 {
4516 struct iv_cand *cand = iv_cand (data, i);
4517 struct iv_use *closest = NULL;
4518 if (cand->pos != IP_ORIGINAL)
4519 continue;
4520 for (j = 0; j < n_iv_uses (data); j++)
4521 {
4522 struct iv_use *use = iv_use (data, j);
4523 unsigned uid = gimple_uid (use->stmt);
4524 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4525 || uid > gimple_uid (cand->incremented_at))
4526 continue;
4527 if (closest == NULL || uid > gimple_uid (closest->stmt))
4528 closest = use;
4529 }
4530 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4531 continue;
4532 cand->ainc_use = closest;
4533 }
4534 }
4535
4536 /* Finds the candidates for the induction variables. */
4537
4538 static void
4539 find_iv_candidates (struct ivopts_data *data)
4540 {
4541 /* Add commonly used ivs. */
4542 add_standard_iv_candidates (data);
4543
4544 /* Add old induction variables. */
4545 add_old_ivs_candidates (data);
4546
4547 /* Add induction variables derived from uses. */
4548 add_derived_ivs_candidates (data);
4549
4550 set_autoinc_for_original_candidates (data);
4551
4552 /* Record the important candidates. */
4553 record_important_candidates (data);
4554 }
4555
4556 /* Determines costs of basing the use of the iv on an iv candidate. */
4557
4558 static void
4559 determine_use_iv_costs (struct ivopts_data *data)
4560 {
4561 unsigned i, j;
4562 struct iv_use *use;
4563 struct iv_cand *cand;
4564 bitmap to_clear = BITMAP_ALLOC (NULL);
4565
4566 alloc_use_cost_map (data);
4567
4568 for (i = 0; i < n_iv_uses (data); i++)
4569 {
4570 use = iv_use (data, i);
4571
4572 if (data->consider_all_candidates)
4573 {
4574 for (j = 0; j < n_iv_cands (data); j++)
4575 {
4576 cand = iv_cand (data, j);
4577 determine_use_iv_cost (data, use, cand);
4578 }
4579 }
4580 else
4581 {
4582 bitmap_iterator bi;
4583
4584 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4585 {
4586 cand = iv_cand (data, j);
4587 if (!determine_use_iv_cost (data, use, cand))
4588 bitmap_set_bit (to_clear, j);
4589 }
4590
4591 /* Remove the candidates for which the cost is infinite from
4592 the list of related candidates.  */
4593 bitmap_and_compl_into (use->related_cands, to_clear);
4594 bitmap_clear (to_clear);
4595 }
4596 }
4597
4598 BITMAP_FREE (to_clear);
4599
4600 if (dump_file && (dump_flags & TDF_DETAILS))
4601 {
4602 fprintf (dump_file, "Use-candidate costs:\n");
4603
4604 for (i = 0; i < n_iv_uses (data); i++)
4605 {
4606 use = iv_use (data, i);
4607
4608 fprintf (dump_file, "Use %d:\n", i);
4609 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4610 for (j = 0; j < use->n_map_members; j++)
4611 {
4612 if (!use->cost_map[j].cand
4613 || infinite_cost_p (use->cost_map[j].cost))
4614 continue;
4615
4616 fprintf (dump_file, " %d\t%d\t%d\t",
4617 use->cost_map[j].cand->id,
4618 use->cost_map[j].cost.cost,
4619 use->cost_map[j].cost.complexity);
4620 if (use->cost_map[j].depends_on)
4621 bitmap_print (dump_file,
4622 use->cost_map[j].depends_on, "","");
4623 if (use->cost_map[j].inv_expr_id != -1)
4624 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4625 fprintf (dump_file, "\n");
4626 }
4627
4628 fprintf (dump_file, "\n");
4629 }
4630 fprintf (dump_file, "\n");
4631 }
4632 }
4633
4634 /* Determines cost of the candidate CAND. */
4635
4636 static void
4637 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4638 {
4639 comp_cost cost_base;
4640 unsigned cost, cost_step;
4641 tree base;
4642
4643 if (!cand->iv)
4644 {
4645 cand->cost = 0;
4646 return;
4647 }
4648
4649 /* There are two costs associated with the candidate -- its increment
4650 and its initialization. The second is almost negligible for any loop
4651 that rolls enough, so we take it just very little into account. */
4652
4653 base = cand->iv->base;
4654 cost_base = force_var_cost (data, base, NULL);
4655 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4656
4657 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4658
4659 /* Prefer the original ivs unless we may gain something by replacing them.
4660 The reason is to make debugging simpler; so this is not relevant for
4661 artificial ivs created by other optimization passes. */
4662 if (cand->pos != IP_ORIGINAL
4663 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4664 cost++;
4665
4666 /* Prefer not to insert statements into latch unless there are some
4667 already (so that we do not create unnecessary jumps). */
4668 if (cand->pos == IP_END
4669 && empty_block_p (ip_end_pos (data->current_loop)))
4670 cost++;
4671
4672 cand->cost = cost;
4673 cand->cost_step = cost_step;
4674 }
4675
4676 /* Determines costs of computation of the candidates. */
4677
4678 static void
4679 determine_iv_costs (struct ivopts_data *data)
4680 {
4681 unsigned i;
4682
4683 if (dump_file && (dump_flags & TDF_DETAILS))
4684 {
4685 fprintf (dump_file, "Candidate costs:\n");
4686 fprintf (dump_file, " cand\tcost\n");
4687 }
4688
4689 for (i = 0; i < n_iv_cands (data); i++)
4690 {
4691 struct iv_cand *cand = iv_cand (data, i);
4692
4693 determine_iv_cost (data, cand);
4694
4695 if (dump_file && (dump_flags & TDF_DETAILS))
4696 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4697 }
4698
4699 if (dump_file && (dump_flags & TDF_DETAILS))
4700 fprintf (dump_file, "\n");
4701 }
4702
4703 /* Calculates cost for having SIZE induction variables. */
4704
4705 static unsigned
4706 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4707 {
4708 /* We add size to the cost, so that we prefer eliminating ivs
4709 if possible. */
4710 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4711 data->body_includes_call);
4712 }
4713
4714 /* For each size of the induction variable set determine the penalty. */
4715
4716 static void
4717 determine_set_costs (struct ivopts_data *data)
4718 {
4719 unsigned j, n;
4720 gimple phi;
4721 gimple_stmt_iterator psi;
4722 tree op;
4723 struct loop *loop = data->current_loop;
4724 bitmap_iterator bi;
4725
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4727 {
4728 fprintf (dump_file, "Global costs:\n");
4729 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4730 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4731 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4732 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4733 }
4734
4735 n = 0;
4736 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4737 {
4738 phi = gsi_stmt (psi);
4739 op = PHI_RESULT (phi);
4740
4741 if (!is_gimple_reg (op))
4742 continue;
4743
4744 if (get_iv (data, op))
4745 continue;
4746
4747 n++;
4748 }
4749
4750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4751 {
4752 struct version_info *info = ver_info (data, j);
4753
4754 if (info->inv_id && info->has_nonlin_use)
4755 n++;
4756 }
4757
4758 data->regs_used = n;
4759 if (dump_file && (dump_flags & TDF_DETAILS))
4760 fprintf (dump_file, " regs_used %d\n", n);
4761
4762 if (dump_file && (dump_flags & TDF_DETAILS))
4763 {
4764 fprintf (dump_file, " cost for size:\n");
4765 fprintf (dump_file, " ivs\tcost\n");
4766 for (j = 0; j <= 2 * target_avail_regs; j++)
4767 fprintf (dump_file, " %d\t%d\n", j,
4768 ivopts_global_cost_for_size (data, j));
4769 fprintf (dump_file, "\n");
4770 }
4771 }
4772
4773 /* Returns true if A is a cheaper cost pair than B. */
4774
4775 static bool
4776 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4777 {
4778 int cmp;
4779
4780 if (!a)
4781 return false;
4782
4783 if (!b)
4784 return true;
4785
4786 cmp = compare_costs (a->cost, b->cost);
4787 if (cmp < 0)
4788 return true;
4789
4790 if (cmp > 0)
4791 return false;
4792
4793 /* In case the costs are the same, prefer the cheaper candidate. */
4794 if (a->cand->cost < b->cand->cost)
4795 return true;
4796
4797 return false;
4798 }
4799
4800
4801 /* Returns the candidate by which USE is expressed in IVS.  */
4802
4803 static struct cost_pair *
4804 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4805 {
4806 return ivs->cand_for_use[use->id];
4807 }
4808
4809
4810 /* Returns the number of temps needed for new loop invariant
4811 expressions. */
4812
4813 static int
4814 iv_ca_get_num_inv_exprs (struct ivopts_data *data, struct iv_ca *ivs)
4815 {
4816 unsigned i, n = 0;
4817 unsigned *used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
4818
4819 for (i = 0; i < ivs->upto; i++)
4820 {
4821 struct iv_use *use = iv_use (data, i);
4822 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
4823 if (cp && cp->inv_expr_id != -1)
4824 {
4825 used_inv_expr[cp->inv_expr_id]++;
4826 if (used_inv_expr[cp->inv_expr_id] == 1)
4827 n++;
4828 }
4829 }
4830
4831 free (used_inv_expr);
4832 return n;
4833 }
4834
4835 /* Computes the cost field of IVS structure. */
4836
4837 static void
4838 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4839 {
4840 unsigned n_inv_exprs = 0;
4841 comp_cost cost = ivs->cand_use_cost;
4842
4843 cost.cost += ivs->cand_cost;
4844
4845 n_inv_exprs = iv_ca_get_num_inv_exprs (data, ivs);
4846 cost.cost += ivopts_global_cost_for_size (data,
4847 ivs->n_regs + n_inv_exprs);
4848
4849 ivs->cost = cost;
4850 }
4851
4852 /* Remove invariants in set INVS from set IVS.  */
4853
4854 static void
4855 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4856 {
4857 bitmap_iterator bi;
4858 unsigned iid;
4859
4860 if (!invs)
4861 return;
4862
4863 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4864 {
4865 ivs->n_invariant_uses[iid]--;
4866 if (ivs->n_invariant_uses[iid] == 0)
4867 ivs->n_regs--;
4868 }
4869 }
4870
4871 /* Set USE not to be expressed by any candidate in IVS. */
4872
4873 static void
4874 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4875 struct iv_use *use)
4876 {
4877 unsigned uid = use->id, cid;
4878 struct cost_pair *cp;
4879
4880 cp = ivs->cand_for_use[uid];
4881 if (!cp)
4882 return;
4883 cid = cp->cand->id;
4884
4885 ivs->bad_uses++;
4886 ivs->cand_for_use[uid] = NULL;
4887 ivs->n_cand_uses[cid]--;
4888
4889 if (ivs->n_cand_uses[cid] == 0)
4890 {
4891 bitmap_clear_bit (ivs->cands, cid);
4892 /* Do not count the pseudocandidates. */
4893 if (cp->cand->iv)
4894 ivs->n_regs--;
4895 ivs->n_cands--;
4896 ivs->cand_cost -= cp->cand->cost;
4897
4898 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4899 }
4900
4901 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4902
4903 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4904 iv_ca_recount_cost (data, ivs);
4905 }
4906
4907 /* Add invariants in set INVS to set IVS. */
4908
4909 static void
4910 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4911 {
4912 bitmap_iterator bi;
4913 unsigned iid;
4914
4915 if (!invs)
4916 return;
4917
4918 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4919 {
4920 ivs->n_invariant_uses[iid]++;
4921 if (ivs->n_invariant_uses[iid] == 1)
4922 ivs->n_regs++;
4923 }
4924 }
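/* Editor's sketch (not part of GCC): iv_ca_set_remove_invariants and
   iv_ca_set_add_invariants above are plain reference counting; only the
   0 -> 1 and 1 -> 0 transitions change the register count.  */

static void
refcount_add_sketch (unsigned *refcount, unsigned *n_regs)
{
  if (++*refcount == 1)		/* Invariant becomes live: one more register.  */
    ++*n_regs;
}

static void
refcount_remove_sketch (unsigned *refcount, unsigned *n_regs)
{
  if (--*refcount == 0)		/* Last use gone: the register is freed.  */
    --*n_regs;
}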
4925
4926 /* Set cost pair for USE in set IVS to CP. */
4927
4928 static void
4929 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4930 struct iv_use *use, struct cost_pair *cp)
4931 {
4932 unsigned uid = use->id, cid;
4933
4934 if (ivs->cand_for_use[uid] == cp)
4935 return;
4936
4937 if (ivs->cand_for_use[uid])
4938 iv_ca_set_no_cp (data, ivs, use);
4939
4940 if (cp)
4941 {
4942 cid = cp->cand->id;
4943
4944 ivs->bad_uses--;
4945 ivs->cand_for_use[uid] = cp;
4946 ivs->n_cand_uses[cid]++;
4947 if (ivs->n_cand_uses[cid] == 1)
4948 {
4949 bitmap_set_bit (ivs->cands, cid);
4950 /* Do not count the pseudocandidates. */
4951 if (cp->cand->iv)
4952 ivs->n_regs++;
4953 ivs->n_cands++;
4954 ivs->cand_cost += cp->cand->cost;
4955
4956 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4957 }
4958
4959 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4960 iv_ca_set_add_invariants (ivs, cp->depends_on);
4961 iv_ca_recount_cost (data, ivs);
4962 }
4963 }
4964
4965 /* Extend set IVS by expressing USE by some of the candidates in it
4966 if possible. All important candidates will be considered
4967 if IMPORTANT_CANDIDATES is true. */
4968
4969 static void
4970 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4971 struct iv_use *use, bool important_candidates)
4972 {
4973 struct cost_pair *best_cp = NULL, *cp;
4974 bitmap_iterator bi;
4975 bitmap cands;
4976 unsigned i;
4977
4978 gcc_assert (ivs->upto >= use->id);
4979
4980 if (ivs->upto == use->id)
4981 {
4982 ivs->upto++;
4983 ivs->bad_uses++;
4984 }
4985
4986 cands = (important_candidates ? data->important_candidates : ivs->cands);
4987 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4988 {
4989 struct iv_cand *cand = iv_cand (data, i);
4990
4991 cp = get_use_iv_cost (data, use, cand);
4992
4993 if (cheaper_cost_pair (cp, best_cp))
4994 best_cp = cp;
4995 }
4996
4997 iv_ca_set_cp (data, ivs, use, best_cp);
4998 }
4999
5000 /* Get cost for assignment IVS. */
5001
5002 static comp_cost
5003 iv_ca_cost (struct iv_ca *ivs)
5004 {
5005 /* This was a conditional expression but it triggered a bug in
5006 Sun C 5.5. */
5007 if (ivs->bad_uses)
5008 return infinite_cost;
5009 else
5010 return ivs->cost;
5011 }
5012
5013 /* Returns true if all dependences of CP are among invariants in IVS. */
5014
5015 static bool
5016 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5017 {
5018 unsigned i;
5019 bitmap_iterator bi;
5020
5021 if (!cp->depends_on)
5022 return true;
5023
5024 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5025 {
5026 if (ivs->n_invariant_uses[i] == 0)
5027 return false;
5028 }
5029
5030 return true;
5031 }
5032
5033 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5034 it before NEXT_CHANGE. */
5035
5036 static struct iv_ca_delta *
5037 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5038 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5039 {
5040 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5041
5042 change->use = use;
5043 change->old_cp = old_cp;
5044 change->new_cp = new_cp;
5045 change->next_change = next_change;
5046
5047 return change;
5048 }
5049
5050 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5051 are rewritten. */
5052
5053 static struct iv_ca_delta *
5054 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5055 {
5056 struct iv_ca_delta *last;
5057
5058 if (!l2)
5059 return l1;
5060
5061 if (!l1)
5062 return l2;
5063
5064 for (last = l1; last->next_change; last = last->next_change)
5065 continue;
5066 last->next_change = l2;
5067
5068 return l1;
5069 }
5070
5071 /* Reverse the list of changes DELTA, forming the inverse to it. */
5072
5073 static struct iv_ca_delta *
5074 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5075 {
5076 struct iv_ca_delta *act, *next, *prev = NULL;
5077 struct cost_pair *tmp;
5078
5079 for (act = delta; act; act = next)
5080 {
5081 next = act->next_change;
5082 act->next_change = prev;
5083 prev = act;
5084
5085 tmp = act->old_cp;
5086 act->old_cp = act->new_cp;
5087 act->new_cp = tmp;
5088 }
5089
5090 return prev;
5091 }
5092
5093 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5094 reverted instead. */
5095
5096 static void
5097 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5098 struct iv_ca_delta *delta, bool forward)
5099 {
5100 struct cost_pair *from, *to;
5101 struct iv_ca_delta *act;
5102
5103 if (!forward)
5104 delta = iv_ca_delta_reverse (delta);
5105
5106 for (act = delta; act; act = act->next_change)
5107 {
5108 from = act->old_cp;
5109 to = act->new_cp;
5110 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5111 iv_ca_set_cp (data, ivs, act->use, to);
5112 }
5113
5114 if (!forward)
5115 iv_ca_delta_reverse (delta);
5116 }
5117
5118 /* Returns true if CAND is used in IVS. */
5119
5120 static bool
5121 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5122 {
5123 return ivs->n_cand_uses[cand->id] > 0;
5124 }
5125
5126 /* Returns number of induction variable candidates in the set IVS. */
5127
5128 static unsigned
5129 iv_ca_n_cands (struct iv_ca *ivs)
5130 {
5131 return ivs->n_cands;
5132 }
5133
5134 /* Free the list of changes DELTA. */
5135
5136 static void
5137 iv_ca_delta_free (struct iv_ca_delta **delta)
5138 {
5139 struct iv_ca_delta *act, *next;
5140
5141 for (act = *delta; act; act = next)
5142 {
5143 next = act->next_change;
5144 free (act);
5145 }
5146
5147 *delta = NULL;
5148 }
5149
5150 /* Allocates a new iv candidate assignment.  */
5151
5152 static struct iv_ca *
5153 iv_ca_new (struct ivopts_data *data)
5154 {
5155 struct iv_ca *nw = XNEW (struct iv_ca);
5156
5157 nw->upto = 0;
5158 nw->bad_uses = 0;
5159 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5160 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5161 nw->cands = BITMAP_ALLOC (NULL);
5162 nw->n_cands = 0;
5163 nw->n_regs = 0;
5164 nw->cand_use_cost = zero_cost;
5165 nw->cand_cost = 0;
5166 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5167 nw->cost = zero_cost;
5168
5169 return nw;
5170 }
5171
5172 /* Free memory occupied by the set IVS. */
5173
5174 static void
5175 iv_ca_free (struct iv_ca **ivs)
5176 {
5177 free ((*ivs)->cand_for_use);
5178 free ((*ivs)->n_cand_uses);
5179 BITMAP_FREE ((*ivs)->cands);
5180 free ((*ivs)->n_invariant_uses);
5181 free (*ivs);
5182 *ivs = NULL;
5183 }
5184
5185 /* Dumps IVS to FILE. */
5186
5187 static void
5188 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5189 {
5190 const char *pref = " invariants ";
5191 unsigned i;
5192 comp_cost cost = iv_ca_cost (ivs);
5193
5194 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5195 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5196 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5197 bitmap_print (file, ivs->cands, " candidates: ","\n");
5198
5199 for (i = 0; i < ivs->upto; i++)
5200 {
5201 struct iv_use *use = iv_use (data, i);
5202 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5203 if (cp)
5204 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5205 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5206 else
5207 fprintf (file, " use:%d --> ??\n", use->id);
5208 }
5209
5210 for (i = 1; i <= data->max_inv_id; i++)
5211 if (ivs->n_invariant_uses[i])
5212 {
5213 fprintf (file, "%s%d", pref, i);
5214 pref = ", ";
5215 }
5216 fprintf (file, "\n\n");
5217 }
5218
5219 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
5220 of the new set, and store the differences in DELTA.  The number of
5221 induction variables in the new set is stored to N_IVS.  When MIN_NCAND
5222 is true, the function tries to find a solution with minimal iv candidates.  */
5223
5224 static comp_cost
5225 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5226 struct iv_cand *cand, struct iv_ca_delta **delta,
5227 unsigned *n_ivs, bool min_ncand)
5228 {
5229 unsigned i;
5230 comp_cost cost;
5231 struct iv_use *use;
5232 struct cost_pair *old_cp, *new_cp;
5233
5234 *delta = NULL;
5235 for (i = 0; i < ivs->upto; i++)
5236 {
5237 use = iv_use (data, i);
5238 old_cp = iv_ca_cand_for_use (ivs, use);
5239
5240 if (old_cp
5241 && old_cp->cand == cand)
5242 continue;
5243
5244 new_cp = get_use_iv_cost (data, use, cand);
5245 if (!new_cp)
5246 continue;
5247
5248 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5249 continue;
5250
5251 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5252 continue;
5253
5254 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5255 }
5256
5257 iv_ca_delta_commit (data, ivs, *delta, true);
5258 cost = iv_ca_cost (ivs);
5259 if (n_ivs)
5260 *n_ivs = iv_ca_n_cands (ivs);
5261 iv_ca_delta_commit (data, ivs, *delta, false);
5262
5263 return cost;
5264 }
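/* Editor's note: iv_ca_extend above shows the speculative
   commit/measure/revert idiom used throughout the search:

     iv_ca_delta_commit (data, ivs, *delta, true);	apply the change
     cost = iv_ca_cost (ivs);				measure its cost
     iv_ca_delta_commit (data, ivs, *delta, false);	revert the change

   so many candidate deltas can be compared while mutating a single
   iv_ca instance in place.  */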
5265
5266 /* Try narrowing set IVS by removing CAND. Return the cost of
5267 the new set and store the differences in DELTA. */
5268
5269 static comp_cost
5270 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5271 struct iv_cand *cand, struct iv_ca_delta **delta)
5272 {
5273 unsigned i, ci;
5274 struct iv_use *use;
5275 struct cost_pair *old_cp, *new_cp, *cp;
5276 bitmap_iterator bi;
5277 struct iv_cand *cnd;
5278 comp_cost cost;
5279
5280 *delta = NULL;
5281 for (i = 0; i < n_iv_uses (data); i++)
5282 {
5283 use = iv_use (data, i);
5284
5285 old_cp = iv_ca_cand_for_use (ivs, use);
5286 if (old_cp->cand != cand)
5287 continue;
5288
5289 new_cp = NULL;
5290
5291 if (data->consider_all_candidates)
5292 {
5293 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5294 {
5295 if (ci == cand->id)
5296 continue;
5297
5298 cnd = iv_cand (data, ci);
5299
5300 cp = get_use_iv_cost (data, use, cnd);
5301 if (!cp)
5302 continue;
5303
5304 if (!iv_ca_has_deps (ivs, cp))
5305 continue;
5306
5307 if (!cheaper_cost_pair (cp, new_cp))
5308 continue;
5309
5310 new_cp = cp;
5311 }
5312 }
5313 else
5314 {
5315 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5316 {
5317 if (ci == cand->id)
5318 continue;
5319
5320 cnd = iv_cand (data, ci);
5321
5322 cp = get_use_iv_cost (data, use, cnd);
5323 if (!cp)
5324 continue;
5325 if (!iv_ca_has_deps (ivs, cp))
5326 continue;
5327
5328 if (!cheaper_cost_pair (cp, new_cp))
5329 continue;
5330
5331 new_cp = cp;
5332 }
5333 }
5334
5335 if (!new_cp)
5336 {
5337 iv_ca_delta_free (delta);
5338 return infinite_cost;
5339 }
5340
5341 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5342 }
5343
5344 iv_ca_delta_commit (data, ivs, *delta, true);
5345 cost = iv_ca_cost (ivs);
5346 iv_ca_delta_commit (data, ivs, *delta, false);
5347
5348 return cost;
5349 }
5350
5351 /* Try optimizing the set of candidates IVS by removing candidates other
5352 than EXCEPT_CAND from it.  Return the cost of the new set, and store
5353 the differences in DELTA.  */
5354
5355 static comp_cost
5356 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5357 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5358 {
5359 bitmap_iterator bi;
5360 struct iv_ca_delta *act_delta, *best_delta;
5361 unsigned i;
5362 comp_cost best_cost, acost;
5363 struct iv_cand *cand;
5364
5365 best_delta = NULL;
5366 best_cost = iv_ca_cost (ivs);
5367
5368 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5369 {
5370 cand = iv_cand (data, i);
5371
5372 if (cand == except_cand)
5373 continue;
5374
5375 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5376
5377 if (compare_costs (acost, best_cost) < 0)
5378 {
5379 best_cost = acost;
5380 iv_ca_delta_free (&best_delta);
5381 best_delta = act_delta;
5382 }
5383 else
5384 iv_ca_delta_free (&act_delta);
5385 }
5386
5387 if (!best_delta)
5388 {
5389 *delta = NULL;
5390 return best_cost;
5391 }
5392
5393 /* Recurse to possibly remove other unnecessary ivs. */
5394 iv_ca_delta_commit (data, ivs, best_delta, true);
5395 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5396 iv_ca_delta_commit (data, ivs, best_delta, false);
5397 *delta = iv_ca_delta_join (best_delta, *delta);
5398 return best_cost;
5399 }
5400
5401 /* Tries to extend the set IVS in the best possible way in order
5402 to express the USE. If ORIGINALP is true, prefer candidates from
5403 the original set of IVs, otherwise favor important candidates not
5404 based on any memory object. */
5405
5406 static bool
5407 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5408 struct iv_use *use, bool originalp)
5409 {
5410 comp_cost best_cost, act_cost;
5411 unsigned i;
5412 bitmap_iterator bi;
5413 struct iv_cand *cand;
5414 struct iv_ca_delta *best_delta = NULL, *act_delta;
5415 struct cost_pair *cp;
5416
5417 iv_ca_add_use (data, ivs, use, false);
5418 best_cost = iv_ca_cost (ivs);
5419
5420 cp = iv_ca_cand_for_use (ivs, use);
5421 if (!cp)
5422 {
5423 ivs->upto--;
5424 ivs->bad_uses--;
5425 iv_ca_add_use (data, ivs, use, true);
5426 best_cost = iv_ca_cost (ivs);
5427 cp = iv_ca_cand_for_use (ivs, use);
5428 }
5429 if (cp)
5430 {
5431 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5432 iv_ca_set_no_cp (data, ivs, use);
5433 }
5434
5435 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5436 first try important candidates not based on any memory object. Only if
5437 this fails, try the specific ones. Rationale -- in loops with many
5438 variables the best choice often is to use just one generic biv. If we
5439 added here many ivs specific to the uses, the optimization algorithm later
5440 would be likely to get stuck in a local minimum, thus causing us to create
5441 too many ivs. The approach from few ivs to more seems more likely to be
5442 successful -- starting from few ivs, replacing an expensive use by a
5443 specific iv should always be a win. */
5444 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5445 {
5446 cand = iv_cand (data, i);
5447
5448 if (originalp && cand->pos != IP_ORIGINAL)
5449 continue;
5450
5451 if (!originalp && cand->iv->base_object != NULL_TREE)
5452 continue;
5453
5454 if (iv_ca_cand_used_p (ivs, cand))
5455 continue;
5456
5457 cp = get_use_iv_cost (data, use, cand);
5458 if (!cp)
5459 continue;
5460
5461 iv_ca_set_cp (data, ivs, use, cp);
5462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5463 true);
5464 iv_ca_set_no_cp (data, ivs, use);
5465 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5466
5467 if (compare_costs (act_cost, best_cost) < 0)
5468 {
5469 best_cost = act_cost;
5470
5471 iv_ca_delta_free (&best_delta);
5472 best_delta = act_delta;
5473 }
5474 else
5475 iv_ca_delta_free (&act_delta);
5476 }
5477
5478 if (infinite_cost_p (best_cost))
5479 {
5480 for (i = 0; i < use->n_map_members; i++)
5481 {
5482 cp = use->cost_map + i;
5483 cand = cp->cand;
5484 if (!cand)
5485 continue;
5486
5487 /* Already tried this. */
5488 if (cand->important)
5489 {
5490 if (originalp && cand->pos == IP_ORIGINAL)
5491 continue;
5492 if (!originalp && cand->iv->base_object == NULL_TREE)
5493 continue;
5494 }
5495
5496 if (iv_ca_cand_used_p (ivs, cand))
5497 continue;
5498
5499 act_delta = NULL;
5500 iv_ca_set_cp (data, ivs, use, cp);
5501 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5502 iv_ca_set_no_cp (data, ivs, use);
5503 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5504 cp, act_delta);
5505
5506 if (compare_costs (act_cost, best_cost) < 0)
5507 {
5508 best_cost = act_cost;
5509
5510 if (best_delta)
5511 iv_ca_delta_free (&best_delta);
5512 best_delta = act_delta;
5513 }
5514 else
5515 iv_ca_delta_free (&act_delta);
5516 }
5517 }
5518
5519 iv_ca_delta_commit (data, ivs, best_delta, true);
5520 iv_ca_delta_free (&best_delta);
5521
5522 return !infinite_cost_p (best_cost);
5523 }
5524
5525 /* Finds an initial assignment of candidates to uses. */
5526
5527 static struct iv_ca *
5528 get_initial_solution (struct ivopts_data *data, bool originalp)
5529 {
5530 struct iv_ca *ivs = iv_ca_new (data);
5531 unsigned i;
5532
5533 for (i = 0; i < n_iv_uses (data); i++)
5534 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5535 {
5536 iv_ca_free (&ivs);
5537 return NULL;
5538 }
5539
5540 return ivs;
5541 }
5542
5543 /* Tries to improve the set of induction variables IVS.  */
5544
5545 static bool
5546 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5547 {
5548 unsigned i, n_ivs;
5549 comp_cost acost, best_cost = iv_ca_cost (ivs);
5550 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5551 struct iv_cand *cand;
5552
5553 /* Try extending the set of induction variables by one. */
5554 for (i = 0; i < n_iv_cands (data); i++)
5555 {
5556 cand = iv_cand (data, i);
5557
5558 if (iv_ca_cand_used_p (ivs, cand))
5559 continue;
5560
5561 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5562 if (!act_delta)
5563 continue;
5564
5565 /* If we successfully added the candidate and the set is small enough,
5566 try optimizing it by removing other candidates. */
5567 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5568 {
5569 iv_ca_delta_commit (data, ivs, act_delta, true);
5570 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5571 iv_ca_delta_commit (data, ivs, act_delta, false);
5572 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5573 }
5574
5575 if (compare_costs (acost, best_cost) < 0)
5576 {
5577 best_cost = acost;
5578 iv_ca_delta_free (&best_delta);
5579 best_delta = act_delta;
5580 }
5581 else
5582 iv_ca_delta_free (&act_delta);
5583 }
5584
5585 if (!best_delta)
5586 {
5587 /* Try removing the candidates from the set instead. */
5588 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5589
5590 /* Nothing more we can do. */
5591 if (!best_delta)
5592 return false;
5593 }
5594
5595 iv_ca_delta_commit (data, ivs, best_delta, true);
5596 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5597 iv_ca_delta_free (&best_delta);
5598 return true;
5599 }
5600
5601 /* Attempts to find the optimal set of induction variables.  We use a simple
5602 greedy heuristic -- we try to replace at most one candidate in the selected
5603 solution and remove the unused ivs while this improves the cost. */
5604
5605 static struct iv_ca *
5606 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5607 {
5608 struct iv_ca *set;
5609
5610 /* Get the initial solution. */
5611 set = get_initial_solution (data, originalp);
5612 if (!set)
5613 {
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5615 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5616 return NULL;
5617 }
5618
5619 if (dump_file && (dump_flags & TDF_DETAILS))
5620 {
5621 fprintf (dump_file, "Initial set of candidates:\n");
5622 iv_ca_dump (data, dump_file, set);
5623 }
5624
5625 while (try_improve_iv_set (data, set))
5626 {
5627 if (dump_file && (dump_flags & TDF_DETAILS))
5628 {
5629 fprintf (dump_file, "Improved to:\n");
5630 iv_ca_dump (data, dump_file, set);
5631 }
5632 }
5633
5634 return set;
5635 }
5636
5637 static struct iv_ca *
5638 find_optimal_iv_set (struct ivopts_data *data)
5639 {
5640 unsigned i;
5641 struct iv_ca *set, *origset;
5642 struct iv_use *use;
5643 comp_cost cost, origcost;
5644
5645 /* Determine the cost using a strategy that starts from the original IVs,
5646 and try again using a strategy that prefers important candidates not
5647 based on any memory object.  */
5648 origset = find_optimal_iv_set_1 (data, true);
5649 set = find_optimal_iv_set_1 (data, false);
5650
5651 if (!origset && !set)
5652 return NULL;
5653
5654 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5655 cost = set ? iv_ca_cost (set) : infinite_cost;
5656
5657 if (dump_file && (dump_flags & TDF_DETAILS))
5658 {
5659 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5660 origcost.cost, origcost.complexity);
5661 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5662 cost.cost, cost.complexity);
5663 }
5664
5665 /* Choose the one with the best cost. */
5666 if (compare_costs (origcost, cost) <= 0)
5667 {
5668 if (set)
5669 iv_ca_free (&set);
5670 set = origset;
5671 }
5672 else if (origset)
5673 iv_ca_free (&origset);
5674
5675 for (i = 0; i < n_iv_uses (data); i++)
5676 {
5677 use = iv_use (data, i);
5678 use->selected = iv_ca_cand_for_use (set, use)->cand;
5679 }
5680
5681 return set;
5682 }
5683
5684 /* Creates a new induction variable corresponding to CAND. */
5685
5686 static void
5687 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5688 {
5689 gimple_stmt_iterator incr_pos;
5690 tree base;
5691 bool after = false;
5692
5693 if (!cand->iv)
5694 return;
5695
5696 switch (cand->pos)
5697 {
5698 case IP_NORMAL:
5699 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5700 break;
5701
5702 case IP_END:
5703 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5704 after = true;
5705 break;
5706
5707 case IP_AFTER_USE:
5708 after = true;
5709 /* fall through */
5710 case IP_BEFORE_USE:
5711 incr_pos = gsi_for_stmt (cand->incremented_at);
5712 break;
5713
5714 case IP_ORIGINAL:
5715 /* Mark that the iv is preserved. */
5716 name_info (data, cand->var_before)->preserve_biv = true;
5717 name_info (data, cand->var_after)->preserve_biv = true;
5718
5719 /* Rewrite the increment so that it uses var_before directly. */
5720 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5721 return;
5722 }
5723
5724 gimple_add_tmp_var (cand->var_before);
5725 add_referenced_var (cand->var_before);
5726
5727 base = unshare_expr (cand->iv->base);
5728
5729 create_iv (base, unshare_expr (cand->iv->step),
5730 cand->var_before, data->current_loop,
5731 &incr_pos, after, &cand->var_before, &cand->var_after);
5732 }
5733
5734 /* Creates new induction variables described in SET. */
5735
5736 static void
5737 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5738 {
5739 unsigned i;
5740 struct iv_cand *cand;
5741 bitmap_iterator bi;
5742
5743 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5744 {
5745 cand = iv_cand (data, i);
5746 create_new_iv (data, cand);
5747 }
5748
5749 if (dump_file && (dump_flags & TDF_DETAILS))
5750 {
5751 fprintf (dump_file, "\nSelected IV set: \n");
5752 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5753 {
5754 cand = iv_cand (data, i);
5755 dump_cand (dump_file, cand);
5756 }
5757 fprintf (dump_file, "\n");
5758 }
5759 }
5760
5761 /* Rewrites USE (definition of iv used in a nonlinear expression)
5762 using candidate CAND. */
5763
5764 static void
5765 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5766 struct iv_use *use, struct iv_cand *cand)
5767 {
5768 tree comp;
5769 tree op, tgt;
5770 gimple ass;
5771 gimple_stmt_iterator bsi;
5772
5773 /* An important special case -- if we are asked to express the value of
5774 the original iv by itself, just exit; there is no need to
5775 introduce a new computation (that might also need casting the
5776 variable to unsigned and back). */
5777 if (cand->pos == IP_ORIGINAL
5778 && cand->incremented_at == use->stmt)
5779 {
5780 tree step, ctype, utype;
5781 enum tree_code incr_code = PLUS_EXPR, old_code;
5782
5783 gcc_assert (is_gimple_assign (use->stmt));
5784 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5785
5786 step = cand->iv->step;
5787 ctype = TREE_TYPE (step);
5788 utype = TREE_TYPE (cand->var_after);
5789 if (TREE_CODE (step) == NEGATE_EXPR)
5790 {
5791 incr_code = MINUS_EXPR;
5792 step = TREE_OPERAND (step, 0);
5793 }
5794
5795 /* Check whether we may leave the computation unchanged.
5796 This is the case only if it does not rely on other
5797 computations in the loop -- otherwise, the computation
5798 we rely upon may be removed in remove_unused_ivs,
5799 thus leading to ICE. */
5800 old_code = gimple_assign_rhs_code (use->stmt);
5801 if (old_code == PLUS_EXPR
5802 || old_code == MINUS_EXPR
5803 || old_code == POINTER_PLUS_EXPR)
5804 {
5805 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5806 op = gimple_assign_rhs2 (use->stmt);
5807 else if (old_code != MINUS_EXPR
5808 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5809 op = gimple_assign_rhs1 (use->stmt);
5810 else
5811 op = NULL_TREE;
5812 }
5813 else
5814 op = NULL_TREE;
5815
5816 if (op
5817 && (TREE_CODE (op) == INTEGER_CST
5818 || operand_equal_p (op, step, 0)))
5819 return;
5820
5821 /* Otherwise, add the necessary computations to express
5822 the iv. */
5823 op = fold_convert (ctype, cand->var_before);
5824 comp = fold_convert (utype,
5825 build2 (incr_code, ctype, op,
5826 unshare_expr (step)));
5827 }
5828 else
5829 {
5830 comp = get_computation (data->current_loop, use, cand);
5831 gcc_assert (comp != NULL_TREE);
5832 }
5833
5834 switch (gimple_code (use->stmt))
5835 {
5836 case GIMPLE_PHI:
5837 tgt = PHI_RESULT (use->stmt);
5838
5839 /* If we should keep the biv, do not replace it. */
5840 if (name_info (data, tgt)->preserve_biv)
5841 return;
5842
5843 bsi = gsi_after_labels (gimple_bb (use->stmt));
5844 break;
5845
5846 case GIMPLE_ASSIGN:
5847 tgt = gimple_assign_lhs (use->stmt);
5848 bsi = gsi_for_stmt (use->stmt);
5849 break;
5850
5851 default:
5852 gcc_unreachable ();
5853 }
5854
5855 if (!valid_gimple_rhs_p (comp)
5856 || (gimple_code (use->stmt) != GIMPLE_PHI
5857 /* We can't allow re-allocating the stmt as it might be pointed
5858 to still. */
5859 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5860 >= gimple_num_ops (gsi_stmt (bsi)))))
5861 {
5862 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5863 true, GSI_SAME_STMT);
5864 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5865 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5866 }
5867
5868 if (gimple_code (use->stmt) == GIMPLE_PHI)
5869 {
5870 ass = gimple_build_assign (tgt, comp);
5871 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5872
5873 bsi = gsi_for_stmt (use->stmt);
5874 remove_phi_node (&bsi, false);
5875 }
5876 else
5877 {
5878 gimple_assign_set_rhs_from_tree (&bsi, comp);
5879 use->stmt = gsi_stmt (bsi);
5880 }
5881 }
5882
5883 /* Copies the reference information from OLD_REF to NEW_REF. */
5884
5885 static void
5886 copy_ref_info (tree new_ref, tree old_ref)
5887 {
5888 tree new_ptr_base = NULL_TREE;
5889
5890 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5891 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5892
5893 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5894 new_ptr_base = TMR_BASE (new_ref);
5895 else if (TREE_CODE (new_ref) == MEM_REF)
5896 new_ptr_base = TREE_OPERAND (new_ref, 0);
5897
5898 /* We can transfer points-to information from an old pointer
5899 or decl base to the new one. */
5900 if (new_ptr_base
5901 && TREE_CODE (new_ptr_base) == SSA_NAME
5902 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5903 && !SSA_NAME_PTR_INFO (new_ptr_base))
5904 {
5905 tree base = get_base_address (old_ref);
5906 if (!base)
5907 ;
5908 else if ((INDIRECT_REF_P (base)
5909 || TREE_CODE (base) == MEM_REF)
5910 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5911 duplicate_ssa_name_ptr_info
5912 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5913 else if (TREE_CODE (base) == VAR_DECL
5914 || TREE_CODE (base) == PARM_DECL
5915 || TREE_CODE (base) == RESULT_DECL)
5916 {
5917 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5918 pt_solution_set_var (&pi->pt, base);
5919 }
5920 }
5921 }
5922
5923 /* Performs a peephole optimization to reorder the iv update statement with
5924 a mem ref to enable instruction combining in later phases. The mem ref uses
5925 the iv value before the update, so the reordering transformation requires
5926 adjustment of the offset. CAND is the selected IV_CAND.
5927
5928 Example:
5929
5930 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5931 iv2 = iv1 + 1;
5932
5933 if (t < val) (1)
5934 goto L;
5935 goto Head;
5936
5937
5938 directly propagating t over to (1) will introduce an overlapping live range,
5939 thus increasing register pressure.  This peephole transforms it into:
5940
5941
5942 iv2 = iv1 + 1;
5943 t = MEM_REF (base, iv2, 8, 8);
5944 if (t < val)
5945 goto L;
5946 goto Head;
5947 */
5948
5949 static void
5950 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5951 {
5952 tree var_after;
5953 gimple iv_update, stmt;
5954 basic_block bb;
5955 gimple_stmt_iterator gsi, gsi_iv;
5956
5957 if (cand->pos != IP_NORMAL)
5958 return;
5959
5960 var_after = cand->var_after;
5961 iv_update = SSA_NAME_DEF_STMT (var_after);
5962
5963 bb = gimple_bb (iv_update);
5964 gsi = gsi_last_nondebug_bb (bb);
5965 stmt = gsi_stmt (gsi);
5966
5967 /* Only handle conditional statements for now.  */
5968 if (gimple_code (stmt) != GIMPLE_COND)
5969 return;
5970
5971 gsi_prev_nondebug (&gsi);
5972 stmt = gsi_stmt (gsi);
5973 if (stmt != iv_update)
5974 return;
5975
5976 gsi_prev_nondebug (&gsi);
5977 if (gsi_end_p (gsi))
5978 return;
5979
5980 stmt = gsi_stmt (gsi);
5981 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5982 return;
5983
5984 if (stmt != use->stmt)
5985 return;
5986
5987 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5988 return;
5989
5990 if (dump_file && (dump_flags & TDF_DETAILS))
5991 {
5992 fprintf (dump_file, "Reordering\n");
5993 print_gimple_stmt (dump_file, iv_update, 0, 0);
5994 print_gimple_stmt (dump_file, use->stmt, 0, 0);
5995 fprintf (dump_file, "\n");
5996 }
5997
5998 gsi = gsi_for_stmt (use->stmt);
5999 gsi_iv = gsi_for_stmt (iv_update);
6000 gsi_move_before (&gsi_iv, &gsi);
6001
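/* The increment now executes before the use; record the candidate as
incremented at the use statement, so the rewriting code computes the
use from the already-incremented value, with the offset adjusted as in
the example above.  */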
6002 cand->pos = IP_BEFORE_USE;
6003 cand->incremented_at = use->stmt;
6004 }
6005
6006 /* Rewrites USE (an address whose value is an iv) using candidate CAND. */
6007
6008 static void
6009 rewrite_use_address (struct ivopts_data *data,
6010 struct iv_use *use, struct iv_cand *cand)
6011 {
6012 aff_tree aff;
6013 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6014 tree base_hint = NULL_TREE;
6015 tree ref, iv;
6016 bool ok;
6017
6018 adjust_iv_update_pos (cand, use);
6019 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6020 gcc_assert (ok);
6021 unshare_aff_combination (&aff);
6022
6023 /* To avoid undefined overflow problems, all IV candidates use unsigned
6024 integer types. The drawback is that this makes it impossible for
6025 create_mem_ref to distinguish an IV that is based on a memory object
6026 from one that represents simply an offset.
6027
6028 To work around this problem, we pass a hint to create_mem_ref that
6029 indicates which variable (if any) in aff is an IV based on a memory
6030 object. Note that we only consider the candidate. If this is not
6031 based on an object, the base of the reference is in some subexpression
6032 of the use -- but these will use pointer types, so they are recognized
6033 by the create_mem_ref heuristics anyway. */
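/* A sketch with hypothetical names: if the candidate is a pointer iv
p_7 walking through object 'a', AFF only sees its unsigned value, e.g.
{ (sizetype) p_7 + 4 }.  The hint identifies p_7 as object-based, so
create_mem_ref can put it into the base slot of the TARGET_MEM_REF
rather than treat it as a plain offset.  */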
6034 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6035 if (cand->iv->base_object)
6036 base_hint = iv;
6037 
6038 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6039 reference_alias_ptr_type (*use->op_p),
6040 iv, base_hint, data->speed);
6041 copy_ref_info (ref, *use->op_p);
6042 *use->op_p = ref;
6043 }
6044
6045 /* Rewrites USE (a condition in which one of the operands is an iv) using
6046 candidate CAND. */
6047
6048 static void
6049 rewrite_use_compare (struct ivopts_data *data,
6050 struct iv_use *use, struct iv_cand *cand)
6051 {
6052 tree comp, *var_p, op, bound;
6053 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6054 enum tree_code compare;
6055 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6056 bool ok;
6057
6058 bound = cp->value;
6059 if (bound)
6060 {
6061 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6062 tree var_type = TREE_TYPE (var);
6063 gimple_seq stmts;
6064
6065 if (dump_file && (dump_flags & TDF_DETAILS))
6066 {
6067 fprintf (dump_file, "Replacing exit test: ");
6068 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6069 }
6070 compare = iv_elimination_compare (data, use);
6071 bound = unshare_expr (fold_convert (var_type, bound));
6072 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6073 if (stmts)
6074 gsi_insert_seq_on_edge_immediate (
6075 loop_preheader_edge (data->current_loop),
6076 stmts);
6077
6078 gimple_cond_set_lhs (use->stmt, var);
6079 gimple_cond_set_code (use->stmt, compare);
6080 gimple_cond_set_rhs (use->stmt, op);
6081 return;
6082 }
6083
6084 /* The induction variable elimination failed; just express the value of
6085 the use in terms of the candidate. */
6086 comp = get_computation (data->current_loop, use, cand);
6087 gcc_assert (comp != NULL_TREE);
6088
6089 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6090 gcc_assert (ok);
6091
6092 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6093 true, GSI_SAME_STMT);
6094 }
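
/* For illustration (hypothetical GIMPLE, not from a real dump), a
successful elimination above turns an exit test such as

  if (i_2 < n_3)

into a test against the chosen candidate, e.g.

  if (p_7 != bound_9)

where bound_9 is computed once on the preheader edge; only if no such
replacement was costed does the fallback path express the original
value in terms of the candidate.  */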
6095
6096 /* Rewrites USE using candidate CAND. */
6097
6098 static void
6099 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6100 {
6101 switch (use->type)
6102 {
6103 case USE_NONLINEAR_EXPR:
6104 rewrite_use_nonlinear_expr (data, use, cand);
6105 break;
6106
6107 case USE_ADDRESS:
6108 rewrite_use_address (data, use, cand);
6109 break;
6110
6111 case USE_COMPARE:
6112 rewrite_use_compare (data, use, cand);
6113 break;
6114
6115 default:
6116 gcc_unreachable ();
6117 }
6118
6119 update_stmt (use->stmt);
6120 }
6121
6122 /* Rewrites the uses using the selected induction variables. */
6123
6124 static void
6125 rewrite_uses (struct ivopts_data *data)
6126 {
6127 unsigned i;
6128 struct iv_cand *cand;
6129 struct iv_use *use;
6130
6131 for (i = 0; i < n_iv_uses (data); i++)
6132 {
6133 use = iv_use (data, i);
6134 cand = use->selected;
6135 gcc_assert (cand);
6136
6137 rewrite_use (data, use, cand);
6138 }
6139 }
6140
6141 /* Removes the ivs that are not used after rewriting. */
6142
6143 static void
6144 remove_unused_ivs (struct ivopts_data *data)
6145 {
6146 unsigned j;
6147 bitmap_iterator bi;
6148 bitmap toremove = BITMAP_ALLOC (NULL);
6149
6150 /* Figure out an order in which to release SSA DEFs so that we don't
6151 release something that we'd have to propagate into a debug stmt
6152 afterwards. */
6153 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6154 {
6155 struct version_info *info;
6156
6157 info = ver_info (data, j);
6158 if (info->iv
6159 && !integer_zerop (info->iv->step)
6160 && !info->inv_id
6161 && !info->iv->have_use_for
6162 && !info->preserve_biv)
6163 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6164 }
6165
6166 release_defs_bitset (toremove);
6167
6168 BITMAP_FREE (toremove);
6169 }
6170
6171 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6172 for pointer_map_traverse. */
6173
6174 static bool
6175 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6176 void *data ATTRIBUTE_UNUSED)
6177 {
6178 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6179
6180 free (niter);
6181 return true;
6182 }
6183
6184 /* Frees data allocated by the optimization of a single loop. */
6185
6186 static void
6187 free_loop_data (struct ivopts_data *data)
6188 {
6189 unsigned i, j;
6190 bitmap_iterator bi;
6191 tree obj;
6192
6193 if (data->niters)
6194 {
6195 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6196 pointer_map_destroy (data->niters);
6197 data->niters = NULL;
6198 }
6199
6200 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6201 {
6202 struct version_info *info;
6203
6204 info = ver_info (data, i);
6205 if (info->iv)
6206 free (info->iv);
6207 info->iv = NULL;
6208 info->has_nonlin_use = false;
6209 info->preserve_biv = false;
6210 info->inv_id = 0;
6211 }
6212 bitmap_clear (data->relevant);
6213 bitmap_clear (data->important_candidates);
6214
6215 for (i = 0; i < n_iv_uses (data); i++)
6216 {
6217 struct iv_use *use = iv_use (data, i);
6218
6219 free (use->iv);
6220 BITMAP_FREE (use->related_cands);
6221 for (j = 0; j < use->n_map_members; j++)
6222 if (use->cost_map[j].depends_on)
6223 BITMAP_FREE (use->cost_map[j].depends_on);
6224 free (use->cost_map);
6225 free (use);
6226 }
6227 VEC_truncate (iv_use_p, data->iv_uses, 0);
6228
6229 for (i = 0; i < n_iv_cands (data); i++)
6230 {
6231 struct iv_cand *cand = iv_cand (data, i);
6232
6233 if (cand->iv)
6234 free (cand->iv);
6235 if (cand->depends_on)
6236 BITMAP_FREE (cand->depends_on);
6237 free (cand);
6238 }
6239 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6240
6241 if (data->version_info_size < num_ssa_names)
6242 {
6243 data->version_info_size = 2 * num_ssa_names;
6244 free (data->version_info);
6245 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6246 }
6247
6248 data->max_inv_id = 0;
6249
6250 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6251 SET_DECL_RTL (obj, NULL_RTX);
6252
6253 VEC_truncate (tree, decl_rtl_to_reset, 0);
6254
6255 htab_empty (data->inv_expr_tab);
6256 data->inv_expr_id = 0;
6257 }
6258
6259 /* Finalizes the data structures used by the iv optimization pass
6260 and stored in DATA. */
6261
6262 static void
6263 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6264 {
6265 free_loop_data (data);
6266 free (data->version_info);
6267 BITMAP_FREE (data->relevant);
6268 BITMAP_FREE (data->important_candidates);
6269
6270 VEC_free (tree, heap, decl_rtl_to_reset);
6271 VEC_free (iv_use_p, heap, data->iv_uses);
6272 VEC_free (iv_cand_p, heap, data->iv_candidates);
6273 htab_delete (data->inv_expr_tab);
6274 }
6275
6276 /* Returns true if loop body BODY contains a call that is not an inexpensive builtin. */
6277
6278 static bool
6279 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6280 {
6281 gimple_stmt_iterator gsi;
6282 unsigned i;
6283
6284 for (i = 0; i < num_nodes; i++)
6285 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6286 {
6287 gimple stmt = gsi_stmt (gsi);
6288 if (is_gimple_call (stmt)
6289 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6290 return true;
6291 }
6292 return false;
6293 }
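
/* This flag is recorded per loop (see tree_ssa_iv_optimize_loop) and
feeds the register-pressure estimate: across a real call the
call-clobbered registers are unavailable, which makes extra induction
variables more expensive.  This is a summary of intent, not of the
exact cost formula.  */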
6294
6295 /* Optimizes LOOP. Returns true if anything changed. */
6296
6297 static bool
6298 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6299 {
6300 bool changed = false;
6301 struct iv_ca *iv_ca;
6302 edge exit;
6303 basic_block *body;
6304
6305 gcc_assert (!data->niters);
6306 data->current_loop = loop;
6307 data->speed = optimize_loop_for_speed_p (loop);
6308
6309 if (dump_file && (dump_flags & TDF_DETAILS))
6310 {
6311 fprintf (dump_file, "Processing loop %d\n", loop->num);
6312
6313 exit = single_dom_exit (loop);
6314 if (exit)
6315 {
6316 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6317 exit->src->index, exit->dest->index);
6318 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6319 fprintf (dump_file, "\n");
6320 }
6321
6322 fprintf (dump_file, "\n");
6323 }
6324
6325 body = get_loop_body (loop);
6326 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6327 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6328 free (body);
6329
6330 /* For each ssa name, determine whether it behaves as an induction variable
6331 in this loop. */
6332 if (!find_induction_variables (data))
6333 goto finish;
6334
6335 /* Finds interesting uses (item 1). */
6336 find_interesting_uses (data);
6337 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6338 goto finish;
6339
6340 /* Finds candidates for the induction variables (item 2). */
6341 find_iv_candidates (data);
6342
6343 /* Calculates the costs (item 3, part 1). */
6344 determine_iv_costs (data);
6345 determine_use_iv_costs (data);
6346 determine_set_costs (data);
6347
6348 /* Find the optimal set of induction variables (item 3, part 2). */
6349 iv_ca = find_optimal_iv_set (data);
6350 if (!iv_ca)
6351 goto finish;
6352 changed = true;
6353
6354 /* Create the new induction variables (item 4, part 1). */
6355 create_new_ivs (data, iv_ca);
6356 iv_ca_free (&iv_ca);
6357
6358 /* Rewrite the uses (item 4, part 2). */
6359 rewrite_uses (data);
6360
6361 /* Remove the ivs that are unused after rewriting. */
6362 remove_unused_ivs (data);
6363
6364 /* We have changed the structure of induction variables; it might happen
6365 that definitions in the scev database refer to some of them that were
6366 eliminated. */
6367 scev_reset ();
6368
6369 finish:
6370 free_loop_data (data);
6371
6372 return changed;
6373 }
6374
6375 /* Main entry point. Optimizes induction variables in loops. */
6376
6377 void
6378 tree_ssa_iv_optimize (void)
6379 {
6380 struct loop *loop;
6381 struct ivopts_data data;
6382 loop_iterator li;
6383
6384 tree_ssa_iv_optimize_init (&data);
6385
6386 /* Optimize the loops starting with the innermost ones. */
6387 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6388 {
6389 if (dump_file && (dump_flags & TDF_DETAILS))
6390 flow_loop_dump (loop, dump_file, NULL, 1);
6391
6392 tree_ssa_iv_optimize_loop (&data, loop);
6393 }
6394
6395 tree_ssa_iv_optimize_finalize (&data);
6396 }
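
/* End-to-end sketch (hypothetical C source, illustrating intent only):
a loop such as

  for (i = 0; i < n; i++)
    a[i] = 0;

may have its address computation strength-reduced into a pointer iv,
conceptually

  for (p = &a[0]; p != &a[n]; p++)
    *p = 0;

where rewrite_use_address introduced the *p reference,
rewrite_use_compare replaced the exit test, and remove_unused_ivs
deleted the now-dead counter i.  */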