/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables; the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
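
/* As an illustrative sketch (not literal output of this pass), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   may after strength reduction and induction variable elimination use a
   pointer as the only induction variable, with the exit test rewritten to
   compare against its final value:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;  */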

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction
                           variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}

/* Allocate data structures for the cost model.  */

void
initialize_costs (void)
{
  mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  cost_tables_exist = true;
}

/* Release data structures for the cost model.  */

void
finalize_costs (void)
{
  cost_tables_exist = false;
  htab_delete (mult_costs[0]);
  htab_delete (mult_costs[1]);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);

  initialize_costs ();
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
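
/* For example: for EXPR of the form &a[i] this returns &a; for a plain
   pointer ssa name p_1 the default case returns p_1 itself (converted to
   ptr_type_node); and for an integer constant there is no base object,
   so NULL_TREE is returned.  (The names a and p_1 are illustrative.)  */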

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable, and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so that
     it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
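
/* For example: for a loop exit test "if (i_1 < n_2)" where i_1 is an
   induction variable and n_2 is loop invariant, *CONTROL_VAR is set to
   the location of i_1, *BOUND to the location of n_2, and true is
   returned.  (i_1 and n_2 are illustrative ssa names.)  */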

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce, for a base of *D42 with iv->base being &x[0],
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
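
/* For example: for TOP = i_1 * 8 and BOT = i_1 the MULT_EXPR case yields
   MUL = 8; for two constants such as TOP = 12 and BOT = 4 it yields
   MUL = 3; and for TOP = i_1 * 8 + 16 and BOT = i_1 it fails, since 16
   is not a constant multiple of i_1.  (i_1 is an illustrative ssa name.)  */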

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2020
2021 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2022 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2023 we are at the top-level of the processed address. */
2024
2025 static tree
2026 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2027 unsigned HOST_WIDE_INT *offset)
2028 {
2029 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2030 enum tree_code code;
2031 tree type, orig_type = TREE_TYPE (expr);
2032 unsigned HOST_WIDE_INT off0, off1, st;
2033 tree orig_expr = expr;
2034
2035 STRIP_NOPS (expr);
2036
2037 type = TREE_TYPE (expr);
2038 code = TREE_CODE (expr);
2039 *offset = 0;
2040
2041 switch (code)
2042 {
2043 case INTEGER_CST:
2044 if (!cst_and_fits_in_hwi (expr)
2045 || integer_zerop (expr))
2046 return orig_expr;
2047
2048 *offset = int_cst_value (expr);
2049 return build_int_cst (orig_type, 0);
2050
2051 case POINTER_PLUS_EXPR:
2052 case PLUS_EXPR:
2053 case MINUS_EXPR:
2054 op0 = TREE_OPERAND (expr, 0);
2055 op1 = TREE_OPERAND (expr, 1);
2056
2057 op0 = strip_offset_1 (op0, false, false, &off0);
2058 op1 = strip_offset_1 (op1, false, false, &off1);
2059
2060 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2061 if (op0 == TREE_OPERAND (expr, 0)
2062 && op1 == TREE_OPERAND (expr, 1))
2063 return orig_expr;
2064
2065 if (integer_zerop (op1))
2066 expr = op0;
2067 else if (integer_zerop (op0))
2068 {
2069 if (code == MINUS_EXPR)
2070 expr = fold_build1 (NEGATE_EXPR, type, op1);
2071 else
2072 expr = op1;
2073 }
2074 else
2075 expr = fold_build2 (code, type, op0, op1);
2076
2077 return fold_convert (orig_type, expr);
2078
2079 case MULT_EXPR:
2080 op1 = TREE_OPERAND (expr, 1);
2081 if (!cst_and_fits_in_hwi (op1))
2082 return orig_expr;
2083
2084 op0 = TREE_OPERAND (expr, 0);
2085 op0 = strip_offset_1 (op0, false, false, &off0);
2086 if (op0 == TREE_OPERAND (expr, 0))
2087 return orig_expr;
2088
2089 *offset = off0 * int_cst_value (op1);
2090 if (integer_zerop (op0))
2091 expr = op0;
2092 else
2093 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2094
2095 return fold_convert (orig_type, expr);
2096
2097 case ARRAY_REF:
2098 case ARRAY_RANGE_REF:
2099 if (!inside_addr)
2100 return orig_expr;
2101
2102 step = array_ref_element_size (expr);
2103 if (!cst_and_fits_in_hwi (step))
2104 break;
2105
2106 st = int_cst_value (step);
2107 op1 = TREE_OPERAND (expr, 1);
2108 op1 = strip_offset_1 (op1, false, false, &off1);
2109 *offset = off1 * st;
2110
2111 if (top_compref
2112 && integer_zerop (op1))
2113 {
2114 /* Strip the array reference completely. */
2115 op0 = TREE_OPERAND (expr, 0);
2116 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2117 *offset += off0;
2118 return op0;
2119 }
2120 break;
2121
2122 case COMPONENT_REF:
2123 if (!inside_addr)
2124 return orig_expr;
2125
2126 tmp = component_ref_field_offset (expr);
2127 if (top_compref
2128 && cst_and_fits_in_hwi (tmp))
2129 {
2130 /* Strip the component reference completely. */
2131 op0 = TREE_OPERAND (expr, 0);
2132 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2133 *offset = off0 + int_cst_value (tmp);
2134 return op0;
2135 }
2136 break;
2137
2138 case ADDR_EXPR:
2139 op0 = TREE_OPERAND (expr, 0);
2140 op0 = strip_offset_1 (op0, true, true, &off0);
2141 *offset += off0;
2142
2143 if (op0 == TREE_OPERAND (expr, 0))
2144 return orig_expr;
2145
2146 expr = build_fold_addr_expr (op0);
2147 return fold_convert (orig_type, expr);
2148
2149 case MEM_REF:
2150 /* ??? Offset operand? */
2151 inside_addr = false;
2152 break;
2153
2154 default:
2155 return orig_expr;
2156 }
2157
2158 /* Default handling of expressions for which we want to recurse into
2159 the first operand. */
2160 op0 = TREE_OPERAND (expr, 0);
2161 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2162 *offset += off0;
2163
2164 if (op0 == TREE_OPERAND (expr, 0)
2165 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2166 return orig_expr;
2167
2168 expr = copy_node (expr);
2169 TREE_OPERAND (expr, 0) = op0;
2170 if (op1)
2171 TREE_OPERAND (expr, 1) = op1;
2172
2173 /* Inside an address, we might strip the top-level component references,
2174 thus changing the type of the expression. The handling of ADDR_EXPR
2175 will fix that. */
2176 expr = fold_convert (orig_type, expr);
2177
2178 return expr;
2179 }
2180
2181 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2182
2183 static tree
2184 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2185 {
2186 return strip_offset_1 (expr, false, false, offset);
2187 }
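
/* For illustration, a sketch of how strip_offset is typically used by the
   callers below (the concrete values are hypothetical):

     unsigned HOST_WIDE_INT off;
     tree base = strip_offset (use_base, &off);

   Given use_base == &a[i + 4] with 4-byte array elements, this should yield
   base == &a[i] and off == 16, so that uses differing only in a constant
   offset can be expressed from a single induction variable.  */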
2188
2189 /* Returns a variant of TYPE that can be used as a base for different uses.
2190 We return an unsigned type with the same precision, which avoids problems
2191 with overflows. */
2192
2193 static tree
2194 generic_type_for (tree type)
2195 {
2196 if (POINTER_TYPE_P (type))
2197 return unsigned_type_for (type);
2198
2199 if (TYPE_UNSIGNED (type))
2200 return type;
2201
2202 return unsigned_type_for (type);
2203 }
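
/* For illustration (hypothetical types): generic_type_for maps both signed
   and pointer types to an unsigned type of the same precision, e.g. int to
   unsigned int, and a 64-bit pointer to a 64-bit unsigned integer type,
   while unsigned int is returned unchanged.  Arithmetic on candidates in
   these types wraps instead of invoking undefined overflow behavior.  */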
2204
2205 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2206 the bitmap in which we should store them. */
2207
2208 static struct ivopts_data *fd_ivopts_data;
2209 static tree
2210 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2211 {
2212 bitmap *depends_on = (bitmap *) data;
2213 struct version_info *info;
2214
2215 if (TREE_CODE (*expr_p) != SSA_NAME)
2216 return NULL_TREE;
2217 info = name_info (fd_ivopts_data, *expr_p);
2218
2219 if (!info->inv_id || info->has_nonlin_use)
2220 return NULL_TREE;
2221
2222 if (!*depends_on)
2223 *depends_on = BITMAP_ALLOC (NULL);
2224 bitmap_set_bit (*depends_on, info->inv_id);
2225
2226 return NULL_TREE;
2227 }
2228
2229 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2230 and the position to POS. If USE is not NULL, the candidate is set as related to
2231 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2232 replacement of the final value of the iv by a direct computation. */
2233
2234 static struct iv_cand *
2235 add_candidate_1 (struct ivopts_data *data,
2236 tree base, tree step, bool important, enum iv_position pos,
2237 struct iv_use *use, gimple incremented_at)
2238 {
2239 unsigned i;
2240 struct iv_cand *cand = NULL;
2241 tree type, orig_type;
2242
2243 /* For non-original variables, make sure their values are computed in a type
2244 that does not invoke undefined behavior on overflows (since in general,
2245 we cannot prove that these induction variables are non-wrapping). */
2246 if (pos != IP_ORIGINAL)
2247 {
2248 orig_type = TREE_TYPE (base);
2249 type = generic_type_for (orig_type);
2250 if (type != orig_type)
2251 {
2252 base = fold_convert (type, base);
2253 step = fold_convert (type, step);
2254 }
2255 }
2256
2257 for (i = 0; i < n_iv_cands (data); i++)
2258 {
2259 cand = iv_cand (data, i);
2260
2261 if (cand->pos != pos)
2262 continue;
2263
2264 if (cand->incremented_at != incremented_at
2265 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2266 && cand->ainc_use != use))
2267 continue;
2268
2269 if (!cand->iv)
2270 {
2271 if (!base && !step)
2272 break;
2273
2274 continue;
2275 }
2276
2277 if (!base && !step)
2278 continue;
2279
2280 if (operand_equal_p (base, cand->iv->base, 0)
2281 && operand_equal_p (step, cand->iv->step, 0)
2282 && (TYPE_PRECISION (TREE_TYPE (base))
2283 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2284 break;
2285 }
2286
2287 if (i == n_iv_cands (data))
2288 {
2289 cand = XCNEW (struct iv_cand);
2290 cand->id = i;
2291
2292 if (!base && !step)
2293 cand->iv = NULL;
2294 else
2295 cand->iv = alloc_iv (base, step);
2296
2297 cand->pos = pos;
2298 if (pos != IP_ORIGINAL && cand->iv)
2299 {
2300 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2301 cand->var_after = cand->var_before;
2302 }
2303 cand->important = important;
2304 cand->incremented_at = incremented_at;
2305 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2306
2307 if (step
2308 && TREE_CODE (step) != INTEGER_CST)
2309 {
2310 fd_ivopts_data = data;
2311 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2312 }
2313
2314 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2315 cand->ainc_use = use;
2316 else
2317 cand->ainc_use = NULL;
2318
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 dump_cand (dump_file, cand);
2321 }
2322
2323 if (important && !cand->important)
2324 {
2325 cand->important = true;
2326 if (dump_file && (dump_flags & TDF_DETAILS))
2327 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2328 }
2329
2330 if (use)
2331 {
2332 bitmap_set_bit (use->related_cands, i);
2333 if (dump_file && (dump_flags & TDF_DETAILS))
2334 fprintf (dump_file, "Candidate %d is related to use %d\n",
2335 cand->id, use->id);
2336 }
2337
2338 return cand;
2339 }
2340
2341 /* Returns true if incrementing the induction variable at the end of the LOOP
2342 is allowed.
2343
2344 The purpose is to avoid splitting the latch edge with a biv increment, thus
2345 creating a jump, possibly confusing other optimization passes and leaving
2346 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2347 is not available (so we do not have a better alternative), or if the latch
2348 edge is already nonempty. */
2349
2350 static bool
2351 allow_ip_end_pos_p (struct loop *loop)
2352 {
2353 if (!ip_normal_pos (loop))
2354 return true;
2355
2356 if (!empty_block_p (ip_end_pos (loop)))
2357 return true;
2358
2359 return false;
2360 }
2361
2362 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2363 The important field is set to IMPORTANT. */
2364
2365 static void
2366 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2367 bool important, struct iv_use *use)
2368 {
2369 basic_block use_bb = gimple_bb (use->stmt);
2370 enum machine_mode mem_mode;
2371 unsigned HOST_WIDE_INT cstepi;
2372
2373 /* If we insert the increment in any position other than the standard
2374 ones, we must ensure that it is incremented once per iteration.
2375 It must not be in an inner nested loop, or on one side of an if
2376 statement. */
2377 if (use_bb->loop_father != data->current_loop
2378 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2379 || stmt_could_throw_p (use->stmt)
2380 || !cst_and_fits_in_hwi (step))
2381 return;
2382
2383 cstepi = int_cst_value (step);
2384
2385 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2386 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2387 || USE_STORE_PRE_INCREMENT (mem_mode))
2388 && GET_MODE_SIZE (mem_mode) == cstepi)
2389 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2390 || USE_STORE_PRE_DECREMENT (mem_mode))
2391 && GET_MODE_SIZE (mem_mode) == -cstepi))
2392 {
2393 enum tree_code code = MINUS_EXPR;
2394 tree new_base;
2395 tree new_step = step;
2396
2397 if (POINTER_TYPE_P (TREE_TYPE (base)))
2398 {
2399 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2400 code = POINTER_PLUS_EXPR;
2401 }
2402 else
2403 new_step = fold_convert (TREE_TYPE (base), new_step);
2404 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2405 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2406 use->stmt);
2407 }
2408 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2409 || USE_STORE_POST_INCREMENT (mem_mode))
2410 && GET_MODE_SIZE (mem_mode) == cstepi)
2411 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2412 || USE_STORE_POST_DECREMENT (mem_mode))
2413 && GET_MODE_SIZE (mem_mode) == -cstepi))
2414 {
2415 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2416 use->stmt);
2417 }
2418 }
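
/* For illustration (a hypothetical target): with 4-byte post-increment
   loads, a load from *p with step 4 gets an IP_AFTER_USE candidate with
   base p and step 4, so that the increment can be folded into the access.
   In the pre-increment case the candidate base is shifted one step back,
   to base - step, so that incrementing just before the use yields exactly
   the address the use needs.  */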
2419
2420 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2421 If USE is not NULL, the candidate is set as related to it. The candidate
2422 computation is scheduled at all available positions. */
2423
2424 static void
2425 add_candidate (struct ivopts_data *data,
2426 tree base, tree step, bool important, struct iv_use *use)
2427 {
2428 if (ip_normal_pos (data->current_loop))
2429 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2430 if (ip_end_pos (data->current_loop)
2431 && allow_ip_end_pos_p (data->current_loop))
2432 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2433
2434 if (use != NULL && use->type == USE_ADDRESS)
2435 add_autoinc_candidates (data, base, step, important, use);
2436 }
2437
2438 /* Adds standard iv candidates. */
2439
2440 static void
2441 add_standard_iv_candidates (struct ivopts_data *data)
2442 {
2443 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2444
2445 /* The same for the long type if it is wider and still fast enough. */
2446 if (TYPE_PRECISION
2447 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2448 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2449 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2450 build_int_cst (long_integer_type_node, 1), true, NULL);
2451
2452 /* The same for the long long type if it is wider and still fast enough. */
2453 if (TYPE_PRECISION
2454 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2455 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2456 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2457 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2458 }
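
/* For illustration: the standard candidates correspond to the counters

     for (int i = 0; ...; i++)
     for (long i = 0; ...; i++)
     for (long long i = 0; ...; i++)

   i.e. candidates with base 0 and step 1, where each wider type is only
   added if it is actually wider than the previous one and still at most
   BITS_PER_WORD bits wide.  */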
2459
2460
2461 /* Adds candidates based on the old induction variable IV. */
2462
2463 static void
2464 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2465 {
2466 gimple phi;
2467 tree def;
2468 struct iv_cand *cand;
2469
2470 add_candidate (data, iv->base, iv->step, true, NULL);
2471
2472 /* The same, but with initial value zero. */
2473 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2474 add_candidate (data, size_int (0), iv->step, true, NULL);
2475 else
2476 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2477 iv->step, true, NULL);
2478
2479 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2480 if (gimple_code (phi) == GIMPLE_PHI)
2481 {
2482 /* Additionally record the possibility of leaving the original iv
2483 untouched. */
2484 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2485 cand = add_candidate_1 (data,
2486 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2487 SSA_NAME_DEF_STMT (def));
2488 cand->var_before = iv->ssa_name;
2489 cand->var_after = def;
2490 }
2491 }
2492
2493 /* Adds candidates based on the old induction variables. */
2494
2495 static void
2496 add_old_ivs_candidates (struct ivopts_data *data)
2497 {
2498 unsigned i;
2499 struct iv *iv;
2500 bitmap_iterator bi;
2501
2502 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2503 {
2504 iv = ver_info (data, i)->iv;
2505 if (iv && iv->biv_p && !integer_zerop (iv->step))
2506 add_old_iv_candidates (data, iv);
2507 }
2508 }
2509
2510 /* Adds candidates based on the value of the induction variable IV and USE. */
2511
2512 static void
2513 add_iv_value_candidates (struct ivopts_data *data,
2514 struct iv *iv, struct iv_use *use)
2515 {
2516 unsigned HOST_WIDE_INT offset;
2517 tree base;
2518 tree basetype;
2519
2520 add_candidate (data, iv->base, iv->step, false, use);
2521
2522 /* The same, but with initial value zero. Make such a variable important,
2523 since it is generic enough that possibly many uses may be based
2524 on it. */
2525 basetype = TREE_TYPE (iv->base);
2526 if (POINTER_TYPE_P (basetype))
2527 basetype = sizetype;
2528 add_candidate (data, build_int_cst (basetype, 0),
2529 iv->step, true, use);
2530
2531 /* Third, try removing the constant offset. Make sure to even
2532 add a candidate for &a[0] vs. (T *)&a. */
2533 base = strip_offset (iv->base, &offset);
2534 if (offset
2535 || base != iv->base)
2536 add_candidate (data, base, iv->step, false, use);
2537 }
2538
2539 /* Adds candidates based on the uses. */
2540
2541 static void
2542 add_derived_ivs_candidates (struct ivopts_data *data)
2543 {
2544 unsigned i;
2545
2546 for (i = 0; i < n_iv_uses (data); i++)
2547 {
2548 struct iv_use *use = iv_use (data, i);
2549
2550 if (!use)
2551 continue;
2552
2553 switch (use->type)
2554 {
2555 case USE_NONLINEAR_EXPR:
2556 case USE_COMPARE:
2557 case USE_ADDRESS:
2558 /* Just add the ivs based on the value of the iv used here. */
2559 add_iv_value_candidates (data, use->iv, use);
2560 break;
2561
2562 default:
2563 gcc_unreachable ();
2564 }
2565 }
2566 }
2567
2568 /* Record important candidates and add them to related_cands bitmaps
2569 if needed. */
2570
2571 static void
2572 record_important_candidates (struct ivopts_data *data)
2573 {
2574 unsigned i;
2575 struct iv_use *use;
2576
2577 for (i = 0; i < n_iv_cands (data); i++)
2578 {
2579 struct iv_cand *cand = iv_cand (data, i);
2580
2581 if (cand->important)
2582 bitmap_set_bit (data->important_candidates, i);
2583 }
2584
2585 data->consider_all_candidates = (n_iv_cands (data)
2586 <= CONSIDER_ALL_CANDIDATES_BOUND);
2587
2588 if (data->consider_all_candidates)
2589 {
2590 /* We will not need "related_cands" bitmaps in this case,
2591 so release them to decrease peak memory consumption. */
2592 for (i = 0; i < n_iv_uses (data); i++)
2593 {
2594 use = iv_use (data, i);
2595 BITMAP_FREE (use->related_cands);
2596 }
2597 }
2598 else
2599 {
2600 /* Add important candidates to the related_cands bitmaps. */
2601 for (i = 0; i < n_iv_uses (data); i++)
2602 bitmap_ior_into (iv_use (data, i)->related_cands,
2603 data->important_candidates);
2604 }
2605 }
2606
2607 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2608 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2609 we allocate a small hash table for every use. */
2610
2611 static void
2612 alloc_use_cost_map (struct ivopts_data *data)
2613 {
2614 unsigned i, size, s, j;
2615
2616 for (i = 0; i < n_iv_uses (data); i++)
2617 {
2618 struct iv_use *use = iv_use (data, i);
2619 bitmap_iterator bi;
2620
2621 if (data->consider_all_candidates)
2622 size = n_iv_cands (data);
2623 else
2624 {
2625 s = 0;
2626 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2627 {
2628 s++;
2629 }
2630
2631 /* Round up to a power of two, so that computing the modulus is fast. */
2632 for (size = 1; size < s; size <<= 1)
2633 continue;
2634 }
2635
2636 use->n_map_members = size;
2637 use->cost_map = XCNEWVEC (struct cost_pair, size);
2638 }
2639 }
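
/* For illustration (hypothetical numbers): with 5 related candidates the
   table is sized to the next power of two, 8, so that set_use_iv_cost and
   get_use_iv_cost can reduce a candidate id modulo the size using a mask,
   s = cand->id & (use->n_map_members - 1), instead of a division, at the
   price of a few unused slots.  */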
2640
2641 /* Returns a description of the computation cost of an expression whose
2642 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2643
2644 static comp_cost
2645 new_cost (unsigned runtime, unsigned complexity)
2646 {
2647 comp_cost cost;
2648
2649 cost.cost = runtime;
2650 cost.complexity = complexity;
2651
2652 return cost;
2653 }
2654
2655 /* Adds costs COST1 and COST2. */
2656
2657 static comp_cost
2658 add_costs (comp_cost cost1, comp_cost cost2)
2659 {
2660 cost1.cost += cost2.cost;
2661 cost1.complexity += cost2.complexity;
2662
2663 return cost1;
2664 }
2665 /* Subtracts COST2 from COST1. */
2666
2667 static comp_cost
2668 sub_costs (comp_cost cost1, comp_cost cost2)
2669 {
2670 cost1.cost -= cost2.cost;
2671 cost1.complexity -= cost2.complexity;
2672
2673 return cost1;
2674 }
2675
2676 /* Returns a negative number if COST1 < COST2, a positive number if
2677 COST1 > COST2, and 0 if COST1 = COST2. */
2678
2679 static int
2680 compare_costs (comp_cost cost1, comp_cost cost2)
2681 {
2682 if (cost1.cost == cost2.cost)
2683 return cost1.complexity - cost2.complexity;
2684
2685 return cost1.cost - cost2.cost;
2686 }
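
/* For illustration (hypothetical values): costs compare lexicographically
   on (cost, complexity).  A cost of {4, 1} is cheaper than {4, 2} but more
   expensive than {3, 7}; complexity only breaks ties between equal runtime
   costs.  */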
2687
2688 /* Returns true if COST is infinite. */
2689
2690 static bool
2691 infinite_cost_p (comp_cost cost)
2692 {
2693 return cost.cost == INFTY;
2694 }
2695
2696 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2697 depends on the invariants DEPENDS_ON, that the value used in expressing it
2698 is VALUE, and, for iv elimination, that the comparison operator is COMP. */
2699
2700 static void
2701 set_use_iv_cost (struct ivopts_data *data,
2702 struct iv_use *use, struct iv_cand *cand,
2703 comp_cost cost, bitmap depends_on, tree value,
2704 enum tree_code comp, int inv_expr_id)
2705 {
2706 unsigned i, s;
2707
2708 if (infinite_cost_p (cost))
2709 {
2710 BITMAP_FREE (depends_on);
2711 return;
2712 }
2713
2714 if (data->consider_all_candidates)
2715 {
2716 use->cost_map[cand->id].cand = cand;
2717 use->cost_map[cand->id].cost = cost;
2718 use->cost_map[cand->id].depends_on = depends_on;
2719 use->cost_map[cand->id].value = value;
2720 use->cost_map[cand->id].comp = comp;
2721 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2722 return;
2723 }
2724
2725 /* n_map_members is a power of two, so this computes modulo. */
2726 s = cand->id & (use->n_map_members - 1);
2727 for (i = s; i < use->n_map_members; i++)
2728 if (!use->cost_map[i].cand)
2729 goto found;
2730 for (i = 0; i < s; i++)
2731 if (!use->cost_map[i].cand)
2732 goto found;
2733
2734 gcc_unreachable ();
2735
2736 found:
2737 use->cost_map[i].cand = cand;
2738 use->cost_map[i].cost = cost;
2739 use->cost_map[i].depends_on = depends_on;
2740 use->cost_map[i].value = value;
2741 use->cost_map[i].comp = comp;
2742 use->cost_map[i].inv_expr_id = inv_expr_id;
2743 }
2744
2745 /* Gets cost of (USE, CANDIDATE) pair. */
2746
2747 static struct cost_pair *
2748 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2749 struct iv_cand *cand)
2750 {
2751 unsigned i, s;
2752 struct cost_pair *ret;
2753
2754 if (!cand)
2755 return NULL;
2756
2757 if (data->consider_all_candidates)
2758 {
2759 ret = use->cost_map + cand->id;
2760 if (!ret->cand)
2761 return NULL;
2762
2763 return ret;
2764 }
2765
2766 /* n_map_members is a power of two, so this computes modulo. */
2767 s = cand->id & (use->n_map_members - 1);
2768 for (i = s; i < use->n_map_members; i++)
2769 if (use->cost_map[i].cand == cand)
2770 return use->cost_map + i;
2771
2772 for (i = 0; i < s; i++)
2773 if (use->cost_map[i].cand == cand)
2774 return use->cost_map + i;
2775
2776 return NULL;
2777 }
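
/* For illustration: in the !consider_all_candidates case the cost map is a
   small open-addressing hash table.  Both insertion (set_use_iv_cost) and
   lookup (get_use_iv_cost) start probing at slot
   cand->id & (n_map_members - 1) and scan linearly, wrapping around once,
   so a pair stored for a candidate is always found again by the same probe
   sequence.  */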
2778
2779 /* Returns an estimate of the cost of computing SEQ. */
2780
2781 static unsigned
2782 seq_cost (rtx seq, bool speed)
2783 {
2784 unsigned cost = 0;
2785 rtx set;
2786
2787 for (; seq; seq = NEXT_INSN (seq))
2788 {
2789 set = single_set (seq);
2790 if (set)
2791 cost += set_src_cost (SET_SRC (set), speed);
2792 else
2793 cost++;
2794 }
2795
2796 return cost;
2797 }
2798
2799 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2800 static rtx
2801 produce_memory_decl_rtl (tree obj, int *regno)
2802 {
2803 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2804 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2805 rtx x;
2806
2807 gcc_assert (obj);
2808 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2809 {
2810 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2811 x = gen_rtx_SYMBOL_REF (address_mode, name);
2812 SET_SYMBOL_REF_DECL (x, obj);
2813 x = gen_rtx_MEM (DECL_MODE (obj), x);
2814 set_mem_addr_space (x, as);
2815 targetm.encode_section_info (obj, x, true);
2816 }
2817 else
2818 {
2819 x = gen_raw_REG (address_mode, (*regno)++);
2820 x = gen_rtx_MEM (DECL_MODE (obj), x);
2821 set_mem_addr_space (x, as);
2822 }
2823
2824 return x;
2825 }
2826
2827 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2828 walk_tree. DATA contains the current fake register number. */
2829
2830 static tree
2831 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2832 {
2833 tree obj = NULL_TREE;
2834 rtx x = NULL_RTX;
2835 int *regno = (int *) data;
2836
2837 switch (TREE_CODE (*expr_p))
2838 {
2839 case ADDR_EXPR:
2840 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2841 handled_component_p (*expr_p);
2842 expr_p = &TREE_OPERAND (*expr_p, 0))
2843 continue;
2844 obj = *expr_p;
2845 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2846 x = produce_memory_decl_rtl (obj, regno);
2847 break;
2848
2849 case SSA_NAME:
2850 *ws = 0;
2851 obj = SSA_NAME_VAR (*expr_p);
2852 if (!DECL_RTL_SET_P (obj))
2853 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2854 break;
2855
2856 case VAR_DECL:
2857 case PARM_DECL:
2858 case RESULT_DECL:
2859 *ws = 0;
2860 obj = *expr_p;
2861
2862 if (DECL_RTL_SET_P (obj))
2863 break;
2864
2865 if (DECL_MODE (obj) == BLKmode)
2866 x = produce_memory_decl_rtl (obj, regno);
2867 else
2868 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2869
2870 break;
2871
2872 default:
2873 break;
2874 }
2875
2876 if (x)
2877 {
2878 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2879 SET_DECL_RTL (obj, x);
2880 }
2881
2882 return NULL_TREE;
2883 }
2884
2885 /* Determines the cost of the computation of EXPR. */
2886
2887 static unsigned
2888 computation_cost (tree expr, bool speed)
2889 {
2890 rtx seq, rslt;
2891 tree type = TREE_TYPE (expr);
2892 unsigned cost;
2893 /* Avoid using hard regs in ways which may be unsupported. */
2894 int regno = LAST_VIRTUAL_REGISTER + 1;
2895 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2896 enum node_frequency real_frequency = node->frequency;
2897
2898 node->frequency = NODE_FREQUENCY_NORMAL;
2899 crtl->maybe_hot_insn_p = speed;
2900 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2901 start_sequence ();
2902 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2903 seq = get_insns ();
2904 end_sequence ();
2905 default_rtl_profile ();
2906 node->frequency = real_frequency;
2907
2908 cost = seq_cost (seq, speed);
2909 if (MEM_P (rslt))
2910 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2911 TYPE_ADDR_SPACE (type), speed);
2912 else if (!REG_P (rslt))
2913 cost += set_src_cost (rslt, speed);
2914
2915 return cost;
2916 }
2917
2918 /* Returns variable containing the value of candidate CAND at statement AT. */
2919
2920 static tree
2921 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2922 {
2923 if (stmt_after_increment (loop, cand, stmt))
2924 return cand->var_after;
2925 else
2926 return cand->var_before;
2927 }
2928
2929 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2930 same precision that is at least as wide as the precision of TYPE, stores
2931 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2932 type of A and B. */
2933
2934 static tree
2935 determine_common_wider_type (tree *a, tree *b)
2936 {
2937 tree wider_type = NULL;
2938 tree suba, subb;
2939 tree atype = TREE_TYPE (*a);
2940
2941 if (CONVERT_EXPR_P (*a))
2942 {
2943 suba = TREE_OPERAND (*a, 0);
2944 wider_type = TREE_TYPE (suba);
2945 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2946 return atype;
2947 }
2948 else
2949 return atype;
2950
2951 if (CONVERT_EXPR_P (*b))
2952 {
2953 subb = TREE_OPERAND (*b, 0);
2954 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2955 return atype;
2956 }
2957 else
2958 return atype;
2959
2960 *a = suba;
2961 *b = subb;
2962 return wider_type;
2963 }
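
/* For illustration (hypothetical types): if *A is (unsigned int) a_long and
   *B is (unsigned int) b_long, where a_long and b_long have the wider type
   long, this rewrites *A and *B to a_long and b_long and returns long, so
   that the caller can fold a_long - b_long in the wider type before
   truncating the result.  */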
2964
2965 /* Determines the expression by which USE is expressed from induction variable
2966 CAND at statement AT in LOOP. The expression is stored in a decomposed
2967 form into AFF. Returns false if USE cannot be expressed using CAND. */
2968
2969 static bool
2970 get_computation_aff (struct loop *loop,
2971 struct iv_use *use, struct iv_cand *cand, gimple at,
2972 struct affine_tree_combination *aff)
2973 {
2974 tree ubase = use->iv->base;
2975 tree ustep = use->iv->step;
2976 tree cbase = cand->iv->base;
2977 tree cstep = cand->iv->step, cstep_common;
2978 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2979 tree common_type, var;
2980 tree uutype;
2981 aff_tree cbase_aff, var_aff;
2982 double_int rat;
2983
2984 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2985 {
2986 /* We do not have enough precision to express the values of USE. */
2987 return false;
2988 }
2989
2990 var = var_at_stmt (loop, cand, at);
2991 uutype = unsigned_type_for (utype);
2992
2993 /* If the conversion is not a noop, perform it. */
2994 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2995 {
2996 cstep = fold_convert (uutype, cstep);
2997 cbase = fold_convert (uutype, cbase);
2998 var = fold_convert (uutype, var);
2999 }
3000
3001 if (!constant_multiple_of (ustep, cstep, &rat))
3002 return false;
3003
3004 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3005 type, we achieve better folding by computing their difference in this
3006 wider type, and casting the result to UUTYPE. We do not need to worry about
3007 overflows, as all the arithmetic will in the end be performed in UUTYPE
3008 anyway. */
3009 common_type = determine_common_wider_type (&ubase, &cbase);
3010
3011 /* use = ubase - ratio * cbase + ratio * var. */
3012 tree_to_aff_combination (ubase, common_type, aff);
3013 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3014 tree_to_aff_combination (var, uutype, &var_aff);
3015
3016 /* We need to shift the value if we are after the increment. */
3017 if (stmt_after_increment (loop, cand, at))
3018 {
3019 aff_tree cstep_aff;
3020
3021 if (common_type != uutype)
3022 cstep_common = fold_convert (common_type, cstep);
3023 else
3024 cstep_common = cstep;
3025
3026 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3027 aff_combination_add (&cbase_aff, &cstep_aff);
3028 }
3029
3030 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3031 aff_combination_add (aff, &cbase_aff);
3032 if (common_type != uutype)
3033 aff_combination_convert (aff, uutype);
3034
3035 aff_combination_scale (&var_aff, rat);
3036 aff_combination_add (aff, &var_aff);
3037
3038 return true;
3039 }
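
/* For illustration (hypothetical values): suppose the use has
   ubase == &a[0] and ustep == 4, and the candidate has cbase == p and
   cstep == 4, so that rat == 1.  The decomposition built above is then

     use = ubase - 1 * cbase + 1 * var = (&a[0] - p) + var

   i.e. the use is recovered from the candidate's variable by adding the
   constant difference of the two bases.  */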
3040
3041 /* Determines the expression by which USE is expressed from induction variable
3042 CAND at statement AT in LOOP. The computation is unshared. */
3043
3044 static tree
3045 get_computation_at (struct loop *loop,
3046 struct iv_use *use, struct iv_cand *cand, gimple at)
3047 {
3048 aff_tree aff;
3049 tree type = TREE_TYPE (use->iv->base);
3050
3051 if (!get_computation_aff (loop, use, cand, at, &aff))
3052 return NULL_TREE;
3053 unshare_aff_combination (&aff);
3054 return fold_convert (type, aff_combination_to_tree (&aff));
3055 }
3056
3057 /* Determines the expression by which USE is expressed from induction variable
3058 CAND in LOOP. The computation is unshared. */
3059
3060 static tree
3061 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3062 {
3063 return get_computation_at (loop, use, cand, use->stmt);
3064 }
3065
3066 /* Adjust the cost COST for being in loop setup rather than loop body.
3067 If we're optimizing for space, the loop setup overhead is constant;
3068 if we're optimizing for speed, amortize it over the per-iteration cost. */
3069 static unsigned
3070 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3071 {
3072 if (cost == INFTY)
3073 return cost;
3074 else if (optimize_loop_for_speed_p (data->current_loop))
3075 return cost / avg_loop_niter (data->current_loop);
3076 else
3077 return cost;
3078 }
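
/* For illustration (hypothetical numbers): with an average of 10 iterations,
   a setup cost of 20 placed before the loop is charged as 20 / 10 = 2 units
   when optimizing for speed, but stays at 20 when optimizing for size, since
   the setup code is emitted exactly once either way.  */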
3079
3080 /* Returns cost of addition in MODE. */
3081
3082 unsigned
3083 add_regs_cost (enum machine_mode mode, bool speed)
3084 {
3085 static unsigned costs[NUM_MACHINE_MODES][2];
3086 rtx seq;
3087 unsigned cost;
3088
3089 if (costs[mode][speed])
3090 return costs[mode][speed];
3091
3092 start_sequence ();
3093 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3094 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3095 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3096 NULL_RTX);
3097 seq = get_insns ();
3098 end_sequence ();
3099
3100 cost = seq_cost (seq, speed);
3101 if (!cost)
3102 cost = 1;
3103
3104 costs[mode][speed] = cost;
3105
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3107 fprintf (dump_file, "Addition in %s costs %d\n",
3108 GET_MODE_NAME (mode), cost);
3109 return cost;
3110 }
3111
3112 /* Returns cost of multiplication in MODE. */
3113
3114 unsigned
3115 multiply_regs_cost (enum machine_mode mode, bool speed)
3116 {
3117 static unsigned costs[NUM_MACHINE_MODES][2];
3118 rtx seq;
3119 unsigned cost;
3120
3121 if (costs[mode][speed])
3122 return costs[mode][speed];
3123
3124 start_sequence ();
3125 force_operand (gen_rtx_fmt_ee (MULT, mode,
3126 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3127 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3128 NULL_RTX);
3129 seq = get_insns ();
3130 end_sequence ();
3131
3132 cost = seq_cost (seq, speed);
3133 if (!cost)
3134 cost = 1;
3135
3136 costs[mode][speed] = cost;
3137
3138 if (dump_file && (dump_flags & TDF_DETAILS))
3139 fprintf (dump_file, "Multiplication in %s costs %d\n",
3140 GET_MODE_NAME (mode), cost);
3141 return cost;
3142 }
3143
3144 /* Returns cost of addition with a constant in MODE. */
3145
3146 unsigned
3147 add_const_cost (enum machine_mode mode, bool speed)
3148 {
3149 static unsigned costs[NUM_MACHINE_MODES][2];
3150 rtx seq;
3151 unsigned cost;
3152
3153 if (costs[mode][speed])
3154 return costs[mode][speed];
3155
3156 /* Arbitrarily generate insns for x + 2, as the exact constant
3157 shouldn't matter. */
3158 start_sequence ();
3159 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3160 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3161 gen_int_mode (2, mode)),
3162 NULL_RTX);
3163 seq = get_insns ();
3164 end_sequence ();
3165
3166 cost = seq_cost (seq, speed);
3167 if (!cost)
3168 cost = 1;
3169
3170 costs[mode][speed] = cost;
3171
3172 if (dump_file && (dump_flags & TDF_DETAILS))
3173 fprintf (dump_file, "Addition to constant in %s costs %d\n",
3174 GET_MODE_NAME (mode), cost);
3175 return cost;
3176 }
3177
3178 /* Returns cost of extend or truncate in MODE. */
3179
3180 unsigned
3181 extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
3182 {
3183 static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
3184 rtx seq;
3185 unsigned cost;
3186 enum machine_mode mode_to = TYPE_MODE (type_to);
3187 enum machine_mode mode_from = TYPE_MODE (type_from);
3188 tree size_to = TYPE_SIZE (type_to);
3189 tree size_from = TYPE_SIZE (type_from);
3190 enum rtx_code code;
3191
3192 gcc_assert (TREE_CODE (size_to) == INTEGER_CST
3193 && TREE_CODE (size_from) == INTEGER_CST);
3194
3195 if (costs[mode_to][mode_from][speed])
3196 return costs[mode_to][mode_from][speed];
3197
3198 if (tree_int_cst_lt (size_to, size_from))
3199 code = TRUNCATE;
3200 else if (TYPE_UNSIGNED (type_to))
3201 code = ZERO_EXTEND;
3202 else
3203 code = SIGN_EXTEND;
3204
3205 start_sequence ();
3206 gen_rtx_fmt_e (code, mode_to,
3207 gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1));
3208 seq = get_insns ();
3209 end_sequence ();
3210
3211 cost = seq_cost (seq, speed);
3212 if (!cost)
3213 cost = 1;
3214
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, "Conversion from %s to %s costs %d\n",
3217 GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);
3218
3219 costs[mode_to][mode_from][speed] = cost;
3220 return cost;
3221 }
3222
3223 /* Returns cost of negation in MODE. */
3224
3225 unsigned
3226 negate_reg_cost (enum machine_mode mode, bool speed)
3227 {
3228 static unsigned costs[NUM_MACHINE_MODES][2];
3229 rtx seq;
3230 unsigned cost;
3231
3232 if (costs[mode][speed])
3233 return costs[mode][speed];
3234
3235 start_sequence ();
3236 force_operand (gen_rtx_fmt_e (NEG, mode,
3237 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
3238 NULL_RTX);
3239 seq = get_insns ();
3240 end_sequence ();
3241
3242 cost = seq_cost (seq, speed);
3243 if (!cost)
3244 cost = 1;
3245
3246 costs[mode][speed] = cost;
3247
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3249 fprintf (dump_file, "Negation in %s costs %d\n",
3250 GET_MODE_NAME (mode), cost);
3251 return cost;
3252 }
3253
3254 /* Entry in a hashtable of already known costs for multiplication. */
3255 struct mbc_entry
3256 {
3257 HOST_WIDE_INT cst; /* The constant to multiply by. */
3258 enum machine_mode mode; /* In mode. */
3259 unsigned cost; /* The cost. */
3260 };
3261
3262 /* Computes the hash value for ENTRY. */
3263
3264 static hashval_t
3265 mbc_entry_hash (const void *entry)
3266 {
3267 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3268
3269 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3270 }
3271
3272 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3273
3274 static int
3275 mbc_entry_eq (const void *entry1, const void *entry2)
3276 {
3277 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3278 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3279
3280 return (e1->mode == e2->mode
3281 && e1->cst == e2->cst);
3282 }
3283
3284 /* Returns cost of multiplication by constant CST in MODE. */
3285
3286 unsigned
3287 multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3288 {
3289 struct mbc_entry **cached, act;
3290 rtx seq;
3291 unsigned cost;
3292
3293 gcc_assert (cost_tables_exist);
3294
3295 act.mode = mode;
3296 act.cst = cst;
3297 cached = (struct mbc_entry **)
3298 htab_find_slot (mult_costs[speed], &act, INSERT);
3299
3300 if (*cached)
3301 return (*cached)->cost;
3302
3303 *cached = XNEW (struct mbc_entry);
3304 (*cached)->mode = mode;
3305 (*cached)->cst = cst;
3306
3307 start_sequence ();
3308 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3309 gen_int_mode (cst, mode), NULL_RTX, 0);
3310 seq = get_insns ();
3311 end_sequence ();
3312
3313 cost = seq_cost (seq, speed);
3314
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3316 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3317 (int) cst, GET_MODE_NAME (mode), cost);
3318
3319 (*cached)->cost = cost;
3320
3321 return cost;
3322 }
3323
3324 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3325 validity for a memory reference accessing memory of mode MODE in
3326 address space AS. */
3327
3328 DEF_VEC_P (sbitmap);
3329 DEF_VEC_ALLOC_P (sbitmap, heap);
3330
3331 bool
3332 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3333 addr_space_t as)
3334 {
3335 #define MAX_RATIO 128
3336 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3337 static VEC (sbitmap, heap) *valid_mult_list;
3338 sbitmap valid_mult;
3339
3340 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3341 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3342
3343 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3344 if (!valid_mult)
3345 {
3346 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3347 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3348 rtx addr;
3349 HOST_WIDE_INT i;
3350
3351 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3352 sbitmap_zero (valid_mult);
3353 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3354 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3355 {
3356 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3357 if (memory_address_addr_space_p (mode, addr, as))
3358 SET_BIT (valid_mult, i + MAX_RATIO);
3359 }
3360
3361 if (dump_file && (dump_flags & TDF_DETAILS))
3362 {
3363 fprintf (dump_file, " allowed multipliers:");
3364 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3365 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3366 fprintf (dump_file, " %d", (int) i);
3367 fprintf (dump_file, "\n");
3368 fprintf (dump_file, "\n");
3369 }
3370
3371 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3372 }
3373
3374 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3375 return false;
3376
3377 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3378 }
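
/* For illustration (target-dependent, hypothetical example): on an i386-like
   target whose addresses allow index scales of 1, 2, 4 and 8, this function
   would be expected to return true for ratio 4 and false for ratio 3.  Each
   (mode, address space) pair gets its own cached bitmap covering the ratios
   in [-MAX_RATIO, MAX_RATIO].  */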
3379
3380 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3381 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3382 variable is omitted. Compute the cost for a memory reference that accesses
3383 a memory location of mode MEM_MODE in address space AS.
3384
3385 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3386 size of MEM_MODE / RATIO) is available. To make this determination, we
3387 look at the size of the increment to be made, which is given in CSTEP.
3388 CSTEP may be zero if the step is unknown.
3389 STMT_AFTER_INC is true iff the statement we're looking at is after the
3390 increment of the original biv.
3391
3392 TODO -- there must be some better way. This all is quite crude. */
3393
3394 typedef struct
3395 {
3396 HOST_WIDE_INT min_offset, max_offset;
3397 unsigned costs[2][2][2][2];
3398 } *address_cost_data;
3399
3400 DEF_VEC_P (address_cost_data);
3401 DEF_VEC_ALLOC_P (address_cost_data, heap);
3402
3403 static comp_cost
3404 get_address_cost (bool symbol_present, bool var_present,
3405 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3406 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3407 addr_space_t as, bool speed,
3408 bool stmt_after_inc, bool *may_autoinc)
3409 {
3410 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3411 static VEC(address_cost_data, heap) *address_cost_data_list;
3412 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3413 address_cost_data data;
3414 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3415 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3416 unsigned cost, acost, complexity;
3417 bool offset_p, ratio_p, autoinc;
3418 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3419 unsigned HOST_WIDE_INT mask;
3420 unsigned bits;
3421
3422 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3423 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3424 data_index + 1);
3425
3426 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3427 if (!data)
3428 {
3429 HOST_WIDE_INT i;
3430 HOST_WIDE_INT rat, off = 0;
3431 int old_cse_not_expected, width;
3432 unsigned sym_p, var_p, off_p, rat_p, add_c;
3433 rtx seq, addr, base;
3434 rtx reg0, reg1;
3435
3436 data = (address_cost_data) xcalloc (1, sizeof (*data));
3437
3438 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3439
3440 width = GET_MODE_BITSIZE (address_mode) - 1;
3441 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3442 width = HOST_BITS_PER_WIDE_INT - 1;
3443 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3444
3445 for (i = width; i >= 0; i--)
3446 {
3447 off = -((HOST_WIDE_INT) 1 << i);
3448 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3449 if (memory_address_addr_space_p (mem_mode, addr, as))
3450 break;
3451 }
3452 data->min_offset = (i == -1 ? 0 : off);
3453
3454 for (i = width; i >= 0; i--)
3455 {
3456 off = ((HOST_WIDE_INT) 1 << i) - 1;
3457 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3458 if (memory_address_addr_space_p (mem_mode, addr, as))
3459 break;
3460 }
3461 if (i == -1)
3462 off = 0;
3463 data->max_offset = off;
3464
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3466 {
3467 fprintf (dump_file, "get_address_cost:\n");
3468 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3469 GET_MODE_NAME (mem_mode),
3470 data->min_offset);
3471 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3472 GET_MODE_NAME (mem_mode),
3473 data->max_offset);
3474 }
3475
3476 rat = 1;
3477 for (i = 2; i <= MAX_RATIO; i++)
3478 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3479 {
3480 rat = i;
3481 break;
3482 }
3483
3484 /* Compute the cost of various addressing modes. */
3485 acost = 0;
3486 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3487 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3488
3489 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3490 || USE_STORE_PRE_DECREMENT (mem_mode))
3491 {
3492 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3493 has_predec[mem_mode]
3494 = memory_address_addr_space_p (mem_mode, addr, as);
3495 }
3496 if (USE_LOAD_POST_DECREMENT (mem_mode)
3497 || USE_STORE_POST_DECREMENT (mem_mode))
3498 {
3499 addr = gen_rtx_POST_DEC (address_mode, reg0);
3500 has_postdec[mem_mode]
3501 = memory_address_addr_space_p (mem_mode, addr, as);
3502 }
3503 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3504 || USE_STORE_PRE_INCREMENT (mem_mode))
3505 {
3506 addr = gen_rtx_PRE_INC (address_mode, reg0);
3507 has_preinc[mem_mode]
3508 = memory_address_addr_space_p (mem_mode, addr, as);
3509 }
3510 if (USE_LOAD_POST_INCREMENT (mem_mode)
3511 || USE_STORE_POST_INCREMENT (mem_mode))
3512 {
3513 addr = gen_rtx_POST_INC (address_mode, reg0);
3514 has_postinc[mem_mode]
3515 = memory_address_addr_space_p (mem_mode, addr, as);
3516 }
3517 for (i = 0; i < 16; i++)
3518 {
3519 sym_p = i & 1;
3520 var_p = (i >> 1) & 1;
3521 off_p = (i >> 2) & 1;
3522 rat_p = (i >> 3) & 1;
3523
3524 addr = reg0;
3525 if (rat_p)
3526 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3527 gen_int_mode (rat, address_mode));
3528
3529 if (var_p)
3530 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3531
3532 if (sym_p)
3533 {
3534 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3535 /* ??? We can run into trouble with some backends by presenting
3536 it with symbols which haven't been properly passed through
3537 targetm.encode_section_info. By setting the local bit, we
3538 enhance the probability of things working. */
3539 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3540
3541 if (off_p)
3542 base = gen_rtx_fmt_e (CONST, address_mode,
3543 gen_rtx_fmt_ee
3544 (PLUS, address_mode, base,
3545 gen_int_mode (off, address_mode)));
3546 }
3547 else if (off_p)
3548 base = gen_int_mode (off, address_mode);
3549 else
3550 base = NULL_RTX;
3551
3552 if (base)
3553 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3554
3555 start_sequence ();
3556 /* To avoid splitting addressing modes, pretend that no cse will
3557 follow. */
3558 old_cse_not_expected = cse_not_expected;
3559 cse_not_expected = true;
3560 addr = memory_address_addr_space (mem_mode, addr, as);
3561 cse_not_expected = old_cse_not_expected;
3562 seq = get_insns ();
3563 end_sequence ();
3564
3565 acost = seq_cost (seq, speed);
3566 acost += address_cost (addr, mem_mode, as, speed);
3567
3568 if (!acost)
3569 acost = 1;
3570 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3571 }
3572
3573 /* On some targets, it is quite expensive to load a symbol into a register,
3574 which makes addresses that contain symbols look much more expensive.
3575 However, the symbol will have to be loaded in any case before the
3576 loop (and quite likely we have it in a register already), so it does not
3577 make much sense to penalize them too heavily. So make some final
3578 tweaks for the SYMBOL_PRESENT modes:
3579
3580 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3581 var is cheaper, use this mode with a small penalty.
3582 If VAR_PRESENT is true, try whether the mode with
3583 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3584 if this is the case, use it. */
3585 add_c = add_regs_cost (address_mode, speed);
3586 for (i = 0; i < 8; i++)
3587 {
3588 var_p = i & 1;
3589 off_p = (i >> 1) & 1;
3590 rat_p = (i >> 2) & 1;
3591
3592 acost = data->costs[0][1][off_p][rat_p] + 1;
3593 if (var_p)
3594 acost += add_c;
3595
3596 if (acost < data->costs[1][var_p][off_p][rat_p])
3597 data->costs[1][var_p][off_p][rat_p] = acost;
3598 }
3599
3600 if (dump_file && (dump_flags & TDF_DETAILS))
3601 {
3602 fprintf (dump_file, "Address costs:\n");
3603
3604 for (i = 0; i < 16; i++)
3605 {
3606 sym_p = i & 1;
3607 var_p = (i >> 1) & 1;
3608 off_p = (i >> 2) & 1;
3609 rat_p = (i >> 3) & 1;
3610
3611 fprintf (dump_file, " ");
3612 if (sym_p)
3613 fprintf (dump_file, "sym + ");
3614 if (var_p)
3615 fprintf (dump_file, "var + ");
3616 if (off_p)
3617 fprintf (dump_file, "cst + ");
3618 if (rat_p)
3619 fprintf (dump_file, "rat * ");
3620
3621 acost = data->costs[sym_p][var_p][off_p][rat_p];
3622 fprintf (dump_file, "index costs %d\n", acost);
3623 }
3624 if (has_predec[mem_mode] || has_postdec[mem_mode]
3625 || has_preinc[mem_mode] || has_postinc[mem_mode])
3626 fprintf (dump_file, " May include autoinc/dec\n");
3627 fprintf (dump_file, "\n");
3628 }
3629
3630 VEC_replace (address_cost_data, address_cost_data_list,
3631 data_index, data);
3632 }
3633
3634 bits = GET_MODE_BITSIZE (address_mode);
3635 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3636 offset &= mask;
3637 if ((offset >> (bits - 1) & 1))
3638 offset |= ~mask;
3639 s_offset = offset;
3640
3641 autoinc = false;
3642 msize = GET_MODE_SIZE (mem_mode);
3643 autoinc_offset = offset;
3644 if (stmt_after_inc)
3645 autoinc_offset += ratio * cstep;
3646 if (symbol_present || var_present || ratio != 1)
3647 autoinc = false;
3648 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3649 && msize == cstep)
3650 || (has_postdec[mem_mode] && autoinc_offset == 0
3651 && msize == -cstep)
3652 || (has_preinc[mem_mode] && autoinc_offset == msize
3653 && msize == cstep)
3654 || (has_predec[mem_mode] && autoinc_offset == -msize
3655 && msize == -cstep))
3656 autoinc = true;
3657
3658 cost = 0;
3659 offset_p = (s_offset != 0
3660 && data->min_offset <= s_offset
3661 && s_offset <= data->max_offset);
3662 ratio_p = (ratio != 1
3663 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3664
3665 if (ratio != 1 && !ratio_p)
3666 cost += multiply_by_const_cost (ratio, address_mode, speed);
3667
3668 if (s_offset && !offset_p && !symbol_present)
3669 cost += add_regs_cost (address_mode, speed);
3670
3671 if (may_autoinc)
3672 *may_autoinc = autoinc;
3673 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3674 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3675 return new_cost (cost + acost, complexity);
3676 }
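
/* For illustration: the 16 precomputed costs above are indexed by four bits,
   one per optional address part.  E.g. i == 13 (binary 1101, i.e.
   sym_p == 1, var_p == 0, off_p == 1, rat_p == 1) prices an address of the
   shape sym + cst + rat * index.  */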
3677
3678 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3679 EXPR operand holding the shift. COST0 and COST1 are the costs for
3680 calculating the operands of EXPR. Returns true if successful, and returns
3681 the cost in COST. */
3682
3683 static bool
3684 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3685 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3686 {
3687 comp_cost res;
3688 tree op1 = TREE_OPERAND (expr, 1);
3689 tree cst = TREE_OPERAND (mult, 1);
3690 tree multop = TREE_OPERAND (mult, 0);
3691 int m = exact_log2 (int_cst_value (cst));
3692 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3693 int sa_cost;
3694
3695 if (!(m >= 0 && m < maxm))
3696 return false;
3697
3698 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3699 ? shiftadd_cost[speed][mode][m]
3700 : (mult == op1
3701 ? shiftsub1_cost[speed][mode][m]
3702 : shiftsub0_cost[speed][mode][m]));
3703 res = new_cost (sa_cost, 0);
3704 res = add_costs (res, mult == op1 ? cost0 : cost1);
3705
3706 STRIP_NOPS (multop);
3707 if (!is_gimple_val (multop))
3708 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3709
3710 *cost = res;
3711 return true;
3712 }
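
/* For illustration (hypothetical expression): b + a * 4 matches this pattern
   with cst == 4, hence m == 2, and is costed roughly as a single shift-add
   instruction on targets that have one, instead of a separate multiplication
   and addition.  */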
3713
3714 /* Estimates cost of forcing expression EXPR into a variable. */
3715
3716 static comp_cost
3717 force_expr_to_var_cost (tree expr, bool speed)
3718 {
3719 static bool costs_initialized = false;
3720 static unsigned integer_cost [2];
3721 static unsigned symbol_cost [2];
3722 static unsigned address_cost [2];
3723 tree op0, op1;
3724 comp_cost cost0, cost1, cost;
3725 enum machine_mode mode;
3726
3727 if (!costs_initialized)
3728 {
3729 tree type = build_pointer_type (integer_type_node);
3730 tree var, addr;
3731 rtx x;
3732 int i;
3733
3734 var = create_tmp_var_raw (integer_type_node, "test_var");
3735 TREE_STATIC (var) = 1;
3736 x = produce_memory_decl_rtl (var, NULL);
3737 SET_DECL_RTL (var, x);
3738
3739 addr = build1 (ADDR_EXPR, type, var);
3740
3741
3742 for (i = 0; i < 2; i++)
3743 {
3744 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3745 2000), i);
3746
3747 symbol_cost[i] = computation_cost (addr, i) + 1;
3748
3749 address_cost[i]
3750 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3751 if (dump_file && (dump_flags & TDF_DETAILS))
3752 {
3753 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3754 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3755 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3756 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3757 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3758 fprintf (dump_file, "\n");
3759 }
3760 }
3761
3762 costs_initialized = true;
3763 }
3764
3765 STRIP_NOPS (expr);
3766
3767 if (SSA_VAR_P (expr))
3768 return no_cost;
3769
3770 if (is_gimple_min_invariant (expr))
3771 {
3772 if (TREE_CODE (expr) == INTEGER_CST)
3773 return new_cost (integer_cost [speed], 0);
3774
3775 if (TREE_CODE (expr) == ADDR_EXPR)
3776 {
3777 tree obj = TREE_OPERAND (expr, 0);
3778
3779 if (TREE_CODE (obj) == VAR_DECL
3780 || TREE_CODE (obj) == PARM_DECL
3781 || TREE_CODE (obj) == RESULT_DECL)
3782 return new_cost (symbol_cost [speed], 0);
3783 }
3784
3785 return new_cost (address_cost [speed], 0);
3786 }
3787
3788 switch (TREE_CODE (expr))
3789 {
3790 case POINTER_PLUS_EXPR:
3791 case PLUS_EXPR:
3792 case MINUS_EXPR:
3793 case MULT_EXPR:
3794 op0 = TREE_OPERAND (expr, 0);
3795 op1 = TREE_OPERAND (expr, 1);
3796 STRIP_NOPS (op0);
3797 STRIP_NOPS (op1);
3798
3799 if (is_gimple_val (op0))
3800 cost0 = no_cost;
3801 else
3802 cost0 = force_expr_to_var_cost (op0, speed);
3803
3804 if (is_gimple_val (op1))
3805 cost1 = no_cost;
3806 else
3807 cost1 = force_expr_to_var_cost (op1, speed);
3808
3809 break;
3810
3811 case NEGATE_EXPR:
3812 op0 = TREE_OPERAND (expr, 0);
3813 STRIP_NOPS (op0);
3814 op1 = NULL_TREE;
3815
3816 if (is_gimple_val (op0))
3817 cost0 = no_cost;
3818 else
3819 cost0 = force_expr_to_var_cost (op0, speed);
3820
3821 cost1 = no_cost;
3822 break;
3823
3824 default:
3825 /* Just an arbitrary value, FIXME. */
3826 return new_cost (target_spill_cost[speed], 0);
3827 }
3828
3829 mode = TYPE_MODE (TREE_TYPE (expr));
3830 switch (TREE_CODE (expr))
3831 {
3832 case POINTER_PLUS_EXPR:
3833 case PLUS_EXPR:
3834 case MINUS_EXPR:
3835 case NEGATE_EXPR:
3836 cost = new_cost (add_regs_cost (mode, speed), 0);
3837 if (TREE_CODE (expr) != NEGATE_EXPR)
3838 {
3839 tree mult = NULL_TREE;
3840 comp_cost sa_cost;
3841 if (TREE_CODE (op1) == MULT_EXPR)
3842 mult = op1;
3843 else if (TREE_CODE (op0) == MULT_EXPR)
3844 mult = op0;
3845
3846 if (mult != NULL_TREE
3847 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3848 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3849 &sa_cost))
3850 return sa_cost;
3851 }
3852 break;
3853
3854 case MULT_EXPR:
3855 if (cst_and_fits_in_hwi (op0))
3856 cost = new_cost (multiply_by_const_cost (int_cst_value (op0),
3857 mode, speed), 0);
3858 else if (cst_and_fits_in_hwi (op1))
3859 cost = new_cost (multiply_by_const_cost (int_cst_value (op1),
3860 mode, speed), 0);
3861 else
3862 return new_cost (target_spill_cost [speed], 0);
3863 break;
3864
3865 default:
3866 gcc_unreachable ();
3867 }
3868
3869 cost = add_costs (cost, cost0);
3870 cost = add_costs (cost, cost1);
3871
3872 /* Bound the cost by target_spill_cost. The parts of complicated
3873 computations often are either loop invariant or at least can
3874 be shared between several iv uses, so letting this grow without
3875 limits would not give reasonable results. */
3876 if (cost.cost > (int) target_spill_cost [speed])
3877 cost.cost = target_spill_cost [speed];
3878
3879 return cost;
3880 }
3881
3882 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3883 invariants the computation depends on. */
3884
3885 static comp_cost
3886 force_var_cost (struct ivopts_data *data,
3887 tree expr, bitmap *depends_on)
3888 {
3889 if (depends_on)
3890 {
3891 fd_ivopts_data = data;
3892 walk_tree (&expr, find_depends, depends_on, NULL);
3893 }
3894
3895 return force_expr_to_var_cost (expr, data->speed);
3896 }
3897
3898 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3899 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3900 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3901 invariants the computation depends on. */
3902
3903 static comp_cost
3904 split_address_cost (struct ivopts_data *data,
3905 tree addr, bool *symbol_present, bool *var_present,
3906 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3907 {
3908 tree core;
3909 HOST_WIDE_INT bitsize;
3910 HOST_WIDE_INT bitpos;
3911 tree toffset;
3912 enum machine_mode mode;
3913 int unsignedp, volatilep;
3914
3915 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3916 &unsignedp, &volatilep, false);
3917
3918 if (toffset != 0
3919 || bitpos % BITS_PER_UNIT != 0
3920 || TREE_CODE (core) != VAR_DECL)
3921 {
3922 *symbol_present = false;
3923 *var_present = true;
3924 fd_ivopts_data = data;
3925 walk_tree (&addr, find_depends, depends_on, NULL);
3926 return new_cost (target_spill_cost[data->speed], 0);
3927 }
3928
3929 *offset += bitpos / BITS_PER_UNIT;
3930 if (TREE_STATIC (core)
3931 || DECL_EXTERNAL (core))
3932 {
3933 *symbol_present = true;
3934 *var_present = false;
3935 return no_cost;
3936 }
3937
3938 *symbol_present = false;
3939 *var_present = true;
3940 return no_cost;
3941 }
3942
3943 /* Estimates cost of expressing difference of addresses E1 - E2 as
3944 var + symbol + offset. The value of offset is added to OFFSET,
3945 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3946 part is missing. DEPENDS_ON is a set of the invariants the computation
3947 depends on. */
3948
3949 static comp_cost
3950 ptr_difference_cost (struct ivopts_data *data,
3951 tree e1, tree e2, bool *symbol_present, bool *var_present,
3952 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3953 {
3954 HOST_WIDE_INT diff = 0;
3955 aff_tree aff_e1, aff_e2;
3956 tree type;
3957
3958 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3959
3960 if (ptr_difference_const (e1, e2, &diff))
3961 {
3962 *offset += diff;
3963 *symbol_present = false;
3964 *var_present = false;
3965 return no_cost;
3966 }
3967
3968 if (integer_zerop (e2))
3969 return split_address_cost (data, TREE_OPERAND (e1, 0),
3970 symbol_present, var_present, offset, depends_on);
3971
3972 *symbol_present = false;
3973 *var_present = true;
3974
3975 type = signed_type_for (TREE_TYPE (e1));
3976 tree_to_aff_combination (e1, type, &aff_e1);
3977 tree_to_aff_combination (e2, type, &aff_e2);
3978 aff_combination_scale (&aff_e2, double_int_minus_one);
3979 aff_combination_add (&aff_e1, &aff_e2);
3980
3981 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3982 }
3983
3984 /* Estimates cost of expressing difference E1 - E2 as
3985 var + symbol + offset. The value of offset is added to OFFSET;
3986 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3987 part is missing. DEPENDS_ON is a set of the invariants the computation
3988 depends on. */
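/* A sketch of the fast paths below (with hypothetical operands): for
   E1 == x + 16 and E2 == x, strip_offset leaves E1 == E2 == x and adds
   16 to *OFFSET, so the result is no_cost with *VAR_PRESENT = false.
   Only when no such cancellation applies do we fall back to building
   the affine combination E1 - E2 and forcing it into a variable. */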
3989
3990 static comp_cost
3991 difference_cost (struct ivopts_data *data,
3992 tree e1, tree e2, bool *symbol_present, bool *var_present,
3993 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3994 {
3995 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3996 unsigned HOST_WIDE_INT off1, off2;
3997 aff_tree aff_e1, aff_e2;
3998 tree type;
3999
4000 e1 = strip_offset (e1, &off1);
4001 e2 = strip_offset (e2, &off2);
4002 *offset += off1 - off2;
4003
4004 STRIP_NOPS (e1);
4005 STRIP_NOPS (e2);
4006
4007 if (TREE_CODE (e1) == ADDR_EXPR)
4008 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4009 offset, depends_on);
4010 *symbol_present = false;
4011
4012 if (operand_equal_p (e1, e2, 0))
4013 {
4014 *var_present = false;
4015 return no_cost;
4016 }
4017
4018 *var_present = true;
4019
4020 if (integer_zerop (e2))
4021 return force_var_cost (data, e1, depends_on);
4022
4023 if (integer_zerop (e1))
4024 {
4025 comp_cost cost = force_var_cost (data, e2, depends_on);
4026 cost.cost += multiply_by_const_cost (-1, mode, data->speed);
4027 return cost;
4028 }
4029
4030 type = signed_type_for (TREE_TYPE (e1));
4031 tree_to_aff_combination (e1, type, &aff_e1);
4032 tree_to_aff_combination (e2, type, &aff_e2);
4033 aff_combination_scale (&aff_e2, double_int_minus_one);
4034 aff_combination_add (&aff_e1, &aff_e2);
4035
4036 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4037 }
4038
4039 /* Returns true if AFF1 and AFF2 are identical. */
4040
4041 static bool
4042 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4043 {
4044 unsigned i;
4045
4046 if (aff1->n != aff2->n)
4047 return false;
4048
4049 for (i = 0; i < aff1->n; i++)
4050 {
4051 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
4052 return false;
4053
4054 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4055 return false;
4056 }
4057 return true;
4058 }
4059
4060 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4061
4062 static int
4063 get_expr_id (struct ivopts_data *data, tree expr)
4064 {
4065 struct iv_inv_expr_ent ent;
4066 struct iv_inv_expr_ent **slot;
4067
4068 ent.expr = expr;
4069 ent.hash = iterative_hash_expr (expr, 0);
4070 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
4071 &ent, INSERT);
4072 if (*slot)
4073 return (*slot)->id;
4074
4075 *slot = XNEW (struct iv_inv_expr_ent);
4076 (*slot)->expr = expr;
4077 (*slot)->hash = ent.hash;
4078 (*slot)->id = data->inv_expr_id++;
4079 return (*slot)->id;
4080 }
4081
4082 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4083 requires a new compiler-generated temporary. Returns -1 otherwise.
4084 ADDRESS_P is a flag indicating whether the expression is for address
4085 computation. */
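/* For instance (an assumed example): with UBASE == p + 4, CBASE == p and
   RATIO == 1, the constant part of UBASE is stripped below and the bases
   then compare equal, so no new temporary is needed and -1 is returned. */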
4086
4087 static int
4088 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4089 tree cbase, HOST_WIDE_INT ratio,
4090 bool address_p)
4091 {
4092 aff_tree ubase_aff, cbase_aff;
4093 tree expr, ub, cb;
4094
4095 STRIP_NOPS (ubase);
4096 STRIP_NOPS (cbase);
4097 ub = ubase;
4098 cb = cbase;
4099
4100 if ((TREE_CODE (ubase) == INTEGER_CST)
4101 && (TREE_CODE (cbase) == INTEGER_CST))
4102 return -1;
4103
4104 /* Strips the constant part. */
4105 if (TREE_CODE (ubase) == PLUS_EXPR
4106 || TREE_CODE (ubase) == MINUS_EXPR
4107 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4108 {
4109 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4110 ubase = TREE_OPERAND (ubase, 0);
4111 }
4112
4113 /* Strips the constant part. */
4114 if (TREE_CODE (cbase) == PLUS_EXPR
4115 || TREE_CODE (cbase) == MINUS_EXPR
4116 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4117 {
4118 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4119 cbase = TREE_OPERAND (cbase, 0);
4120 }
4121
4122 if (address_p)
4123 {
4124 if (((TREE_CODE (ubase) == SSA_NAME)
4125 || (TREE_CODE (ubase) == ADDR_EXPR
4126 && is_gimple_min_invariant (ubase)))
4127 && (TREE_CODE (cbase) == INTEGER_CST))
4128 return -1;
4129
4130 if (((TREE_CODE (cbase) == SSA_NAME)
4131 || (TREE_CODE (cbase) == ADDR_EXPR
4132 && is_gimple_min_invariant (cbase)))
4133 && (TREE_CODE (ubase) == INTEGER_CST))
4134 return -1;
4135 }
4136
4137 if (ratio == 1)
4138 {
4139 if (operand_equal_p (ubase, cbase, 0))
4140 return -1;
4141
4142 if (TREE_CODE (ubase) == ADDR_EXPR
4143 && TREE_CODE (cbase) == ADDR_EXPR)
4144 {
4145 tree usym, csym;
4146
4147 usym = TREE_OPERAND (ubase, 0);
4148 csym = TREE_OPERAND (cbase, 0);
4149 if (TREE_CODE (usym) == ARRAY_REF)
4150 {
4151 tree ind = TREE_OPERAND (usym, 1);
4152 if (TREE_CODE (ind) == INTEGER_CST
4153 && host_integerp (ind, 0)
4154 && TREE_INT_CST_LOW (ind) == 0)
4155 usym = TREE_OPERAND (usym, 0);
4156 }
4157 if (TREE_CODE (csym) == ARRAY_REF)
4158 {
4159 tree ind = TREE_OPERAND (csym, 1);
4160 if (TREE_CODE (ind) == INTEGER_CST
4161 && host_integerp (ind, 0)
4162 && TREE_INT_CST_LOW (ind) == 0)
4163 csym = TREE_OPERAND (csym, 0);
4164 }
4165 if (operand_equal_p (usym, csym, 0))
4166 return -1;
4167 }
4168 /* Now do a more complex comparison. */
4169 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4170 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4171 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4172 return -1;
4173 }
4174
4175 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4176 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4177
4178 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4179 aff_combination_add (&ubase_aff, &cbase_aff);
4180 expr = aff_combination_to_tree (&ubase_aff);
4181 return get_expr_id (data, expr);
4182 }
4183
4184
4185
4186 /* Determines the cost of the computation by which USE is expressed
4187 from induction variable CAND. If ADDRESS_P is true, we just need
4188 to create an address from it, otherwise we want to get it into a
4189 register. A set of invariants we depend on is stored in
4190 DEPENDS_ON. AT is the statement at which the value is computed.
4191 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4192 addressing is likely. */
4193
4194 static comp_cost
4195 get_computation_cost_at (struct ivopts_data *data,
4196 struct iv_use *use, struct iv_cand *cand,
4197 bool address_p, bitmap *depends_on, gimple at,
4198 bool *can_autoinc,
4199 int *inv_expr_id)
4200 {
4201 tree ubase = use->iv->base, ustep = use->iv->step;
4202 tree cbase, cstep;
4203 tree utype = TREE_TYPE (ubase), ctype;
4204 unsigned HOST_WIDE_INT cstepi, offset = 0;
4205 HOST_WIDE_INT ratio, aratio;
4206 bool var_present, symbol_present, stmt_is_after_inc;
4207 comp_cost cost;
4208 double_int rat;
4209 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4210
4211 *depends_on = NULL;
4212
4213 /* Only consider real candidates. */
4214 if (!cand->iv)
4215 return infinite_cost;
4216
4217 cbase = cand->iv->base;
4218 cstep = cand->iv->step;
4219 ctype = TREE_TYPE (cbase);
4220
4221 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4222 {
4223 /* We do not have enough precision to express the values of USE. */
4224 return infinite_cost;
4225 }
4226
4227 if (address_p
4228 || (use->iv->base_object
4229 && cand->iv->base_object
4230 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4231 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4232 {
4233 /* Do not try to express the address of an object with a computation
4234 based on the address of a different object. This may cause problems
4235 in RTL-level alias analysis (which does not expect this to happen,
4236 as it is illegal in C), and would be unlikely to be useful
4237 anyway. */
4238 if (use->iv->base_object
4239 && cand->iv->base_object
4240 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4241 return infinite_cost;
4242 }
4243
4244 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4245 {
4246 /* TODO -- add direct handling of this case. */
4247 goto fallback;
4248 }
4249
4250 /* CSTEPI is removed from the offset in case the statement is after the
4251 increment. If the step is not constant, we use zero instead.
4252 This is a bit imprecise (there is the extra addition), but
4253 redundancy elimination is likely to transform the code so that
4254 it uses the value of the variable before the increment anyway,
4255 so this is not all that unrealistic. */
4256 if (cst_and_fits_in_hwi (cstep))
4257 cstepi = int_cst_value (cstep);
4258 else
4259 cstepi = 0;
4260
4261 if (!constant_multiple_of (ustep, cstep, &rat))
4262 return infinite_cost;
4263
4264 if (double_int_fits_in_shwi_p (rat))
4265 ratio = double_int_to_shwi (rat);
4266 else
4267 return infinite_cost;
4268
4269 STRIP_NOPS (cbase);
4270 ctype = TREE_TYPE (cbase);
4271
4272 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4273
4274 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4275 or ratio == 1, it is better to handle this like
4276
4277 ubase - ratio * cbase + ratio * var
4278
4279 (this also holds in the case ratio == -1, TODO). */
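/* For example (illustrative numbers only): with ubase == 20, cbase == 4
   and ratio == 2, we compute use = 12 + 2 * var by folding
   -ratio * cbase == -8 together with ubase into the offset, rather than
   subtracting cbase from var at runtime. */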
4280
4281 if (cst_and_fits_in_hwi (cbase))
4282 {
4283 offset = - ratio * int_cst_value (cbase);
4284 cost = difference_cost (data,
4285 ubase, build_int_cst (utype, 0),
4286 &symbol_present, &var_present, &offset,
4287 depends_on);
4288 cost.cost /= avg_loop_niter (data->current_loop);
4289 }
4290 else if (ratio == 1)
4291 {
4292 tree real_cbase = cbase;
4293
4294 /* Check to see if any adjustment is needed. */
4295 if (cstepi == 0 && stmt_is_after_inc)
4296 {
4297 aff_tree real_cbase_aff;
4298 aff_tree cstep_aff;
4299
4300 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4301 &real_cbase_aff);
4302 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4303
4304 aff_combination_add (&real_cbase_aff, &cstep_aff);
4305 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4306 }
4307
4308 cost = difference_cost (data,
4309 ubase, real_cbase,
4310 &symbol_present, &var_present, &offset,
4311 depends_on);
4312 cost.cost /= avg_loop_niter (data->current_loop);
4313 }
4314 else if (address_p
4315 && !POINTER_TYPE_P (ctype)
4316 && multiplier_allowed_in_address_p
4317 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4318 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4319 {
4320 cbase
4321 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4322 cost = difference_cost (data,
4323 ubase, cbase,
4324 &symbol_present, &var_present, &offset,
4325 depends_on);
4326 cost.cost /= avg_loop_niter (data->current_loop);
4327 }
4328 else
4329 {
4330 cost = force_var_cost (data, cbase, depends_on);
4331 cost = add_costs (cost,
4332 difference_cost (data,
4333 ubase, build_int_cst (utype, 0),
4334 &symbol_present, &var_present,
4335 &offset, depends_on));
4336 cost.cost /= avg_loop_niter (data->current_loop);
4337 cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
4338 }
4339
4340 if (inv_expr_id)
4341 {
4342 *inv_expr_id =
4343 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4344 /* Clear depends_on. */
4345 if (*inv_expr_id != -1 && depends_on && *depends_on)
4346 bitmap_clear (*depends_on);
4347 }
4348
4349 /* If we are after the increment, the value of the candidate is higher by
4350 one iteration. */
4351 if (stmt_is_after_inc)
4352 offset -= ratio * cstepi;
4353
4354 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4355 (symbol/var1/const parts may be omitted). If we are looking for an
4356 address, find the cost of addressing this. */
4357 if (address_p)
4358 return add_costs (cost,
4359 get_address_cost (symbol_present, var_present,
4360 offset, ratio, cstepi,
4361 TYPE_MODE (TREE_TYPE (utype)),
4362 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4363 speed, stmt_is_after_inc,
4364 can_autoinc));
4365
4366 /* Otherwise estimate the costs for computing the expression. */
4367 if (!symbol_present && !var_present && !offset)
4368 {
4369 if (ratio != 1)
4370 cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed);
4371 return cost;
4372 }
4373
4374 /* Symbol + offset should be compile-time computable, so assume they
4375 are added to the variable just once, if present. */
4376 if (var_present && (symbol_present || offset))
4377 cost.cost += adjust_setup_cost (data,
4378 add_regs_cost (TYPE_MODE (ctype), speed));
4379
4380 /* Having an offset does not affect the runtime cost in case it is added
4381 to a symbol, but it increases complexity. */
4382 if (offset)
4383 cost.complexity++;
4384
4385 cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
4386
4387 aratio = ratio > 0 ? ratio : -ratio;
4388 if (aratio != 1)
4389 cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed);
4390 return cost;
4391
4392 fallback:
4393 if (can_autoinc)
4394 *can_autoinc = false;
4395
4396 {
4397 /* Just get the expression, expand it and measure the cost. */
4398 tree comp = get_computation_at (data->current_loop, use, cand, at);
4399
4400 if (!comp)
4401 return infinite_cost;
4402
4403 if (address_p)
4404 comp = build_simple_mem_ref (comp);
4405
4406 return new_cost (computation_cost (comp, speed), 0);
4407 }
4408 }
4409
4410 /* Determines the cost of the computation by which USE is expressed
4411 from induction variable CAND. If ADDRESS_P is true, we just need
4412 to create an address from it, otherwise we want to get it into a
4413 register. A set of invariants we depend on is stored in
4414 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4415 autoinc addressing is likely. */
4416
4417 static comp_cost
4418 get_computation_cost (struct ivopts_data *data,
4419 struct iv_use *use, struct iv_cand *cand,
4420 bool address_p, bitmap *depends_on,
4421 bool *can_autoinc, int *inv_expr_id)
4422 {
4423 return get_computation_cost_at (data,
4424 use, cand, address_p, depends_on, use->stmt,
4425 can_autoinc, inv_expr_id);
4426 }
4427
4428 /* Determines cost of basing replacement of USE on CAND in a generic
4429 expression. */
4430
4431 static bool
4432 determine_use_iv_cost_generic (struct ivopts_data *data,
4433 struct iv_use *use, struct iv_cand *cand)
4434 {
4435 bitmap depends_on;
4436 comp_cost cost;
4437 int inv_expr_id = -1;
4438
4439 /* The simple case first -- if we need to express the value of the preserved
4440 original biv, the cost is 0. This also prevents us from counting the
4441 cost of increment twice -- once at this use and once in the cost of
4442 the candidate. */
4443 if (cand->pos == IP_ORIGINAL
4444 && cand->incremented_at == use->stmt)
4445 {
4446 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4447 ERROR_MARK, -1);
4448 return true;
4449 }
4450
4451 cost = get_computation_cost (data, use, cand, false, &depends_on,
4452 NULL, &inv_expr_id);
4453
4454 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4455 inv_expr_id);
4456
4457 return !infinite_cost_p (cost);
4458 }
4459
4460 /* Determines cost of basing replacement of USE on CAND in an address. */
4461
4462 static bool
4463 determine_use_iv_cost_address (struct ivopts_data *data,
4464 struct iv_use *use, struct iv_cand *cand)
4465 {
4466 bitmap depends_on;
4467 bool can_autoinc;
4468 int inv_expr_id = -1;
4469 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4470 &can_autoinc, &inv_expr_id);
4471
4472 if (cand->ainc_use == use)
4473 {
4474 if (can_autoinc)
4475 cost.cost -= cand->cost_step;
4476 /* If we generated the candidate solely for exploiting autoincrement
4477 opportunities, and it turns out it can't be used, set the cost to
4478 infinity to make sure we ignore it. */
4479 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4480 cost = infinite_cost;
4481 }
4482 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4483 inv_expr_id);
4484
4485 return !infinite_cost_p (cost);
4486 }
4487
4488 /* Computes value of candidate CAND at position AT in iteration NITER, and
4489 stores it to VAL. */
4490
4491 static void
4492 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4493 aff_tree *val)
4494 {
4495 aff_tree step, delta, nit;
4496 struct iv *iv = cand->iv;
4497 tree type = TREE_TYPE (iv->base);
4498 tree steptype = type;
4499 if (POINTER_TYPE_P (type))
4500 steptype = sizetype;
4501
4502 tree_to_aff_combination (iv->step, steptype, &step);
4503 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4504 aff_combination_convert (&nit, steptype);
4505 aff_combination_mult (&nit, &step, &delta);
4506 if (stmt_after_increment (loop, cand, at))
4507 aff_combination_add (&delta, &step);
4508
4509 tree_to_aff_combination (iv->base, type, val);
4510 aff_combination_add (val, &delta);
4511 }
4512
4513 /* Returns period of induction variable iv. */
4514
4515 static tree
4516 iv_period (struct iv *iv)
4517 {
4518 tree step = iv->step, period, type;
4519 tree pow2div;
4520
4521 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4522
4523 type = unsigned_type_for (TREE_TYPE (step));
4524 /* The period of the iv is lcm (step, type_range)/step - 1,
4525 i.e., N*type_range/step - 1. Since the type range is a power
4526 of two, N == step >> num_of_ending_zeros_binary (step),
4527 so the final result is
4528
4529 (type_range >> num_of_ending_zeros_binary (step)) - 1
4530
4531 */
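/* E.g. (a worked example, assuming a 32-bit unsigned type): for step == 4
   there are two trailing zero bits, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff -- the iv takes 2^30 distinct values
   before wrapping back to its initial one. */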
4532 pow2div = num_ending_zeros (step);
4533
4534 period = build_low_bits_mask (type,
4535 (TYPE_PRECISION (type)
4536 - tree_low_cst (pow2div, 1)));
4537
4538 return period;
4539 }
4540
4541 /* Returns the comparison operator used when eliminating the iv USE. */
4542
4543 static enum tree_code
4544 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4545 {
4546 struct loop *loop = data->current_loop;
4547 basic_block ex_bb;
4548 edge exit;
4549
4550 ex_bb = gimple_bb (use->stmt);
4551 exit = EDGE_SUCC (ex_bb, 0);
4552 if (flow_bb_inside_loop_p (loop, exit->dest))
4553 exit = EDGE_SUCC (ex_bb, 1);
4554
4555 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4556 }
4557
4558 static tree
4559 strip_wrap_conserving_type_conversions (tree exp)
4560 {
4561 while (tree_ssa_useless_type_conversion (exp)
4562 && (nowrap_type_p (TREE_TYPE (exp))
4563 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4564 exp = TREE_OPERAND (exp, 0);
4565 return exp;
4566 }
4567
4568 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4569 only check for an exact match. */
4570
4571 static bool
4572 expr_equal_p (tree e, tree what)
4573 {
4574 gimple stmt;
4575 enum tree_code code;
4576
4577 e = strip_wrap_conserving_type_conversions (e);
4578 what = strip_wrap_conserving_type_conversions (what);
4579
4580 code = TREE_CODE (what);
4581 if (TREE_TYPE (e) != TREE_TYPE (what))
4582 return false;
4583
4584 if (operand_equal_p (e, what, 0))
4585 return true;
4586
4587 if (TREE_CODE (e) != SSA_NAME)
4588 return false;
4589
4590 stmt = SSA_NAME_DEF_STMT (e);
4591 if (gimple_code (stmt) != GIMPLE_ASSIGN
4592 || gimple_assign_rhs_code (stmt) != code)
4593 return false;
4594
4595 switch (get_gimple_rhs_class (code))
4596 {
4597 case GIMPLE_BINARY_RHS:
4598 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4599 return false;
4600 /* Fallthru. */
4601
4602 case GIMPLE_UNARY_RHS:
4603 case GIMPLE_SINGLE_RHS:
4604 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4605 default:
4606 return false;
4607 }
4608 }
4609
4610 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4611 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4612 calculation is performed in a non-wrapping type.
4613
4614 TODO: More generally, we could test for the situation that
4615 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4616 This would require knowing the sign of OFFSET.
4617
4618 Also, we only look for the first addition in the computation of BASE.
4619 More complex analysis would be better, but introducing it just for
4620 this optimization seems like overkill. */
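/* A hypothetical instance of the pattern we detect: if BASE is p_1 with
   p_1 = q + n (a POINTER_PLUS_EXPR in a non-wrapping pointer type) and
   OFFSET is n, then BASE - OFFSET == q, which was already computed without
   overflow, so we return true. */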
4621
4622 static bool
4623 difference_cannot_overflow_p (tree base, tree offset)
4624 {
4625 enum tree_code code;
4626 tree e1, e2;
4627
4628 if (!nowrap_type_p (TREE_TYPE (base)))
4629 return false;
4630
4631 base = expand_simple_operations (base);
4632
4633 if (TREE_CODE (base) == SSA_NAME)
4634 {
4635 gimple stmt = SSA_NAME_DEF_STMT (base);
4636
4637 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4638 return false;
4639
4640 code = gimple_assign_rhs_code (stmt);
4641 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4642 return false;
4643
4644 e1 = gimple_assign_rhs1 (stmt);
4645 e2 = gimple_assign_rhs2 (stmt);
4646 }
4647 else
4648 {
4649 code = TREE_CODE (base);
4650 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4651 return false;
4652 e1 = TREE_OPERAND (base, 0);
4653 e2 = TREE_OPERAND (base, 1);
4654 }
4655
4656 /* TODO: deeper inspection may be necessary to prove the equality. */
4657 switch (code)
4658 {
4659 case PLUS_EXPR:
4660 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4661 case POINTER_PLUS_EXPR:
4662 return expr_equal_p (e2, offset);
4663
4664 default:
4665 return false;
4666 }
4667 }
4668
4669 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4670 comparison with CAND. NITER describes the number of iterations of
4671 the loops. If successful, the comparison in COMP_P is altered accordingly.
4672
4673 We aim to handle the following situation:
4674
4675 sometype *base, *p;
4676 int a, b, i;
4677
4678 i = a;
4679 p = p_0 = base + a;
4680
4681 do
4682 {
4683 bla (*p);
4684 p++;
4685 i++;
4686 }
4687 while (i < b);
4688
4689 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4690 We aim to optimize this to
4691
4692 p = p_0 = base + a;
4693 do
4694 {
4695 bla (*p);
4696 p++;
4697 }
4698 while (p < p_0 - a + b);
4699
4700 This preserves correctness, since the pointer arithmetic does not
4701 overflow. More precisely:
4702
4703 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4704 overflow in computing it or the values of p.
4705 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4706 overflow. To prove this, we use the fact that p_0 = base + a. */
4707
4708 static bool
4709 iv_elimination_compare_lt (struct ivopts_data *data,
4710 struct iv_cand *cand, enum tree_code *comp_p,
4711 struct tree_niter_desc *niter)
4712 {
4713 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4714 struct affine_tree_combination nit, tmpa, tmpb;
4715 enum tree_code comp;
4716 HOST_WIDE_INT step;
4717
4718 /* We need to know that the candidate induction variable does not overflow.
4719 While more complex analysis may be used to prove this, for now just
4720 check that the variable appears in the original program and that it
4721 is computed in a type that guarantees no overflows. */
4722 cand_type = TREE_TYPE (cand->iv->base);
4723 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4724 return false;
4725
4726 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4727 the calculation of the BOUND could overflow, making the comparison
4728 invalid. */
4729 if (!data->loop_single_exit_p)
4730 return false;
4731
4732 /* We need to be able to decide whether candidate is increasing or decreasing
4733 in order to choose the right comparison operator. */
4734 if (!cst_and_fits_in_hwi (cand->iv->step))
4735 return false;
4736 step = int_cst_value (cand->iv->step);
4737
4738 /* Check that the number of iterations matches the expected pattern:
4739 a + 1 > b ? 0 : b - a - 1. */
4740 mbz = niter->may_be_zero;
4741 if (TREE_CODE (mbz) == GT_EXPR)
4742 {
4743 /* Handle a + 1 > b. */
4744 tree op0 = TREE_OPERAND (mbz, 0);
4745 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4746 {
4747 a = TREE_OPERAND (op0, 0);
4748 b = TREE_OPERAND (mbz, 1);
4749 }
4750 else
4751 return false;
4752 }
4753 else if (TREE_CODE (mbz) == LT_EXPR)
4754 {
4755 tree op1 = TREE_OPERAND (mbz, 1);
4756
4757 /* Handle b < a + 1. */
4758 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4759 {
4760 a = TREE_OPERAND (op1, 0);
4761 b = TREE_OPERAND (mbz, 0);
4762 }
4763 else
4764 return false;
4765 }
4766 else
4767 return false;
4768
4769 /* Expected number of iterations is B - A - 1. Check that it matches
4770 the actual number, i.e., that B - A - NITER = 1. */
4771 tree_to_aff_combination (niter->niter, nit_type, &nit);
4772 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4773 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4774 aff_combination_scale (&nit, double_int_minus_one);
4775 aff_combination_scale (&tmpa, double_int_minus_one);
4776 aff_combination_add (&tmpb, &tmpa);
4777 aff_combination_add (&tmpb, &nit);
4778 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4779 return false;
4780
4781 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4782 overflow. */
4783 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4784 cand->iv->step,
4785 fold_convert (TREE_TYPE (cand->iv->step), a));
4786 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4787 return false;
4788
4789 /* Determine the new comparison operator. */
4790 comp = step < 0 ? GT_EXPR : LT_EXPR;
4791 if (*comp_p == NE_EXPR)
4792 *comp_p = comp;
4793 else if (*comp_p == EQ_EXPR)
4794 *comp_p = invert_tree_comparison (comp, false);
4795 else
4796 gcc_unreachable ();
4797
4798 return true;
4799 }
4800
4801 /* Check whether it is possible to express the condition in USE by comparison
4802 of candidate CAND. If so, store the value compared with to BOUND, and the
4803 comparison operator to COMP. */
4804
4805 static bool
4806 may_eliminate_iv (struct ivopts_data *data,
4807 struct iv_use *use, struct iv_cand *cand, tree *bound,
4808 enum tree_code *comp)
4809 {
4810 basic_block ex_bb;
4811 edge exit;
4812 tree period;
4813 struct loop *loop = data->current_loop;
4814 aff_tree bnd;
4815 struct tree_niter_desc *desc = NULL;
4816
4817 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4818 return false;
4819
4820 /* For now, this works only for exits that dominate the loop latch.
4821 TODO: extend to other conditions inside loop body. */
4822 ex_bb = gimple_bb (use->stmt);
4823 if (use->stmt != last_stmt (ex_bb)
4824 || gimple_code (use->stmt) != GIMPLE_COND
4825 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4826 return false;
4827
4828 exit = EDGE_SUCC (ex_bb, 0);
4829 if (flow_bb_inside_loop_p (loop, exit->dest))
4830 exit = EDGE_SUCC (ex_bb, 1);
4831 if (flow_bb_inside_loop_p (loop, exit->dest))
4832 return false;
4833
4834 desc = niter_for_exit (data, exit);
4835 if (!desc)
4836 return false;
4837
4838 /* Determine whether we can use the variable to test the exit condition.
4839 This is the case iff the period of the induction variable is greater
4840 than the number of iterations for which the exit condition is true. */
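/* For instance (an assumed scenario): an 8-bit candidate with step 1 has
   period 255; if the exit condition is true for at most 200 iterations,
   the final-value comparison is safe, whereas with 300 iterations the iv
   would wrap and the checks below reject the elimination. */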
4841 period = iv_period (cand->iv);
4842
4843 /* If the number of iterations is constant, compare against it directly. */
4844 if (TREE_CODE (desc->niter) == INTEGER_CST)
4845 {
4846 /* See cand_value_at. */
4847 if (stmt_after_increment (loop, cand, use->stmt))
4848 {
4849 if (!tree_int_cst_lt (desc->niter, period))
4850 return false;
4851 }
4852 else
4853 {
4854 if (tree_int_cst_lt (period, desc->niter))
4855 return false;
4856 }
4857 }
4858
4859 /* If not, and if this is the only possible exit of the loop, see whether
4860 we can get a conservative estimate on the number of iterations of the
4861 entire loop and compare against that instead. */
4862 else
4863 {
4864 double_int period_value, max_niter;
4865
4866 max_niter = desc->max;
4867 if (stmt_after_increment (loop, cand, use->stmt))
4868 max_niter = double_int_add (max_niter, double_int_one);
4869 period_value = tree_to_double_int (period);
4870 if (double_int_ucmp (max_niter, period_value) > 0)
4871 {
4872 /* See if we can take advantage of inferred loop bound information. */
4873 if (data->loop_single_exit_p)
4874 {
4875 if (!max_loop_iterations (loop, &max_niter))
4876 return false;
4877 /* The loop bound is already adjusted by adding 1. */
4878 if (double_int_ucmp (max_niter, period_value) > 0)
4879 return false;
4880 }
4881 else
4882 return false;
4883 }
4884 }
4885
4886 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4887
4888 *bound = aff_combination_to_tree (&bnd);
4889 *comp = iv_elimination_compare (data, use);
4890
4891 /* It is unlikely that computing the number of iterations using division
4892 would be more profitable than keeping the original induction variable. */
4893 if (expression_expensive_p (*bound))
4894 return false;
4895
4896 /* Sometimes, it is possible to handle the situation that the number of
4897 iterations may be zero unless additional assumptions hold, by using <
4898 instead of != in the exit condition.
4899
4900 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4901 base the exit condition on it. However, that is often too
4902 expensive. */
4903 if (!integer_zerop (desc->may_be_zero))
4904 return iv_elimination_compare_lt (data, cand, comp, desc);
4905
4906 return true;
4907 }
4908
4909 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4910 be copied, if it is used in the loop body and DATA->body_includes_call. */
4911
4912 static int
4913 parm_decl_cost (struct ivopts_data *data, tree bound)
4914 {
4915 tree sbound = bound;
4916 STRIP_NOPS (sbound);
4917
4918 if (TREE_CODE (sbound) == SSA_NAME
4919 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4920 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4921 && data->body_includes_call)
4922 return COSTS_N_INSNS (1);
4923
4924 return 0;
4925 }
4926
4927 /* Determines cost of basing replacement of USE on CAND in a condition. */
4928
4929 static bool
4930 determine_use_iv_cost_condition (struct ivopts_data *data,
4931 struct iv_use *use, struct iv_cand *cand)
4932 {
4933 tree bound = NULL_TREE;
4934 struct iv *cmp_iv;
4935 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4936 comp_cost elim_cost, express_cost, cost, bound_cost;
4937 bool ok;
4938 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4939 tree *control_var, *bound_cst;
4940 enum tree_code comp = ERROR_MARK;
4941
4942 /* Only consider real candidates. */
4943 if (!cand->iv)
4944 {
4945 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4946 ERROR_MARK, -1);
4947 return false;
4948 }
4949
4950 /* Try iv elimination. */
4951 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4952 {
4953 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4954 if (elim_cost.cost == 0)
4955 elim_cost.cost = parm_decl_cost (data, bound);
4956 else if (TREE_CODE (bound) == INTEGER_CST)
4957 elim_cost.cost = 0;
4958 /* If we replace a loop condition 'i < n' with 'p < base + n',
4959 depends_on_elim will have 'base' and 'n' set, which implies
4960 that both 'base' and 'n' will be live during the loop. More likely,
4961 'base + n' will be loop invariant, resulting in only one live value
4962 during the loop. So in that case we clear depends_on_elim and set
4963 elim_inv_expr_id instead. */
4964 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4965 {
4966 elim_inv_expr_id = get_expr_id (data, bound);
4967 bitmap_clear (depends_on_elim);
4968 }
4969 /* The bound is a loop invariant, so it will be only computed
4970 once. */
4971 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4972 }
4973 else
4974 elim_cost = infinite_cost;
4975
4976 /* Try expressing the original giv. If it is compared with an invariant,
4977 note that we cannot get rid of it. */
4978 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4979 NULL, &cmp_iv);
4980 gcc_assert (ok);
4981
4982 /* When the condition is a comparison of the candidate IV against
4983 zero, prefer this IV.
4984
4985 TODO: The constant that we're subtracting from the cost should
4986 be target-dependent. This information should be added to the
4987 target costs for each backend. */
4988 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4989 && integer_zerop (*bound_cst)
4990 && (operand_equal_p (*control_var, cand->var_after, 0)
4991 || operand_equal_p (*control_var, cand->var_before, 0)))
4992 elim_cost.cost -= 1;
4993
4994 express_cost = get_computation_cost (data, use, cand, false,
4995 &depends_on_express, NULL,
4996 &express_inv_expr_id);
4997 fd_ivopts_data = data;
4998 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4999
5000 /* Count the cost of the original bound as well. */
5001 bound_cost = force_var_cost (data, *bound_cst, NULL);
5002 if (bound_cost.cost == 0)
5003 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5004 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5005 bound_cost.cost = 0;
5006 express_cost.cost += bound_cost.cost;
5007
5008 /* Choose the better approach, preferring the eliminated IV. */
5009 if (compare_costs (elim_cost, express_cost) <= 0)
5010 {
5011 cost = elim_cost;
5012 depends_on = depends_on_elim;
5013 depends_on_elim = NULL;
5014 inv_expr_id = elim_inv_expr_id;
5015 }
5016 else
5017 {
5018 cost = express_cost;
5019 depends_on = depends_on_express;
5020 depends_on_express = NULL;
5021 bound = NULL_TREE;
5022 comp = ERROR_MARK;
5023 inv_expr_id = express_inv_expr_id;
5024 }
5025
5026 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5027
5028 if (depends_on_elim)
5029 BITMAP_FREE (depends_on_elim);
5030 if (depends_on_express)
5031 BITMAP_FREE (depends_on_express);
5032
5033 return !infinite_cost_p (cost);
5034 }
5035
5036 /* Determines cost of basing replacement of USE on CAND. Returns false
5037 if USE cannot be based on CAND. */
5038
5039 static bool
5040 determine_use_iv_cost (struct ivopts_data *data,
5041 struct iv_use *use, struct iv_cand *cand)
5042 {
5043 switch (use->type)
5044 {
5045 case USE_NONLINEAR_EXPR:
5046 return determine_use_iv_cost_generic (data, use, cand);
5047
5048 case USE_ADDRESS:
5049 return determine_use_iv_cost_address (data, use, cand);
5050
5051 case USE_COMPARE:
5052 return determine_use_iv_cost_condition (data, use, cand);
5053
5054 default:
5055 gcc_unreachable ();
5056 }
5057 }
5058
5059 /* Return true if get_computation_cost indicates that autoincrement is
5060 a possibility for the pair of USE and CAND, false otherwise. */
5061
5062 static bool
5063 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5064 struct iv_cand *cand)
5065 {
5066 bitmap depends_on;
5067 bool can_autoinc;
5068 comp_cost cost;
5069
5070 if (use->type != USE_ADDRESS)
5071 return false;
5072
5073 cost = get_computation_cost (data, use, cand, true, &depends_on,
5074 &can_autoinc, NULL);
5075
5076 BITMAP_FREE (depends_on);
5077
5078 return !infinite_cost_p (cost) && can_autoinc;
5079 }
5080
5081 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5082 use that allows autoincrement, and set their AINC_USE if possible. */
5083
5084 static void
5085 set_autoinc_for_original_candidates (struct ivopts_data *data)
5086 {
5087 unsigned i, j;
5088
5089 for (i = 0; i < n_iv_cands (data); i++)
5090 {
5091 struct iv_cand *cand = iv_cand (data, i);
5092 struct iv_use *closest = NULL;
5093 if (cand->pos != IP_ORIGINAL)
5094 continue;
5095 for (j = 0; j < n_iv_uses (data); j++)
5096 {
5097 struct iv_use *use = iv_use (data, j);
5098 unsigned uid = gimple_uid (use->stmt);
5099 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
5100 || uid > gimple_uid (cand->incremented_at))
5101 continue;
5102 if (closest == NULL || uid > gimple_uid (closest->stmt))
5103 closest = use;
5104 }
5105 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
5106 continue;
5107 cand->ainc_use = closest;
5108 }
5109 }
5110
5111 /* Finds the candidates for the induction variables. */
5112
5113 static void
5114 find_iv_candidates (struct ivopts_data *data)
5115 {
5116 /* Add commonly used ivs. */
5117 add_standard_iv_candidates (data);
5118
5119 /* Add old induction variables. */
5120 add_old_ivs_candidates (data);
5121
5122 /* Add induction variables derived from uses. */
5123 add_derived_ivs_candidates (data);
5124
5125 set_autoinc_for_original_candidates (data);
5126
5127 /* Record the important candidates. */
5128 record_important_candidates (data);
5129 }
5130
5131 /* Determines costs of basing the use of the iv on an iv candidate. */
5132
5133 static void
5134 determine_use_iv_costs (struct ivopts_data *data)
5135 {
5136 unsigned i, j;
5137 struct iv_use *use;
5138 struct iv_cand *cand;
5139 bitmap to_clear = BITMAP_ALLOC (NULL);
5140
5141 alloc_use_cost_map (data);
5142
5143 for (i = 0; i < n_iv_uses (data); i++)
5144 {
5145 use = iv_use (data, i);
5146
5147 if (data->consider_all_candidates)
5148 {
5149 for (j = 0; j < n_iv_cands (data); j++)
5150 {
5151 cand = iv_cand (data, j);
5152 determine_use_iv_cost (data, use, cand);
5153 }
5154 }
5155 else
5156 {
5157 bitmap_iterator bi;
5158
5159 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5160 {
5161 cand = iv_cand (data, j);
5162 if (!determine_use_iv_cost (data, use, cand))
5163 bitmap_set_bit (to_clear, j);
5164 }
5165
5166 /* Remove the candidates for which the cost is infinite from
5167 the list of related candidates. */
5168 bitmap_and_compl_into (use->related_cands, to_clear);
5169 bitmap_clear (to_clear);
5170 }
5171 }
5172
5173 BITMAP_FREE (to_clear);
5174
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5176 {
5177 fprintf (dump_file, "Use-candidate costs:\n");
5178
5179 for (i = 0; i < n_iv_uses (data); i++)
5180 {
5181 use = iv_use (data, i);
5182
5183 fprintf (dump_file, "Use %d:\n", i);
5184 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5185 for (j = 0; j < use->n_map_members; j++)
5186 {
5187 if (!use->cost_map[j].cand
5188 || infinite_cost_p (use->cost_map[j].cost))
5189 continue;
5190
5191 fprintf (dump_file, " %d\t%d\t%d\t",
5192 use->cost_map[j].cand->id,
5193 use->cost_map[j].cost.cost,
5194 use->cost_map[j].cost.complexity);
5195 if (use->cost_map[j].depends_on)
5196 bitmap_print (dump_file,
5197 use->cost_map[j].depends_on, "","");
5198 if (use->cost_map[j].inv_expr_id != -1)
5199 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5200 fprintf (dump_file, "\n");
5201 }
5202
5203 fprintf (dump_file, "\n");
5204 }
5205 fprintf (dump_file, "\n");
5206 }
5207 }
5208
5209 /* Determines cost of the candidate CAND. */
5210
5211 static void
5212 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5213 {
5214 comp_cost cost_base;
5215 unsigned cost, cost_step;
5216 tree base;
5217
5218 if (!cand->iv)
5219 {
5220 cand->cost = 0;
5221 return;
5222 }
5223
5224 /* There are two costs associated with the candidate -- its increment
5225 and its initialization. The second is almost negligible for any loop
5226 that rolls enough, so we take it just very little into account. */
5227
5228 base = cand->iv->base;
5229 cost_base = force_var_cost (data, base, NULL);
5230 /* It will be exceptional that the iv register happens to be initialized with
5231 the proper value at no cost. In general, there will at least be a regcopy
5232 or a const set. */
5233 if (cost_base.cost == 0)
5234 cost_base.cost = COSTS_N_INSNS (1);
5235 cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5236
5237 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5238
5239 /* Prefer the original ivs unless we may gain something by replacing it.
5240 The reason is to make debugging simpler; so this is not relevant for
5241 artificial ivs created by other optimization passes. */
5242 if (cand->pos != IP_ORIGINAL
5243 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5244 cost++;
5245
5246 /* Prefer not to insert statements into latch unless there are some
5247 already (so that we do not create unnecessary jumps). */
5248 if (cand->pos == IP_END
5249 && empty_block_p (ip_end_pos (data->current_loop)))
5250 cost++;
5251
5252 cand->cost = cost;
5253 cand->cost_step = cost_step;
5254 }
5255
5256 /* Determines costs of computation of the candidates. */
5257
5258 static void
5259 determine_iv_costs (struct ivopts_data *data)
5260 {
5261 unsigned i;
5262
5263 if (dump_file && (dump_flags & TDF_DETAILS))
5264 {
5265 fprintf (dump_file, "Candidate costs:\n");
5266 fprintf (dump_file, " cand\tcost\n");
5267 }
5268
5269 for (i = 0; i < n_iv_cands (data); i++)
5270 {
5271 struct iv_cand *cand = iv_cand (data, i);
5272
5273 determine_iv_cost (data, cand);
5274
5275 if (dump_file && (dump_flags & TDF_DETAILS))
5276 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5277 }
5278
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5280 fprintf (dump_file, "\n");
5281 }
5282
5283 /* Calculates cost for having SIZE induction variables. */
5284
5285 static unsigned
5286 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5287 {
5288 /* We add size to the cost, so that we prefer eliminating ivs
5289 if possible. */
5290 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5291 data->body_includes_call);
5292 }
5293
5294 /* For each size of the induction variable set determine the penalty. */
5295
5296 static void
5297 determine_set_costs (struct ivopts_data *data)
5298 {
5299 unsigned j, n;
5300 gimple phi;
5301 gimple_stmt_iterator psi;
5302 tree op;
5303 struct loop *loop = data->current_loop;
5304 bitmap_iterator bi;
5305
5306 if (dump_file && (dump_flags & TDF_DETAILS))
5307 {
5308 fprintf (dump_file, "Global costs:\n");
5309 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5310 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5311 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5312 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5313 }
5314
5315 n = 0;
5316 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5317 {
5318 phi = gsi_stmt (psi);
5319 op = PHI_RESULT (phi);
5320
5321 if (!is_gimple_reg (op))
5322 continue;
5323
5324 if (get_iv (data, op))
5325 continue;
5326
5327 n++;
5328 }
5329
5330 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5331 {
5332 struct version_info *info = ver_info (data, j);
5333
5334 if (info->inv_id && info->has_nonlin_use)
5335 n++;
5336 }
5337
5338 data->regs_used = n;
5339 if (dump_file && (dump_flags & TDF_DETAILS))
5340 fprintf (dump_file, " regs_used %d\n", n);
5341
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5343 {
5344 fprintf (dump_file, " cost for size:\n");
5345 fprintf (dump_file, " ivs\tcost\n");
5346 for (j = 0; j <= 2 * target_avail_regs; j++)
5347 fprintf (dump_file, " %d\t%d\n", j,
5348 ivopts_global_cost_for_size (data, j));
5349 fprintf (dump_file, "\n");
5350 }
5351 }
5352
5353 /* Returns true if A is a cheaper cost pair than B. */
5354
5355 static bool
5356 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5357 {
5358 int cmp;
5359
5360 if (!a)
5361 return false;
5362
5363 if (!b)
5364 return true;
5365
5366 cmp = compare_costs (a->cost, b->cost);
5367 if (cmp < 0)
5368 return true;
5369
5370 if (cmp > 0)
5371 return false;
5372
5373 /* In case the costs are the same, prefer the cheaper candidate. */
5374 if (a->cand->cost < b->cand->cost)
5375 return true;
5376
5377 return false;
5378 }
5379
5380
5381 /* Returns the candidate by which USE is expressed in IVS. */
5382
5383 static struct cost_pair *
5384 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5385 {
5386 return ivs->cand_for_use[use->id];
5387 }
5388
5389 /* Computes the cost field of the IVS structure. */
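/* E.g. (illustrative numbers): with cand_use_cost == 8, cand_cost == 5 and
   ivopts_global_cost_for_size returning 6 for n_regs + num_used_inv_expr
   registers, the recomputed total is 19. */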
5390
5391 static void
5392 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5393 {
5394 comp_cost cost = ivs->cand_use_cost;
5395
5396 cost.cost += ivs->cand_cost;
5397
5398 cost.cost += ivopts_global_cost_for_size (data,
5399 ivs->n_regs + ivs->num_used_inv_expr);
5400
5401 ivs->cost = cost;
5402 }
5403
5404 /* Remove invariants in set INVS from set IVS. */
5405
5406 static void
5407 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5408 {
5409 bitmap_iterator bi;
5410 unsigned iid;
5411
5412 if (!invs)
5413 return;
5414
5415 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5416 {
5417 ivs->n_invariant_uses[iid]--;
5418 if (ivs->n_invariant_uses[iid] == 0)
5419 ivs->n_regs--;
5420 }
5421 }
5422
5423 /* Set USE not to be expressed by any candidate in IVS. */
5424
5425 static void
5426 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5427 struct iv_use *use)
5428 {
5429 unsigned uid = use->id, cid;
5430 struct cost_pair *cp;
5431
5432 cp = ivs->cand_for_use[uid];
5433 if (!cp)
5434 return;
5435 cid = cp->cand->id;
5436
5437 ivs->bad_uses++;
5438 ivs->cand_for_use[uid] = NULL;
5439 ivs->n_cand_uses[cid]--;
5440
5441 if (ivs->n_cand_uses[cid] == 0)
5442 {
5443 bitmap_clear_bit (ivs->cands, cid);
5444 /* Do not count the pseudocandidates. */
5445 if (cp->cand->iv)
5446 ivs->n_regs--;
5447 ivs->n_cands--;
5448 ivs->cand_cost -= cp->cand->cost;
5449
5450 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5451 }
5452
5453 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5454
5455 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5456
5457 if (cp->inv_expr_id != -1)
5458 {
5459 ivs->used_inv_expr[cp->inv_expr_id]--;
5460 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5461 ivs->num_used_inv_expr--;
5462 }
5463 iv_ca_recount_cost (data, ivs);
5464 }
5465
5466 /* Add invariants in set INVS to set IVS. */
5467
5468 static void
5469 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5470 {
5471 bitmap_iterator bi;
5472 unsigned iid;
5473
5474 if (!invs)
5475 return;
5476
5477 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5478 {
5479 ivs->n_invariant_uses[iid]++;
5480 if (ivs->n_invariant_uses[iid] == 1)
5481 ivs->n_regs++;
5482 }
5483 }
5484
5485 /* Set cost pair for USE in set IVS to CP. */
5486
5487 static void
5488 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5489 struct iv_use *use, struct cost_pair *cp)
5490 {
5491 unsigned uid = use->id, cid;
5492
5493 if (ivs->cand_for_use[uid] == cp)
5494 return;
5495
5496 if (ivs->cand_for_use[uid])
5497 iv_ca_set_no_cp (data, ivs, use);
5498
5499 if (cp)
5500 {
5501 cid = cp->cand->id;
5502
5503 ivs->bad_uses--;
5504 ivs->cand_for_use[uid] = cp;
5505 ivs->n_cand_uses[cid]++;
5506 if (ivs->n_cand_uses[cid] == 1)
5507 {
5508 bitmap_set_bit (ivs->cands, cid);
5509 /* Do not count the pseudocandidates. */
5510 if (cp->cand->iv)
5511 ivs->n_regs++;
5512 ivs->n_cands++;
5513 ivs->cand_cost += cp->cand->cost;
5514
5515 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5516 }
5517
5518 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5519 iv_ca_set_add_invariants (ivs, cp->depends_on);
5520
5521 if (cp->inv_expr_id != -1)
5522 {
5523 ivs->used_inv_expr[cp->inv_expr_id]++;
5524 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5525 ivs->num_used_inv_expr++;
5526 }
5527 iv_ca_recount_cost (data, ivs);
5528 }
5529 }
5530
5531 /* Extend set IVS by expressing USE by some of the candidates in it
5532 if possible. All important candidates will be considered
5533 if IMPORTANT_CANDIDATES is true. */
5534
5535 static void
5536 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5537 struct iv_use *use, bool important_candidates)
5538 {
5539 struct cost_pair *best_cp = NULL, *cp;
5540 bitmap_iterator bi;
5541 bitmap cands;
5542 unsigned i;
5543
5544 gcc_assert (ivs->upto >= use->id);
5545
5546 if (ivs->upto == use->id)
5547 {
5548 ivs->upto++;
5549 ivs->bad_uses++;
5550 }
5551
5552 cands = (important_candidates ? data->important_candidates : ivs->cands);
5553 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5554 {
5555 struct iv_cand *cand = iv_cand (data, i);
5556
5557 cp = get_use_iv_cost (data, use, cand);
5558
5559 if (cheaper_cost_pair (cp, best_cp))
5560 best_cp = cp;
5561 }
5562
5563 iv_ca_set_cp (data, ivs, use, best_cp);
5564 }
5565
5566 /* Get cost for assignment IVS. */
5567
5568 static comp_cost
5569 iv_ca_cost (struct iv_ca *ivs)
5570 {
5571 /* This was a conditional expression but it triggered a bug in
5572 Sun C 5.5. */
5573 if (ivs->bad_uses)
5574 return infinite_cost;
5575 else
5576 return ivs->cost;
5577 }
5578
5579 /* Returns true if all dependences of CP are among invariants in IVS. */
5580
5581 static bool
5582 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5583 {
5584 unsigned i;
5585 bitmap_iterator bi;
5586
5587 if (!cp->depends_on)
5588 return true;
5589
5590 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5591 {
5592 if (ivs->n_invariant_uses[i] == 0)
5593 return false;
5594 }
5595
5596 return true;
5597 }
5598
5599 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and chains
5600 it before NEXT_CHANGE. */
5601
5602 static struct iv_ca_delta *
5603 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5604 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5605 {
5606 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5607
5608 change->use = use;
5609 change->old_cp = old_cp;
5610 change->new_cp = new_cp;
5611 change->next_change = next_change;
5612
5613 return change;
5614 }
5615
5616 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5617 are rewritten. */
5618
5619 static struct iv_ca_delta *
5620 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5621 {
5622 struct iv_ca_delta *last;
5623
5624 if (!l2)
5625 return l1;
5626
5627 if (!l1)
5628 return l2;
5629
5630 for (last = l1; last->next_change; last = last->next_change)
5631 continue;
5632 last->next_change = l2;
5633
5634 return l1;
5635 }
5636
5637 /* Reverse the list of changes DELTA, forming the inverse to it. */
5638
5639 static struct iv_ca_delta *
5640 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5641 {
5642 struct iv_ca_delta *act, *next, *prev = NULL;
5643 struct cost_pair *tmp;
5644
5645 for (act = delta; act; act = next)
5646 {
5647 next = act->next_change;
5648 act->next_change = prev;
5649 prev = act;
5650
5651 tmp = act->old_cp;
5652 act->old_cp = act->new_cp;
5653 act->new_cp = tmp;
5654 }
5655
5656 return prev;
5657 }
5658
5659 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5660 reverted instead. */
5661
5662 static void
5663 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5664 struct iv_ca_delta *delta, bool forward)
5665 {
5666 struct cost_pair *from, *to;
5667 struct iv_ca_delta *act;
5668
5669 if (!forward)
5670 delta = iv_ca_delta_reverse (delta);
5671
5672 for (act = delta; act; act = act->next_change)
5673 {
5674 from = act->old_cp;
5675 to = act->new_cp;
5676 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5677 iv_ca_set_cp (data, ivs, act->use, to);
5678 }
5679
5680 if (!forward)
5681 iv_ca_delta_reverse (delta);
5682 }
5683
5684 /* Returns true if CAND is used in IVS. */
5685
5686 static bool
5687 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5688 {
5689 return ivs->n_cand_uses[cand->id] > 0;
5690 }
5691
5692 /* Returns number of induction variable candidates in the set IVS. */
5693
5694 static unsigned
5695 iv_ca_n_cands (struct iv_ca *ivs)
5696 {
5697 return ivs->n_cands;
5698 }
5699
5700 /* Free the list of changes DELTA. */
5701
5702 static void
5703 iv_ca_delta_free (struct iv_ca_delta **delta)
5704 {
5705 struct iv_ca_delta *act, *next;
5706
5707 for (act = *delta; act; act = next)
5708 {
5709 next = act->next_change;
5710 free (act);
5711 }
5712
5713 *delta = NULL;
5714 }
5715
5716 /* Allocates a new iv candidate assignment. */
5717
5718 static struct iv_ca *
5719 iv_ca_new (struct ivopts_data *data)
5720 {
5721 struct iv_ca *nw = XNEW (struct iv_ca);
5722
5723 nw->upto = 0;
5724 nw->bad_uses = 0;
5725 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5726 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5727 nw->cands = BITMAP_ALLOC (NULL);
5728 nw->n_cands = 0;
5729 nw->n_regs = 0;
5730 nw->cand_use_cost = no_cost;
5731 nw->cand_cost = 0;
5732 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5733 nw->cost = no_cost;
5734 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5735 nw->num_used_inv_expr = 0;
5736
5737 return nw;
5738 }
5739
5740 /* Free memory occupied by the set IVS. */
5741
5742 static void
5743 iv_ca_free (struct iv_ca **ivs)
5744 {
5745 free ((*ivs)->cand_for_use);
5746 free ((*ivs)->n_cand_uses);
5747 BITMAP_FREE ((*ivs)->cands);
5748 free ((*ivs)->n_invariant_uses);
5749 free ((*ivs)->used_inv_expr);
5750 free (*ivs);
5751 *ivs = NULL;
5752 }
5753
5754 /* Dumps IVS to FILE. */
5755
5756 static void
5757 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5758 {
5759 const char *pref = " invariants ";
5760 unsigned i;
5761 comp_cost cost = iv_ca_cost (ivs);
5762
5763 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5764 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5765 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5766 bitmap_print (file, ivs->cands, " candidates: ","\n");
5767
5768 for (i = 0; i < ivs->upto; i++)
5769 {
5770 struct iv_use *use = iv_use (data, i);
5771 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5772 if (cp)
5773 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5774 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5775 else
5776 fprintf (file, " use:%d --> ??\n", use->id);
5777 }
5778
5779 for (i = 1; i <= data->max_inv_id; i++)
5780 if (ivs->n_invariant_uses[i])
5781 {
5782 fprintf (file, "%s%d", pref, i);
5783 pref = ", ";
5784 }
5785 fprintf (file, "\n\n");
5786 }
5787
5788 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5789 new set, and store differences in DELTA. Number of induction variables
5790 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true,
5791 the function will try to find a solution with a minimal number of iv candidates. */
5792
5793 static comp_cost
5794 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5795 struct iv_cand *cand, struct iv_ca_delta **delta,
5796 unsigned *n_ivs, bool min_ncand)
5797 {
5798 unsigned i;
5799 comp_cost cost;
5800 struct iv_use *use;
5801 struct cost_pair *old_cp, *new_cp;
5802
5803 *delta = NULL;
5804 for (i = 0; i < ivs->upto; i++)
5805 {
5806 use = iv_use (data, i);
5807 old_cp = iv_ca_cand_for_use (ivs, use);
5808
5809 if (old_cp
5810 && old_cp->cand == cand)
5811 continue;
5812
5813 new_cp = get_use_iv_cost (data, use, cand);
5814 if (!new_cp)
5815 continue;
5816
5817 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5818 continue;
5819
5820 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5821 continue;
5822
5823 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5824 }
5825
5826 iv_ca_delta_commit (data, ivs, *delta, true);
5827 cost = iv_ca_cost (ivs);
5828 if (n_ivs)
5829 *n_ivs = iv_ca_n_cands (ivs);
5830 iv_ca_delta_commit (data, ivs, *delta, false);
5831
5832 return cost;
5833 }
5834
5835 /* Try narrowing set IVS by removing CAND. Return the cost of
5836 the new set and store the differences in DELTA. */
5837
5838 static comp_cost
5839 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5840 struct iv_cand *cand, struct iv_ca_delta **delta)
5841 {
5842 unsigned i, ci;
5843 struct iv_use *use;
5844 struct cost_pair *old_cp, *new_cp, *cp;
5845 bitmap_iterator bi;
5846 struct iv_cand *cnd;
5847 comp_cost cost;
5848
5849 *delta = NULL;
5850 for (i = 0; i < n_iv_uses (data); i++)
5851 {
5852 use = iv_use (data, i);
5853
5854 old_cp = iv_ca_cand_for_use (ivs, use);
5855 if (old_cp->cand != cand)
5856 continue;
5857
5858 new_cp = NULL;
5859
5860 if (data->consider_all_candidates)
5861 {
5862 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5863 {
5864 if (ci == cand->id)
5865 continue;
5866
5867 cnd = iv_cand (data, ci);
5868
5869 cp = get_use_iv_cost (data, use, cnd);
5870 if (!cp)
5871 continue;
5872
5873 if (!iv_ca_has_deps (ivs, cp))
5874 continue;
5875
5876 if (!cheaper_cost_pair (cp, new_cp))
5877 continue;
5878
5879 new_cp = cp;
5880 }
5881 }
5882 else
5883 {
5884 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5885 {
5886 if (ci == cand->id)
5887 continue;
5888
5889 cnd = iv_cand (data, ci);
5890
5891 cp = get_use_iv_cost (data, use, cnd);
5892 if (!cp)
5893 continue;
5894 if (!iv_ca_has_deps (ivs, cp))
5895 continue;
5896
5897 if (!cheaper_cost_pair (cp, new_cp))
5898 continue;
5899
5900 new_cp = cp;
5901 }
5902 }
5903
5904 if (!new_cp)
5905 {
5906 iv_ca_delta_free (delta);
5907 return infinite_cost;
5908 }
5909
5910 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5911 }
5912
5913 iv_ca_delta_commit (data, ivs, *delta, true);
5914 cost = iv_ca_cost (ivs);
5915 iv_ca_delta_commit (data, ivs, *delta, false);
5916
5917 return cost;
5918 }
5919
5920 /* Try optimizing the set of candidates IVS by removing candidates other
5921 than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5922 differences in DELTA. */
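/* Illustrative sketch: with candidates {c1, c2, c3} in IVS, each candidate
   other than EXCEPT_CAND is tentatively removed via iv_ca_narrow, the
   cheapest resulting set is kept, and the function then recurses on it;
   one call may thus drop several redundant candidates, e.g.
   {c1, c2, c3} -> {c1, c3} -> {c1}.  */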
5923
5924 static comp_cost
5925 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5926 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5927 {
5928 bitmap_iterator bi;
5929 struct iv_ca_delta *act_delta, *best_delta;
5930 unsigned i;
5931 comp_cost best_cost, acost;
5932 struct iv_cand *cand;
5933
5934 best_delta = NULL;
5935 best_cost = iv_ca_cost (ivs);
5936
5937 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5938 {
5939 cand = iv_cand (data, i);
5940
5941 if (cand == except_cand)
5942 continue;
5943
5944 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5945
5946 if (compare_costs (acost, best_cost) < 0)
5947 {
5948 best_cost = acost;
5949 iv_ca_delta_free (&best_delta);
5950 best_delta = act_delta;
5951 }
5952 else
5953 iv_ca_delta_free (&act_delta);
5954 }
5955
5956 if (!best_delta)
5957 {
5958 *delta = NULL;
5959 return best_cost;
5960 }
5961
5962 /* Recurse to possibly remove other unnecessary ivs. */
5963 iv_ca_delta_commit (data, ivs, best_delta, true);
5964 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5965 iv_ca_delta_commit (data, ivs, best_delta, false);
5966 *delta = iv_ca_delta_join (best_delta, *delta);
5967 return best_cost;
5968 }
5969
5970 /* Tries to extend the set IVS in the best possible way in order
5971 to express the USE. If ORIGINALP is true, prefer candidates from
5972 the original set of IVs, otherwise favor important candidates not
5973 based on any memory object. */
5974
5975 static bool
5976 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5977 struct iv_use *use, bool originalp)
5978 {
5979 comp_cost best_cost, act_cost;
5980 unsigned i;
5981 bitmap_iterator bi;
5982 struct iv_cand *cand;
5983 struct iv_ca_delta *best_delta = NULL, *act_delta;
5984 struct cost_pair *cp;
5985
5986 iv_ca_add_use (data, ivs, use, false);
5987 best_cost = iv_ca_cost (ivs);
5988
5989 cp = iv_ca_cand_for_use (ivs, use);
5990 if (!cp)
5991 {
5992 ivs->upto--;
5993 ivs->bad_uses--;
5994 iv_ca_add_use (data, ivs, use, true);
5995 best_cost = iv_ca_cost (ivs);
5996 cp = iv_ca_cand_for_use (ivs, use);
5997 }
5998 if (cp)
5999 {
6000 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6001 iv_ca_set_no_cp (data, ivs, use);
6002 }
6003
6004 /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6005 first try important candidates not based on any memory object; only if
6006 this fails, try the specific ones.  Rationale -- in loops with many
6007 variables the best choice often is to use just one generic biv.  If we
6008 added many ivs specific to the uses here, the optimization algorithm
6009 would later be likely to get stuck in a local minimum, thus causing us to
6010 create too many ivs.  Going from few ivs to more seems more likely to
6011 be successful -- starting from few ivs, replacing an expensive use by a
6012 specific iv should always be a win. */
6013 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6014 {
6015 cand = iv_cand (data, i);
6016
6017 if (originalp && cand->pos != IP_ORIGINAL)
6018 continue;
6019
6020 if (!originalp && cand->iv->base_object != NULL_TREE)
6021 continue;
6022
6023 if (iv_ca_cand_used_p (ivs, cand))
6024 continue;
6025
6026 cp = get_use_iv_cost (data, use, cand);
6027 if (!cp)
6028 continue;
6029
6030 iv_ca_set_cp (data, ivs, use, cp);
6031 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6032 true);
6033 iv_ca_set_no_cp (data, ivs, use);
6034 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6035
6036 if (compare_costs (act_cost, best_cost) < 0)
6037 {
6038 best_cost = act_cost;
6039
6040 iv_ca_delta_free (&best_delta);
6041 best_delta = act_delta;
6042 }
6043 else
6044 iv_ca_delta_free (&act_delta);
6045 }
6046
6047 if (infinite_cost_p (best_cost))
6048 {
6049 for (i = 0; i < use->n_map_members; i++)
6050 {
6051 cp = use->cost_map + i;
6052 cand = cp->cand;
6053 if (!cand)
6054 continue;
6055
6056 /* Already tried this. */
6057 if (cand->important)
6058 {
6059 if (originalp && cand->pos == IP_ORIGINAL)
6060 continue;
6061 if (!originalp && cand->iv->base_object == NULL_TREE)
6062 continue;
6063 }
6064
6065 if (iv_ca_cand_used_p (ivs, cand))
6066 continue;
6067
6068 act_delta = NULL;
6069 iv_ca_set_cp (data, ivs, use, cp);
6070 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6071 iv_ca_set_no_cp (data, ivs, use);
6072 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6073 cp, act_delta);
6074
6075 if (compare_costs (act_cost, best_cost) < 0)
6076 {
6077 best_cost = act_cost;
6078
6079 if (best_delta)
6080 iv_ca_delta_free (&best_delta);
6081 best_delta = act_delta;
6082 }
6083 else
6084 iv_ca_delta_free (&act_delta);
6085 }
6086 }
6087
6088 iv_ca_delta_commit (data, ivs, best_delta, true);
6089 iv_ca_delta_free (&best_delta);
6090
6091 return !infinite_cost_p (best_cost);
6092 }
6093
6094 /* Finds an initial assignment of candidates to uses. */
6095
6096 static struct iv_ca *
6097 get_initial_solution (struct ivopts_data *data, bool originalp)
6098 {
6099 struct iv_ca *ivs = iv_ca_new (data);
6100 unsigned i;
6101
6102 for (i = 0; i < n_iv_uses (data); i++)
6103 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6104 {
6105 iv_ca_free (&ivs);
6106 return NULL;
6107 }
6108
6109 return ivs;
6110 }
6111
6112 /* Tries to improve the set of induction variables IVS. */
6113
6114 static bool
6115 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6116 {
6117 unsigned i, n_ivs;
6118 comp_cost acost, best_cost = iv_ca_cost (ivs);
6119 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6120 struct iv_cand *cand;
6121
6122 /* Try extending the set of induction variables by one. */
6123 for (i = 0; i < n_iv_cands (data); i++)
6124 {
6125 cand = iv_cand (data, i);
6126
6127 if (iv_ca_cand_used_p (ivs, cand))
6128 continue;
6129
6130 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6131 if (!act_delta)
6132 continue;
6133
6134 /* If we successfully added the candidate and the set is small enough,
6135 try optimizing it by removing other candidates. */
6136 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6137 {
6138 iv_ca_delta_commit (data, ivs, act_delta, true);
6139 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6140 iv_ca_delta_commit (data, ivs, act_delta, false);
6141 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6142 }
6143
6144 if (compare_costs (acost, best_cost) < 0)
6145 {
6146 best_cost = acost;
6147 iv_ca_delta_free (&best_delta);
6148 best_delta = act_delta;
6149 }
6150 else
6151 iv_ca_delta_free (&act_delta);
6152 }
6153
6154 if (!best_delta)
6155 {
6156 /* Try removing the candidates from the set instead. */
6157 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6158
6159 /* Nothing more we can do. */
6160 if (!best_delta)
6161 return false;
6162 }
6163
6164 iv_ca_delta_commit (data, ivs, best_delta, true);
6165 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6166 iv_ca_delta_free (&best_delta);
6167 return true;
6168 }
6169
6170 /* Attempts to find the optimal set of induction variables.  We use a simple
6171 greedy heuristic: we try to replace at most one candidate in the selected
6172 solution and remove the unused ivs as long as this improves the cost. */
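/* For example, with hypothetical costs: starting from an initial set of
   cost 20, one call of try_improve_iv_set may find that adding candidate
   c5 (and pruning a now-redundant c2) gives cost 16; a later call finds
   no single-candidate change that is cheaper, so the loop in
   find_optimal_iv_set_1 stops with the cost-16 set.  */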
6173
6174 static struct iv_ca *
6175 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6176 {
6177 struct iv_ca *set;
6178
6179 /* Get the initial solution. */
6180 set = get_initial_solution (data, originalp);
6181 if (!set)
6182 {
6183 if (dump_file && (dump_flags & TDF_DETAILS))
6184 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6185 return NULL;
6186 }
6187
6188 if (dump_file && (dump_flags & TDF_DETAILS))
6189 {
6190 fprintf (dump_file, "Initial set of candidates:\n");
6191 iv_ca_dump (data, dump_file, set);
6192 }
6193
6194 while (try_improve_iv_set (data, set))
6195 {
6196 if (dump_file && (dump_flags & TDF_DETAILS))
6197 {
6198 fprintf (dump_file, "Improved to:\n");
6199 iv_ca_dump (data, dump_file, set);
6200 }
6201 }
6202
6203 return set;
6204 }
6205
6206 static struct iv_ca *
6207 find_optimal_iv_set (struct ivopts_data *data)
6208 {
6209 unsigned i;
6210 struct iv_ca *set, *origset;
6211 struct iv_use *use;
6212 comp_cost cost, origcost;
6213
6214 /* Determine the cost based on a strategy that starts with the original IVs,
6215 and then try again using a strategy that prefers candidates not based
6216 on any IVs. */
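/* E.g., with hypothetical costs, if the original-IV strategy yields cost
   18 and the other strategy cost 15, the second set is kept below and the
   first one is freed.  */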
6217 origset = find_optimal_iv_set_1 (data, true);
6218 set = find_optimal_iv_set_1 (data, false);
6219
6220 if (!origset && !set)
6221 return NULL;
6222
6223 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6224 cost = set ? iv_ca_cost (set) : infinite_cost;
6225
6226 if (dump_file && (dump_flags & TDF_DETAILS))
6227 {
6228 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6229 origcost.cost, origcost.complexity);
6230 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6231 cost.cost, cost.complexity);
6232 }
6233
6234 /* Choose the one with the best cost. */
6235 if (compare_costs (origcost, cost) <= 0)
6236 {
6237 if (set)
6238 iv_ca_free (&set);
6239 set = origset;
6240 }
6241 else if (origset)
6242 iv_ca_free (&origset);
6243
6244 for (i = 0; i < n_iv_uses (data); i++)
6245 {
6246 use = iv_use (data, i);
6247 use->selected = iv_ca_cand_for_use (set, use)->cand;
6248 }
6249
6250 return set;
6251 }
6252
6253 /* Creates a new induction variable corresponding to CAND. */
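/* Schematic illustration of the increment positions (not real GIMPLE):

	IP_NORMAL				IP_END
	loop:					loop:
	  ...					  ...
	  var_after = var_before + step		  if (cond) goto exit;
	  if (cond) goto exit;			  var_after = var_before + step
	  goto loop;				  goto loop;

   IP_BEFORE_USE and IP_AFTER_USE place the increment immediately around
   cand->incremented_at, and IP_ORIGINAL preserves the existing biv.  */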
6254
6255 static void
6256 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6257 {
6258 gimple_stmt_iterator incr_pos;
6259 tree base;
6260 bool after = false;
6261
6262 if (!cand->iv)
6263 return;
6264
6265 switch (cand->pos)
6266 {
6267 case IP_NORMAL:
6268 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6269 break;
6270
6271 case IP_END:
6272 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6273 after = true;
6274 break;
6275
6276 case IP_AFTER_USE:
6277 after = true;
6278 /* fall through */
6279 case IP_BEFORE_USE:
6280 incr_pos = gsi_for_stmt (cand->incremented_at);
6281 break;
6282
6283 case IP_ORIGINAL:
6284 /* Mark that the iv is preserved. */
6285 name_info (data, cand->var_before)->preserve_biv = true;
6286 name_info (data, cand->var_after)->preserve_biv = true;
6287
6288 /* Rewrite the increment so that it uses var_before directly. */
6289 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6290 return;
6291 }
6292
6293 gimple_add_tmp_var (cand->var_before);
6294 add_referenced_var (cand->var_before);
6295
6296 base = unshare_expr (cand->iv->base);
6297
6298 create_iv (base, unshare_expr (cand->iv->step),
6299 cand->var_before, data->current_loop,
6300 &incr_pos, after, &cand->var_before, &cand->var_after);
6301 }
6302
6303 /* Creates new induction variables described in SET. */
6304
6305 static void
6306 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6307 {
6308 unsigned i;
6309 struct iv_cand *cand;
6310 bitmap_iterator bi;
6311
6312 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6313 {
6314 cand = iv_cand (data, i);
6315 create_new_iv (data, cand);
6316 }
6317
6318 if (dump_file && (dump_flags & TDF_DETAILS))
6319 {
6320 fprintf (dump_file, "\nSelected IV set: \n");
6321 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6322 {
6323 cand = iv_cand (data, i);
6324 dump_cand (dump_file, cand);
6325 }
6326 fprintf (dump_file, "\n");
6327 }
6328 }
6329
6330 /* Rewrites USE (definition of iv used in a nonlinear expression)
6331 using candidate CAND. */
6332
6333 static void
6334 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6335 struct iv_use *use, struct iv_cand *cand)
6336 {
6337 tree comp;
6338 tree op, tgt;
6339 gimple ass;
6340 gimple_stmt_iterator bsi;
6341
6342 /* An important special case -- if we are asked to express the value of
6343 the original iv by itself, just exit; there is no need to
6344 introduce a new computation (that might also need casting the
6345 variable to unsigned and back). */
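/* E.g., with hypothetical SSA names: if the use is the original increment
   i_7 = i_4 + 1 and CAND is that original biv, the statement is left
   untouched below when the added operand is a constant or the step
   itself; otherwise it is re-expressed as var_before incremented by the
   step.  */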
6346 if (cand->pos == IP_ORIGINAL
6347 && cand->incremented_at == use->stmt)
6348 {
6349 tree step, ctype, utype;
6350 enum tree_code incr_code = PLUS_EXPR, old_code;
6351
6352 gcc_assert (is_gimple_assign (use->stmt));
6353 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6354
6355 step = cand->iv->step;
6356 ctype = TREE_TYPE (step);
6357 utype = TREE_TYPE (cand->var_after);
6358 if (TREE_CODE (step) == NEGATE_EXPR)
6359 {
6360 incr_code = MINUS_EXPR;
6361 step = TREE_OPERAND (step, 0);
6362 }
6363
6364 /* Check whether we may leave the computation unchanged.
6365 This is the case only if it does not rely on other
6366 computations in the loop -- otherwise, the computation
6367 we rely upon may be removed in remove_unused_ivs,
6368 thus leading to ICE. */
6369 old_code = gimple_assign_rhs_code (use->stmt);
6370 if (old_code == PLUS_EXPR
6371 || old_code == MINUS_EXPR
6372 || old_code == POINTER_PLUS_EXPR)
6373 {
6374 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6375 op = gimple_assign_rhs2 (use->stmt);
6376 else if (old_code != MINUS_EXPR
6377 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6378 op = gimple_assign_rhs1 (use->stmt);
6379 else
6380 op = NULL_TREE;
6381 }
6382 else
6383 op = NULL_TREE;
6384
6385 if (op
6386 && (TREE_CODE (op) == INTEGER_CST
6387 || operand_equal_p (op, step, 0)))
6388 return;
6389
6390 /* Otherwise, add the necessary computations to express
6391 the iv. */
6392 op = fold_convert (ctype, cand->var_before);
6393 comp = fold_convert (utype,
6394 build2 (incr_code, ctype, op,
6395 unshare_expr (step)));
6396 }
6397 else
6398 {
6399 comp = get_computation (data->current_loop, use, cand);
6400 gcc_assert (comp != NULL_TREE);
6401 }
6402
6403 switch (gimple_code (use->stmt))
6404 {
6405 case GIMPLE_PHI:
6406 tgt = PHI_RESULT (use->stmt);
6407
6408 /* If we should keep the biv, do not replace it. */
6409 if (name_info (data, tgt)->preserve_biv)
6410 return;
6411
6412 bsi = gsi_after_labels (gimple_bb (use->stmt));
6413 break;
6414
6415 case GIMPLE_ASSIGN:
6416 tgt = gimple_assign_lhs (use->stmt);
6417 bsi = gsi_for_stmt (use->stmt);
6418 break;
6419
6420 default:
6421 gcc_unreachable ();
6422 }
6423
6424 if (!valid_gimple_rhs_p (comp)
6425 || (gimple_code (use->stmt) != GIMPLE_PHI
6426 /* We can't allow re-allocating the stmt as it might be pointed
6427 to still. */
6428 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6429 >= gimple_num_ops (gsi_stmt (bsi)))))
6430 {
6431 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6432 true, GSI_SAME_STMT);
6433 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6434 {
6435 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6436 /* As this isn't a plain copy we have to reset alignment
6437 information. */
6438 if (SSA_NAME_PTR_INFO (comp))
6439 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6440 }
6441 }
6442
6443 if (gimple_code (use->stmt) == GIMPLE_PHI)
6444 {
6445 ass = gimple_build_assign (tgt, comp);
6446 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6447
6448 bsi = gsi_for_stmt (use->stmt);
6449 remove_phi_node (&bsi, false);
6450 }
6451 else
6452 {
6453 gimple_assign_set_rhs_from_tree (&bsi, comp);
6454 use->stmt = gsi_stmt (bsi);
6455 }
6456 }
6457
6458 /* Performs a peephole optimization to reorder the iv update statement with
6459 a mem ref to enable instruction combining in later phases. The mem ref uses
6460 the iv value before the update, so the reordering transformation requires
6461 adjustment of the offset. CAND is the selected IV_CAND.
6462
6463 Example:
6464
6465 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6466 iv2 = iv1 + 1;
6467
6468 if (t < val) (1)
6469 goto L;
6470 goto Head;
6471
6472
6473 directly propagating t over to (1) would introduce an overlapping live range,
6474 thus increasing register pressure.  This peephole transforms it into:
6475
6476
6477 iv2 = iv1 + 1;
6478 t = MEM_REF (base, iv2, 8, 8);
6479 if (t < val)
6480 goto L;
6481 goto Head;
6482 */
6483
6484 static void
6485 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6486 {
6487 tree var_after;
6488 gimple iv_update, stmt;
6489 basic_block bb;
6490 gimple_stmt_iterator gsi, gsi_iv;
6491
6492 if (cand->pos != IP_NORMAL)
6493 return;
6494
6495 var_after = cand->var_after;
6496 iv_update = SSA_NAME_DEF_STMT (var_after);
6497
6498 bb = gimple_bb (iv_update);
6499 gsi = gsi_last_nondebug_bb (bb);
6500 stmt = gsi_stmt (gsi);
6501
6502 /* Only handle conditional statements for now. */
6503 if (gimple_code (stmt) != GIMPLE_COND)
6504 return;
6505
6506 gsi_prev_nondebug (&gsi);
6507 stmt = gsi_stmt (gsi);
6508 if (stmt != iv_update)
6509 return;
6510
6511 gsi_prev_nondebug (&gsi);
6512 if (gsi_end_p (gsi))
6513 return;
6514
6515 stmt = gsi_stmt (gsi);
6516 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6517 return;
6518
6519 if (stmt != use->stmt)
6520 return;
6521
6522 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6523 return;
6524
6525 if (dump_file && (dump_flags & TDF_DETAILS))
6526 {
6527 fprintf (dump_file, "Reordering \n");
6528 print_gimple_stmt (dump_file, iv_update, 0, 0);
6529 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6530 fprintf (dump_file, "\n");
6531 }
6532
6533 gsi = gsi_for_stmt (use->stmt);
6534 gsi_iv = gsi_for_stmt (iv_update);
6535 gsi_move_before (&gsi_iv, &gsi);
6536
6537 cand->pos = IP_BEFORE_USE;
6538 cand->incremented_at = use->stmt;
6539 }
6540
6541 /* Rewrites USE (address that is an iv) using candidate CAND. */
6542
6543 static void
6544 rewrite_use_address (struct ivopts_data *data,
6545 struct iv_use *use, struct iv_cand *cand)
6546 {
6547 aff_tree aff;
6548 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6549 tree base_hint = NULL_TREE;
6550 tree ref, iv;
6551 bool ok;
6552
6553 adjust_iv_update_pos (cand, use);
6554 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6555 gcc_assert (ok);
6556 unshare_aff_combination (&aff);
6557
6558 /* To avoid undefined overflow problems, all IV candidates use unsigned
6559 integer types. The drawback is that this makes it impossible for
6560 create_mem_ref to distinguish an IV that is based on a memory object
6561 from one that represents simply an offset.
6562
6563 To work around this problem, we pass a hint to create_mem_ref that
6564 indicates which variable (if any) in aff is an IV based on a memory
6565 object. Note that we only consider the candidate. If this is not
6566 based on an object, the base of the reference is in some subexpression
6567 of the use -- but these will use pointer types, so they are recognized
6568 by the create_mem_ref heuristics anyway. */
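/* An illustrative case, with hypothetical SSA names: for a use whose
   address is p_5 + (sizetype) i_3 * 4, where CAND is based on the memory
   object p, the hint names the candidate's variable at the use so that
   create_mem_ref can build something like
   MEM[base: p_5, index: ivtmp_9, step: 4] instead of treating the whole
   affine combination as a plain offset.  */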
6569 if (cand->iv->base_object)
6570 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6571
6572 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6573 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6574 reference_alias_ptr_type (*use->op_p),
6575 iv, base_hint, data->speed);
6576 copy_ref_info (ref, *use->op_p);
6577 *use->op_p = ref;
6578 }
6579
6580 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6581 candidate CAND. */
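/* A sketch, with hypothetical SSA names: when iv elimination computed a
   bound (cp->value), an exit test such as "if (i_4 < n_6)" is replaced by
   a test of the candidate against that bound, e.g. "if (ivtmp_21 != b_8)",
   with b_8 computed in the loop preheader; otherwise the compared operand
   is merely re-expressed in terms of the candidate.  */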
6582
6583 static void
6584 rewrite_use_compare (struct ivopts_data *data,
6585 struct iv_use *use, struct iv_cand *cand)
6586 {
6587 tree comp, *var_p, op, bound;
6588 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6589 enum tree_code compare;
6590 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6591 bool ok;
6592
6593 bound = cp->value;
6594 if (bound)
6595 {
6596 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6597 tree var_type = TREE_TYPE (var);
6598 gimple_seq stmts;
6599
6600 if (dump_file && (dump_flags & TDF_DETAILS))
6601 {
6602 fprintf (dump_file, "Replacing exit test: ");
6603 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6604 }
6605 compare = cp->comp;
6606 bound = unshare_expr (fold_convert (var_type, bound));
6607 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6608 if (stmts)
6609 gsi_insert_seq_on_edge_immediate (
6610 loop_preheader_edge (data->current_loop),
6611 stmts);
6612
6613 gimple_cond_set_lhs (use->stmt, var);
6614 gimple_cond_set_code (use->stmt, compare);
6615 gimple_cond_set_rhs (use->stmt, op);
6616 return;
6617 }
6618
6619 /* The induction variable elimination failed; just express the original
6620 giv. */
6621 comp = get_computation (data->current_loop, use, cand);
6622 gcc_assert (comp != NULL_TREE);
6623
6624 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6625 gcc_assert (ok);
6626
6627 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6628 true, GSI_SAME_STMT);
6629 }
6630
6631 /* Rewrites USE using candidate CAND. */
6632
6633 static void
6634 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6635 {
6636 switch (use->type)
6637 {
6638 case USE_NONLINEAR_EXPR:
6639 rewrite_use_nonlinear_expr (data, use, cand);
6640 break;
6641
6642 case USE_ADDRESS:
6643 rewrite_use_address (data, use, cand);
6644 break;
6645
6646 case USE_COMPARE:
6647 rewrite_use_compare (data, use, cand);
6648 break;
6649
6650 default:
6651 gcc_unreachable ();
6652 }
6653
6654 update_stmt (use->stmt);
6655 }
6656
6657 /* Rewrite the uses using the selected induction variables. */
6658
6659 static void
6660 rewrite_uses (struct ivopts_data *data)
6661 {
6662 unsigned i;
6663 struct iv_cand *cand;
6664 struct iv_use *use;
6665
6666 for (i = 0; i < n_iv_uses (data); i++)
6667 {
6668 use = iv_use (data, i);
6669 cand = use->selected;
6670 gcc_assert (cand);
6671
6672 rewrite_use (data, use, cand);
6673 }
6674 }
6675
6676 /* Removes the ivs that are not used after rewriting. */
6677
6678 static void
6679 remove_unused_ivs (struct ivopts_data *data)
6680 {
6681 unsigned j;
6682 bitmap_iterator bi;
6683 bitmap toremove = BITMAP_ALLOC (NULL);
6684
6685 /* Figure out an order in which to release SSA DEFs so that we don't
6686 release something that we'd have to propagate into a debug stmt
6687 afterwards. */
6688 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6689 {
6690 struct version_info *info;
6691
6692 info = ver_info (data, j);
6693 if (info->iv
6694 && !integer_zerop (info->iv->step)
6695 && !info->inv_id
6696 && !info->iv->have_use_for
6697 && !info->preserve_biv)
6698 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6699 }
6700
6701 release_defs_bitset (toremove);
6702
6703 BITMAP_FREE (toremove);
6704 }
6705
6706 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6707 for pointer_map_traverse. */
6708
6709 static bool
6710 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6711 void *data ATTRIBUTE_UNUSED)
6712 {
6713 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6714
6715 free (niter);
6716 return true;
6717 }
6718
6719 /* Frees data allocated by the optimization of a single loop. */
6720
6721 static void
6722 free_loop_data (struct ivopts_data *data)
6723 {
6724 unsigned i, j;
6725 bitmap_iterator bi;
6726 tree obj;
6727
6728 if (data->niters)
6729 {
6730 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6731 pointer_map_destroy (data->niters);
6732 data->niters = NULL;
6733 }
6734
6735 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6736 {
6737 struct version_info *info;
6738
6739 info = ver_info (data, i);
6740 free (info->iv);
6741 info->iv = NULL;
6742 info->has_nonlin_use = false;
6743 info->preserve_biv = false;
6744 info->inv_id = 0;
6745 }
6746 bitmap_clear (data->relevant);
6747 bitmap_clear (data->important_candidates);
6748
6749 for (i = 0; i < n_iv_uses (data); i++)
6750 {
6751 struct iv_use *use = iv_use (data, i);
6752
6753 free (use->iv);
6754 BITMAP_FREE (use->related_cands);
6755 for (j = 0; j < use->n_map_members; j++)
6756 if (use->cost_map[j].depends_on)
6757 BITMAP_FREE (use->cost_map[j].depends_on);
6758 free (use->cost_map);
6759 free (use);
6760 }
6761 VEC_truncate (iv_use_p, data->iv_uses, 0);
6762
6763 for (i = 0; i < n_iv_cands (data); i++)
6764 {
6765 struct iv_cand *cand = iv_cand (data, i);
6766
6767 free (cand->iv);
6768 if (cand->depends_on)
6769 BITMAP_FREE (cand->depends_on);
6770 free (cand);
6771 }
6772 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6773
6774 if (data->version_info_size < num_ssa_names)
6775 {
6776 data->version_info_size = 2 * num_ssa_names;
6777 free (data->version_info);
6778 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6779 }
6780
6781 data->max_inv_id = 0;
6782
6783 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6784 SET_DECL_RTL (obj, NULL_RTX);
6785
6786 VEC_truncate (tree, decl_rtl_to_reset, 0);
6787
6788 htab_empty (data->inv_expr_tab);
6789 data->inv_expr_id = 0;
6790 }
6791
6792 /* Finalizes the data structures used by the iv optimization pass
6793 DATA. */
6794
6795 static void
6796 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6797 {
6798 free_loop_data (data);
6799 free (data->version_info);
6800 BITMAP_FREE (data->relevant);
6801 BITMAP_FREE (data->important_candidates);
6802
6803 VEC_free (tree, heap, decl_rtl_to_reset);
6804 VEC_free (iv_use_p, heap, data->iv_uses);
6805 VEC_free (iv_cand_p, heap, data->iv_candidates);
6806 htab_delete (data->inv_expr_tab);
6807
6808 finalize_costs ();
6809 }
6810
6811 /* Returns true if the loop body BODY includes any function calls. */
6812
6813 static bool
6814 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6815 {
6816 gimple_stmt_iterator gsi;
6817 unsigned i;
6818
6819 for (i = 0; i < num_nodes; i++)
6820 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6821 {
6822 gimple stmt = gsi_stmt (gsi);
6823 if (is_gimple_call (stmt)
6824 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6825 return true;
6826 }
6827 return false;
6828 }
6829
6830 /* Optimizes the LOOP. Returns true if anything changed. */
6831
6832 static bool
6833 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6834 {
6835 bool changed = false;
6836 struct iv_ca *iv_ca;
6837 edge exit = single_dom_exit (loop);
6838 basic_block *body;
6839
6840 gcc_assert (!data->niters);
6841 data->current_loop = loop;
6842 data->speed = optimize_loop_for_speed_p (loop);
6843
6844 if (dump_file && (dump_flags & TDF_DETAILS))
6845 {
6846 fprintf (dump_file, "Processing loop %d\n", loop->num);
6847
6848 if (exit)
6849 {
6850 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6851 exit->src->index, exit->dest->index);
6852 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6853 fprintf (dump_file, "\n");
6854 }
6855
6856 fprintf (dump_file, "\n");
6857 }
6858
6859 body = get_loop_body (loop);
6860 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6861 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6862 free (body);
6863
6864 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6865
6866 /* For each ssa name, determine whether it behaves as an induction variable
6867 in some loop. */
6868 if (!find_induction_variables (data))
6869 goto finish;
6870
6871 /* Finds interesting uses (item 1). */
6872 find_interesting_uses (data);
6873 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6874 goto finish;
6875
6876 /* Finds candidates for the induction variables (item 2). */
6877 find_iv_candidates (data);
6878
6879 /* Calculates the costs (item 3, part 1). */
6880 determine_iv_costs (data);
6881 determine_use_iv_costs (data);
6882 determine_set_costs (data);
6883
6884 /* Find the optimal set of induction variables (item 3, part 2). */
6885 iv_ca = find_optimal_iv_set (data);
6886 if (!iv_ca)
6887 goto finish;
6888 changed = true;
6889
6890 /* Create the new induction variables (item 4, part 1). */
6891 create_new_ivs (data, iv_ca);
6892 iv_ca_free (&iv_ca);
6893
6894 /* Rewrite the uses (item 4, part 2). */
6895 rewrite_uses (data);
6896
6897 /* Remove the ivs that are unused after rewriting. */
6898 remove_unused_ivs (data);
6899
6900 /* We have changed the structure of induction variables; it might happen
6901 that definitions in the scev database refer to some of them that were
6902 eliminated. */
6903 scev_reset ();
6904
6905 finish:
6906 free_loop_data (data);
6907
6908 return changed;
6909 }
6910
6911 /* Main entry point. Optimizes induction variables in loops. */
6912
6913 void
6914 tree_ssa_iv_optimize (void)
6915 {
6916 struct loop *loop;
6917 struct ivopts_data data;
6918 loop_iterator li;
6919
6920 tree_ssa_iv_optimize_init (&data);
6921
6922 /* Optimize the loops starting with the innermost ones. */
6923 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6924 {
6925 if (dump_file && (dump_flags & TDF_DETAILS))
6926 flow_loop_dump (loop, dump_file, NULL, 1);
6927
6928 tree_ssa_iv_optimize_loop (&data, loop);
6929 }
6930
6931 tree_ssa_iv_optimize_finalize (&data);
6932 }