re PR tree-optimization/69042 (Missed optimization in ivopts)
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (with respect to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, and it might give better performance and enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
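/* As a rough illustration (a hypothetical example, not taken from any
   testcase), consider a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   The address of a[i] is an interesting address use, the exit test i < n
   is a compare use, and i itself is the original biv.  A further candidate
   is a pointer that starts at &a[0] and is advanced by the element size
   each iteration.  Selecting that candidate yields the classical
   strength-reduced form

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   while keeping i may be preferable on targets with base + index * scale
   addressing modes; the cost model below is what decides between such
   alternatives.  */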
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
105
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109
110 /* The infinite cost. */
111 #define INFTY 10000000
112
113 #define AVG_LOOP_NITER(LOOP) 5
114
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
118
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop *loop)
121 {
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
125
126 return niter;
127 }
128
129 /* Representation of the induction variable. */
130 struct iv
131 {
132 tree base; /* Initial value of the iv. */
133 tree base_object; /* A memory object to which the induction variable points. */
134 tree step; /* Step of the iv (constant only). */
135 tree ssa_name; /* The ssa name with the value. */
136 unsigned use_id; /* The identifier of the use this iv has been recorded for, if any. */
137 bool biv_p; /* Is it a biv? */
138 bool have_use_for; /* Do we already have a use for it? */
139 bool no_overflow; /* True if the iv doesn't overflow. */
140 bool have_address_use;/* For biv, indicate if it's used in any address
141 type use. */
142 };
143
144 /* Per-ssa version information (induction variable descriptions, etc.). */
145 struct version_info
146 {
147 tree name; /* The ssa name. */
148 struct iv *iv; /* Induction variable description. */
149 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv; /* For the original biv, whether to preserve it. */
152 unsigned inv_id; /* Id of an invariant. */
153 };
154
155 /* Types of uses. */
156 enum use_type
157 {
158 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
159 USE_ADDRESS, /* Use in an address. */
160 USE_COMPARE /* Use is a compare. */
161 };
162
163 /* Cost of a computation. */
164 struct comp_cost
165 {
166 int cost; /* The runtime cost. */
167 unsigned complexity; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
171 };
172
173 static const comp_cost no_cost = {0, 0};
174 static const comp_cost infinite_cost = {INFTY, INFTY};
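/* Presumably (judging from how the pass uses these values later), costs
   are compared primarily by the runtime COST field, with COMPLEXITY used
   to break ties so that simpler expressions and addressing modes are
   preferred among alternatives of equal cost.  */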
175
176 /* The candidate - cost pair. */
177 struct cost_pair
178 {
179 struct iv_cand *cand; /* The candidate. */
180 comp_cost cost; /* The cost. */
181 bitmap depends_on; /* The list of invariants that have to be
182 preserved. */
183 tree value; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp; /* For iv elimination, the comparison. */
187 int inv_expr_id; /* Loop invariant expression id. */
188 };
189
190 /* Use. */
191 struct iv_use
192 {
193 unsigned id; /* The id of the use. */
194 unsigned sub_id; /* The id of the sub use. */
195 enum use_type type; /* Type of the use. */
196 struct iv *iv; /* The induction variable it is based on. */
197 gimple *stmt; /* Statement in which it occurs. */
198 tree *op_p; /* The place where it occurs. */
199 bitmap related_cands; /* The set of "related" iv candidates, plus the common
200 important ones. */
201
202 unsigned n_map_members; /* Number of candidates in the cost_map list. */
203 struct cost_pair *cost_map;
204 /* The costs with respect to the iv candidates. */
205
206 struct iv_cand *selected;
207 /* The selected candidate. */
208
209 struct iv_use *next; /* The next sub use. */
210 tree addr_base; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset;
212 /* Const offset stripped from base address. */
213 };
214
215 /* The position where the iv is computed. */
216 enum iv_position
217 {
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
223 };
224
225 /* The induction variable candidate. */
226 struct iv_cand
227 {
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple *incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
246 struct iv *orig_iv; /* The original iv if this cand is added from biv with
247 smaller type. */
248 };
249
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
252 {
253 tree base;
254 tree step;
255 /* IV uses from which this common candidate is derived. */
256 auto_vec<iv_use *> uses;
257 hashval_t hash;
258 };
259
260 /* Hashtable helpers. */
261
262 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
263 {
264 static inline hashval_t hash (const iv_common_cand *);
265 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
266 };
267
268 /* Hash function for possible common candidates. */
269
270 inline hashval_t
271 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
272 {
273 return ccand->hash;
274 }
275
276 /* Hash table equality function for common candidates. */
277
278 inline bool
279 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
280 const iv_common_cand *ccand2)
281 {
282 return (ccand1->hash == ccand2->hash
283 && operand_equal_p (ccand1->base, ccand2->base, 0)
284 && operand_equal_p (ccand1->step, ccand2->step, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
286 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
287 }
288
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
291 {
292 tree expr;
293 int id;
294 hashval_t hash;
295 };
296
297 /* Hashtable helpers. */
298
299 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
300 {
301 static inline hashval_t hash (const iv_inv_expr_ent *);
302 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
303 };
304
305 /* Hash function for loop invariant expressions. */
306
307 inline hashval_t
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
309 {
310 return expr->hash;
311 }
312
313 /* Hash table equality function for expressions. */
314
315 inline bool
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
317 const iv_inv_expr_ent *expr2)
318 {
319 return expr1->hash == expr2->hash
320 && operand_equal_p (expr1->expr, expr2->expr, 0);
321 }
322
323 struct ivopts_data
324 {
325 /* The currently optimized loop. */
326 struct loop *current_loop;
327 source_location loop_loc;
328
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map<edge, tree_niter_desc *> *niters;
331
332 /* Number of registers used in it. */
333 unsigned regs_used;
334
335 /* The size of version_info array allocated. */
336 unsigned version_info_size;
337
338 /* The array of information for the ssa names. */
339 struct version_info *version_info;
340
341 /* The hashtable of loop invariant expressions created
342 by ivopts. */
343 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
344
345 /* Loop invariant expression id. */
346 int inv_expr_id;
347
348 /* The bitmap of indices in version_info whose value was changed. */
349 bitmap relevant;
350
351 /* The uses of induction variables. */
352 vec<iv_use *> iv_uses;
353
354 /* The candidates. */
355 vec<iv_cand *> iv_candidates;
356
357 /* A bitmap of important candidates. */
358 bitmap important_candidates;
359
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map<tree, name_expansion *> *name_expansion_cache;
362
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
365
366 /* The common candidates. */
367 vec<iv_common_cand *> iv_common_cands;
368
369 /* The maximum invariant id. */
370 unsigned max_inv_id;
371
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr;
374
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack;
377
378 /* Whether to consider just related and important candidates when replacing a
379 use. */
380 bool consider_all_candidates;
381
382 /* Are we optimizing for speed? */
383 bool speed;
384
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call;
387
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p;
390 };
391
392 /* An assignment of iv candidates to uses. */
393
394 struct iv_ca
395 {
396 /* The number of uses covered by the assignment. */
397 unsigned upto;
398
399 /* Number of uses that cannot be expressed by the candidates in the set. */
400 unsigned bad_uses;
401
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair **cand_for_use;
404
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses;
407
408 /* The candidates used. */
409 bitmap cands;
410
411 /* The number of candidates in the set. */
412 unsigned n_cands;
413
414 /* Total number of registers needed. */
415 unsigned n_regs;
416
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost;
419
420 /* Total cost of candidates. */
421 unsigned cand_cost;
422
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses;
425
426 /* The array holding the number of uses of each loop
427 invariant expression created by ivopts. */
428 unsigned *used_inv_expr;
429
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr;
432
433 /* Total cost of the assignment. */
434 comp_cost cost;
435 };
436
437 /* Difference of two iv candidate assignments. */
438
439 struct iv_ca_delta
440 {
441 /* Changed use. */
442 struct iv_use *use;
443
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair *old_cp;
446
447 /* A new assignment. */
448 struct cost_pair *new_cp;
449
450 /* Next change in the list. */
451 struct iv_ca_delta *next_change;
452 };
453
454 /* Bound on the number of candidates below which all candidates are considered. */
455
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
458
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
461
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
464
465 /* If there are at most this number of ivs in the set, always try removing
466 unnecessary ivs from the set. */
467
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
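/* These bounds are controlled by --param knobs; assuming the usual names
   from params.def they can be tuned on the command line, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=60 \
             --param iv-max-considered-uses=400 file.c

   (illustrative invocation only).  */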
470
471 /* The list of trees for which the decl_rtl field must be reset is stored
472 here. */
473
474 static vec<tree> decl_rtl_to_reset;
475
476 static comp_cost force_expr_to_var_cost (tree, bool);
477
478 /* Number of uses recorded in DATA. */
479
480 static inline unsigned
481 n_iv_uses (struct ivopts_data *data)
482 {
483 return data->iv_uses.length ();
484 }
485
486 /* Ith use recorded in DATA. */
487
488 static inline struct iv_use *
489 iv_use (struct ivopts_data *data, unsigned i)
490 {
491 return data->iv_uses[i];
492 }
493
494 /* Number of candidates recorded in DATA. */
495
496 static inline unsigned
497 n_iv_cands (struct ivopts_data *data)
498 {
499 return data->iv_candidates.length ();
500 }
501
502 /* Ith candidate recorded in DATA. */
503
504 static inline struct iv_cand *
505 iv_cand (struct ivopts_data *data, unsigned i)
506 {
507 return data->iv_candidates[i];
508 }
509
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
511
512 edge
513 single_dom_exit (struct loop *loop)
514 {
515 edge exit = single_exit (loop);
516
517 if (!exit)
518 return NULL;
519
520 if (!just_once_each_iteration_p (loop, exit->src))
521 return NULL;
522
523 return exit;
524 }
525
526 /* Dumps information about the induction variable IV to FILE. */
527
528 void
529 dump_iv (FILE *file, struct iv *iv, bool dump_name)
530 {
531 if (iv->ssa_name && dump_name)
532 {
533 fprintf (file, "ssa name ");
534 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
535 fprintf (file, "\n");
536 }
537
538 fprintf (file, " type ");
539 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
540 fprintf (file, "\n");
541
542 if (iv->step)
543 {
544 fprintf (file, " base ");
545 print_generic_expr (file, iv->base, TDF_SLIM);
546 fprintf (file, "\n");
547
548 fprintf (file, " step ");
549 print_generic_expr (file, iv->step, TDF_SLIM);
550 fprintf (file, "\n");
551 }
552 else
553 {
554 fprintf (file, " invariant ");
555 print_generic_expr (file, iv->base, TDF_SLIM);
556 fprintf (file, "\n");
557 }
558
559 if (iv->base_object)
560 {
561 fprintf (file, " base object ");
562 print_generic_expr (file, iv->base_object, TDF_SLIM);
563 fprintf (file, "\n");
564 }
565
566 if (iv->biv_p)
567 fprintf (file, " is a biv\n");
568
569 if (iv->no_overflow)
570 fprintf (file, " iv doesn't overflow wrto loop niter\n");
571 }
572
573 /* Dumps information about the USE to FILE. */
574
575 void
576 dump_use (FILE *file, struct iv_use *use)
577 {
578 fprintf (file, "use %d", use->id);
579 if (use->sub_id)
580 fprintf (file, ".%d", use->sub_id);
581
582 fprintf (file, "\n");
583
584 switch (use->type)
585 {
586 case USE_NONLINEAR_EXPR:
587 fprintf (file, " generic\n");
588 break;
589
590 case USE_ADDRESS:
591 fprintf (file, " address\n");
592 break;
593
594 case USE_COMPARE:
595 fprintf (file, " compare\n");
596 break;
597
598 default:
599 gcc_unreachable ();
600 }
601
602 fprintf (file, " in statement ");
603 print_gimple_stmt (file, use->stmt, 0, 0);
604 fprintf (file, "\n");
605
606 fprintf (file, " at position ");
607 if (use->op_p)
608 print_generic_expr (file, *use->op_p, TDF_SLIM);
609 fprintf (file, "\n");
610
611 dump_iv (file, use->iv, false);
612
613 if (use->related_cands)
614 {
615 fprintf (file, " related candidates ");
616 dump_bitmap (file, use->related_cands);
617 }
618 }
619
620 /* Dumps information about the uses to FILE. */
621
622 void
623 dump_uses (FILE *file, struct ivopts_data *data)
624 {
625 unsigned i;
626 struct iv_use *use;
627
628 for (i = 0; i < n_iv_uses (data); i++)
629 {
630 use = iv_use (data, i);
631 do
632 {
633 dump_use (file, use);
634 use = use->next;
635 }
636 while (use);
637 fprintf (file, "\n");
638 }
639 }
640
641 /* Dumps information about induction variable candidate CAND to FILE. */
642
643 void
644 dump_cand (FILE *file, struct iv_cand *cand)
645 {
646 struct iv *iv = cand->iv;
647
648 fprintf (file, "candidate %d%s\n",
649 cand->id, cand->important ? " (important)" : "");
650
651 if (cand->depends_on)
652 {
653 fprintf (file, " depends on ");
654 dump_bitmap (file, cand->depends_on);
655 }
656
657 if (!iv)
658 {
659 fprintf (file, " final value replacement\n");
660 return;
661 }
662
663 if (cand->var_before)
664 {
665 fprintf (file, " var_before ");
666 print_generic_expr (file, cand->var_before, TDF_SLIM);
667 fprintf (file, "\n");
668 }
669 if (cand->var_after)
670 {
671 fprintf (file, " var_after ");
672 print_generic_expr (file, cand->var_after, TDF_SLIM);
673 fprintf (file, "\n");
674 }
675
676 switch (cand->pos)
677 {
678 case IP_NORMAL:
679 fprintf (file, " incremented before exit test\n");
680 break;
681
682 case IP_BEFORE_USE:
683 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
684 break;
685
686 case IP_AFTER_USE:
687 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
688 break;
689
690 case IP_END:
691 fprintf (file, " incremented at end\n");
692 break;
693
694 case IP_ORIGINAL:
695 fprintf (file, " original biv\n");
696 break;
697 }
698
699 dump_iv (file, iv, false);
700 }
701
702 /* Returns the info for ssa version VER. */
703
704 static inline struct version_info *
705 ver_info (struct ivopts_data *data, unsigned ver)
706 {
707 return data->version_info + ver;
708 }
709
710 /* Returns the info for ssa name NAME. */
711
712 static inline struct version_info *
713 name_info (struct ivopts_data *data, tree name)
714 {
715 return ver_info (data, SSA_NAME_VERSION (name));
716 }
717
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
719 emitted in LOOP. */
720
721 static bool
722 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
723 {
724 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
725
726 gcc_assert (bb);
727
728 if (sbb == loop->latch)
729 return true;
730
731 if (sbb != bb)
732 return false;
733
734 return stmt == last_stmt (bb);
735 }
736
737 /* Returns true if STMT is after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
740
741 static bool
742 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
743 {
744 basic_block cand_bb = gimple_bb (cand->incremented_at);
745 basic_block stmt_bb = gimple_bb (stmt);
746
747 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
748 return false;
749
750 if (stmt_bb != cand_bb)
751 return true;
752
753 if (true_if_equal
754 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
755 return true;
756 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
757 }
758
759 /* Returns true if STMT is after the place where the induction variable
760 CAND is incremented in LOOP. */
761
762 static bool
763 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
764 {
765 switch (cand->pos)
766 {
767 case IP_END:
768 return false;
769
770 case IP_NORMAL:
771 return stmt_after_ip_normal_pos (loop, stmt);
772
773 case IP_ORIGINAL:
774 case IP_AFTER_USE:
775 return stmt_after_inc_pos (cand, stmt, false);
776
777 case IP_BEFORE_USE:
778 return stmt_after_inc_pos (cand, stmt, true);
779
780 default:
781 gcc_unreachable ();
782 }
783 }
784
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
786
787 static bool
788 abnormal_ssa_name_p (tree exp)
789 {
790 if (!exp)
791 return false;
792
793 if (TREE_CODE (exp) != SSA_NAME)
794 return false;
795
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
797 }
798
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
801
802 static bool
803 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
804 void *data ATTRIBUTE_UNUSED)
805 {
806 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
807 {
808 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
809 return false;
810 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
811 return false;
812 }
813
814 return !abnormal_ssa_name_p (*index);
815 }
816
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
819
820 bool
821 contains_abnormal_ssa_name_p (tree expr)
822 {
823 enum tree_code code;
824 enum tree_code_class codeclass;
825
826 if (!expr)
827 return false;
828
829 code = TREE_CODE (expr);
830 codeclass = TREE_CODE_CLASS (code);
831
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
834
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
838
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
843
844 if (code == COND_EXPR)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
848
849 switch (codeclass)
850 {
851 case tcc_binary:
852 case tcc_comparison:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
854 return true;
855
856 /* Fallthru. */
857 case tcc_unary:
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
859 return true;
860
861 break;
862
863 default:
864 gcc_unreachable ();
865 }
866
867 return false;
868 }
869
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_exit (struct ivopts_data *data, edge exit)
875 {
876 struct tree_niter_desc *desc;
877 tree_niter_desc **slot;
878
879 if (!data->niters)
880 {
881 data->niters = new hash_map<edge, tree_niter_desc *>;
882 slot = NULL;
883 }
884 else
885 slot = data->niters->get (exit);
886
887 if (!slot)
888 {
889 /* Try to determine the number of iterations. We cannot safely work with ssa
890 names that appear in phi nodes on abnormal edges, as that could
891 create overlapping life ranges for them (PR 27283). */
892 desc = XNEW (struct tree_niter_desc);
893 if (!number_of_iterations_exit (data->current_loop,
894 exit, desc, true)
895 || contains_abnormal_ssa_name_p (desc->niter))
896 {
897 XDELETE (desc);
898 desc = NULL;
899 }
900 data->niters->put (exit, desc);
901 }
902 else
903 desc = *slot;
904
905 return desc;
906 }
907
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
910 goes wrong. */
911
912 static struct tree_niter_desc *
913 niter_for_single_dom_exit (struct ivopts_data *data)
914 {
915 edge exit = single_dom_exit (data->current_loop);
916
917 if (!exit)
918 return NULL;
919
920 return niter_for_exit (data, exit);
921 }
922
923 /* Initializes data structures used by the iv optimization pass, stored
924 in DATA. */
925
926 static void
927 tree_ssa_iv_optimize_init (struct ivopts_data *data)
928 {
929 data->version_info_size = 2 * num_ssa_names;
930 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
931 data->relevant = BITMAP_ALLOC (NULL);
932 data->important_candidates = BITMAP_ALLOC (NULL);
933 data->max_inv_id = 0;
934 data->niters = NULL;
935 data->iv_uses.create (20);
936 data->iv_candidates.create (20);
937 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
938 data->inv_expr_id = 0;
939 data->name_expansion_cache = NULL;
940 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
941 data->iv_common_cands.create (20);
942 decl_rtl_to_reset.create (20);
943 gcc_obstack_init (&data->iv_obstack);
944 }
945
946 /* Returns a memory object to which EXPR points. In case we are able to
947 determine that it does not point to any such object, NULL is returned. */
948
949 static tree
950 determine_base_object (tree expr)
951 {
952 enum tree_code code = TREE_CODE (expr);
953 tree base, obj;
954
955 /* If this is a pointer casted to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr))
959 return determine_base_object (TREE_OPERAND (expr, 0));
960
961 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
962 return NULL_TREE;
963
964 switch (code)
965 {
966 case INTEGER_CST:
967 return NULL_TREE;
968
969 case ADDR_EXPR:
970 obj = TREE_OPERAND (expr, 0);
971 base = get_base_address (obj);
972
973 if (!base)
974 return expr;
975
976 if (TREE_CODE (base) == MEM_REF)
977 return determine_base_object (TREE_OPERAND (base, 0));
978
979 return fold_convert (ptr_type_node,
980 build_fold_addr_expr (base));
981
982 case POINTER_PLUS_EXPR:
983 return determine_base_object (TREE_OPERAND (expr, 0));
984
985 case PLUS_EXPR:
986 case MINUS_EXPR:
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
988 gcc_unreachable ();
989
990 default:
991 return fold_convert (ptr_type_node, expr);
992 }
993 }
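/* Illustrative examples (assumed, not exhaustive): for &a[0] or for
   &a[0] + 4 * i the base object is the address of the declaration a; for
   an integer constant cast to a pointer NULL_TREE is returned; for an
   arbitrary pointer ssa name p_1 the expression itself, converted to
   ptr_type_node, serves as the base object.  */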
994
995 /* Return true if an address expression with a non-DECL_P operand appears
996 in EXPR. */
997
998 static bool
999 contain_complex_addr_expr (tree expr)
1000 {
1001 bool res = false;
1002
1003 STRIP_NOPS (expr);
1004 switch (TREE_CODE (expr))
1005 {
1006 case POINTER_PLUS_EXPR:
1007 case PLUS_EXPR:
1008 case MINUS_EXPR:
1009 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1010 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1011 break;
1012
1013 case ADDR_EXPR:
1014 return (!DECL_P (TREE_OPERAND (expr, 0)));
1015
1016 default:
1017 return false;
1018 }
1019
1020 return res;
1021 }
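/* For example (hypothetical cases): &a and p_1 + 4 contain no complex
   address expression, since the ADDR_EXPR operand is a DECL resp. there
   is no ADDR_EXPR at all, while &a[0] and &s->f + i_2 do, because the
   operand of the ADDR_EXPR is an ARRAY_REF or COMPONENT_REF.  */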
1022
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1025
1026 static struct iv *
1027 alloc_iv (struct ivopts_data *data, tree base, tree step,
1028 bool no_overflow = false)
1029 {
1030 tree expr = base;
1031 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1032 sizeof (struct iv));
1033 gcc_assert (step != NULL_TREE);
1034
1035 /* Lower address expressions in the base, except ones with a DECL_P operand.
1036 By doing this:
1037 1) More accurate costs can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1040 STRIP_NOPS (expr);
1041 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1042 || contain_complex_addr_expr (expr))
1043 {
1044 aff_tree comb;
1045 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1046 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1047 }
1048
1049 iv->base = base;
1050 iv->base_object = determine_base_object (base);
1051 iv->step = step;
1052 iv->biv_p = false;
1053 iv->have_use_for = false;
1054 iv->use_id = 0;
1055 iv->ssa_name = NULL_TREE;
1056 iv->no_overflow = no_overflow;
1057 iv->have_address_use = false;
1058
1059 return iv;
1060 }
1061
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1064
1065 static void
1066 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1067 bool no_overflow)
1068 {
1069 struct version_info *info = name_info (data, iv);
1070
1071 gcc_assert (!info->iv);
1072
1073 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1074 info->iv = alloc_iv (data, base, step, no_overflow);
1075 info->iv->ssa_name = iv;
1076 }
1077
1078 /* Finds induction variable declaration for VAR. */
1079
1080 static struct iv *
1081 get_iv (struct ivopts_data *data, tree var)
1082 {
1083 basic_block bb;
1084 tree type = TREE_TYPE (var);
1085
1086 if (!POINTER_TYPE_P (type)
1087 && !INTEGRAL_TYPE_P (type))
1088 return NULL;
1089
1090 if (!name_info (data, var)->iv)
1091 {
1092 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1093
1094 if (!bb
1095 || !flow_bb_inside_loop_p (data->current_loop, bb))
1096 set_iv (data, var, var, build_int_cst (type, 0), true);
1097 }
1098
1099 return name_info (data, var)->iv;
1100 }
1101
1102 /* Return the first non-invariant ssa var found in EXPR. */
1103
1104 static tree
1105 extract_single_var_from_expr (tree expr)
1106 {
1107 int i, n;
1108 tree tmp;
1109 enum tree_code code;
1110
1111 if (!expr || is_gimple_min_invariant (expr))
1112 return NULL;
1113
1114 code = TREE_CODE (expr);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1116 {
1117 n = TREE_OPERAND_LENGTH (expr);
1118 for (i = 0; i < n; i++)
1119 {
1120 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1121
1122 if (tmp)
1123 return tmp;
1124 }
1125 }
1126 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1127 }
1128
1129 /* Finds basic ivs. */
1130
1131 static bool
1132 find_bivs (struct ivopts_data *data)
1133 {
1134 gphi *phi;
1135 affine_iv iv;
1136 tree step, type, base, stop;
1137 bool found = false;
1138 struct loop *loop = data->current_loop;
1139 gphi_iterator psi;
1140
1141 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1142 {
1143 phi = psi.phi ();
1144
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1146 continue;
1147
1148 if (virtual_operand_p (PHI_RESULT (phi)))
1149 continue;
1150
1151 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1152 continue;
1153
1154 if (integer_zerop (iv.step))
1155 continue;
1156
1157 step = iv.step;
1158 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1159 /* Stop expanding the iv base at the first ssa var referred to by the iv
1160 step. Ideally we would stop at any such ssa var, but since that is
1161 expensive and the case is unusual, we only do it for the first one.
1162
1163 See PR64705 for the rationale. */
1164 stop = extract_single_var_from_expr (step);
1165 base = expand_simple_operations (base, stop);
1166 if (contains_abnormal_ssa_name_p (base)
1167 || contains_abnormal_ssa_name_p (step))
1168 continue;
1169
1170 type = TREE_TYPE (PHI_RESULT (phi));
1171 base = fold_convert (type, base);
1172 if (step)
1173 {
1174 if (POINTER_TYPE_P (type))
1175 step = convert_to_ptrofftype (step);
1176 else
1177 step = fold_convert (type, step);
1178 }
1179
1180 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1181 found = true;
1182 }
1183
1184 return found;
1185 }
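/* For instance (an illustrative case), for a header phi
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 1 in the body,
   simple_iv recognizes i_1 as an iv with base 0 and step 1, and set_iv
   records it; mark_bivs below then flags both i_1 and i_2 as bivs.  */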
1186
1187 /* Marks basic ivs. */
1188
1189 static void
1190 mark_bivs (struct ivopts_data *data)
1191 {
1192 gphi *phi;
1193 gimple *def;
1194 tree var;
1195 struct iv *iv, *incr_iv;
1196 struct loop *loop = data->current_loop;
1197 basic_block incr_bb;
1198 gphi_iterator psi;
1199
1200 data->bivs_not_used_in_addr = 0;
1201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1202 {
1203 phi = psi.phi ();
1204
1205 iv = get_iv (data, PHI_RESULT (phi));
1206 if (!iv)
1207 continue;
1208
1209 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1210 def = SSA_NAME_DEF_STMT (var);
1211 /* Don't mark an iv peeled from another one as a biv. */
1212 if (def
1213 && gimple_code (def) == GIMPLE_PHI
1214 && gimple_bb (def) == loop->header)
1215 continue;
1216
1217 incr_iv = get_iv (data, var);
1218 if (!incr_iv)
1219 continue;
1220
1221 /* If the increment is in the subloop, ignore it. */
1222 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1223 if (incr_bb->loop_father != data->current_loop
1224 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1225 continue;
1226
1227 iv->biv_p = true;
1228 incr_iv->biv_p = true;
1229 if (iv->no_overflow)
1230 data->bivs_not_used_in_addr++;
1231 if (incr_iv->no_overflow)
1232 data->bivs_not_used_in_addr++;
1233 }
1234 }
1235
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1238
1239 static bool
1240 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1241 {
1242 tree lhs, stop;
1243 struct loop *loop = data->current_loop;
1244
1245 iv->base = NULL_TREE;
1246 iv->step = NULL_TREE;
1247
1248 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1249 return false;
1250
1251 lhs = gimple_assign_lhs (stmt);
1252 if (TREE_CODE (lhs) != SSA_NAME)
1253 return false;
1254
1255 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1256 return false;
1257
1258 /* Stop expanding the iv base at the first ssa var referred to by the iv
1259 step. Ideally we would stop at any such ssa var, but since that is
1260 expensive and the case is unusual, we only do it for the first one.
1261
1262 See PR64705 for the rationale. */
1263 stop = extract_single_var_from_expr (iv->step);
1264 iv->base = expand_simple_operations (iv->base, stop);
1265 if (contains_abnormal_ssa_name_p (iv->base)
1266 || contains_abnormal_ssa_name_p (iv->step))
1267 return false;
1268
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270 While this will suppress optimizations, we cannot safely delete this
1271 GIV and associated statements, even if it appears it is not used. */
1272 if (stmt_could_throw_p (stmt))
1273 return false;
1274
1275 return true;
1276 }
1277
1278 /* Finds general ivs in statement STMT. */
1279
1280 static void
1281 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1282 {
1283 affine_iv iv;
1284
1285 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1286 return;
1287
1288 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1289 }
1290
1291 /* Finds general ivs in basic block BB. */
1292
1293 static void
1294 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1295 {
1296 gimple_stmt_iterator bsi;
1297
1298 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1299 find_givs_in_stmt (data, gsi_stmt (bsi));
1300 }
1301
1302 /* Finds general ivs. */
1303
1304 static void
1305 find_givs (struct ivopts_data *data)
1306 {
1307 struct loop *loop = data->current_loop;
1308 basic_block *body = get_loop_body_in_dom_order (loop);
1309 unsigned i;
1310
1311 for (i = 0; i < loop->num_nodes; i++)
1312 find_givs_in_bb (data, body[i]);
1313 free (body);
1314 }
1315
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1318
1319 static bool
1320 find_induction_variables (struct ivopts_data *data)
1321 {
1322 unsigned i;
1323 bitmap_iterator bi;
1324
1325 if (!find_bivs (data))
1326 return false;
1327
1328 find_givs (data);
1329 mark_bivs (data);
1330
1331 if (dump_file && (dump_flags & TDF_DETAILS))
1332 {
1333 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1334
1335 if (niter)
1336 {
1337 fprintf (dump_file, " number of iterations ");
1338 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1339 if (!integer_zerop (niter->may_be_zero))
1340 {
1341 fprintf (dump_file, "; zero if ");
1342 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1343 }
1344 fprintf (dump_file, "\n\n");
1345 };
1346
1347 fprintf (dump_file, "Induction variables:\n\n");
1348
1349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1350 {
1351 if (ver_info (data, i)->iv)
1352 dump_iv (dump_file, ver_info (data, i)->iv, true);
1353 }
1354 }
1355
1356 return true;
1357 }
1358
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1363
1364 static struct iv_use *
1365 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1366 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1367 unsigned HOST_WIDE_INT addr_offset = 0)
1368 {
1369 struct iv_use *use = XCNEW (struct iv_use);
1370
1371 use->id = n_iv_uses (data);
1372 use->sub_id = 0;
1373 use->type = use_type;
1374 use->iv = iv;
1375 use->stmt = stmt;
1376 use->op_p = use_p;
1377 use->related_cands = BITMAP_ALLOC (NULL);
1378 use->next = NULL;
1379 use->addr_base = addr_base;
1380 use->addr_offset = addr_offset;
1381
1382 data->iv_uses.safe_push (use);
1383
1384 return use;
1385 }
1386
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1389
1390 static struct iv_use *
1391 record_sub_use (struct ivopts_data *data, tree *use_p,
1392 struct iv *iv, gimple *stmt, enum use_type use_type,
1393 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1394 unsigned int id_group)
1395 {
1396 struct iv_use *use = XCNEW (struct iv_use);
1397 struct iv_use *group = iv_use (data, id_group);
1398
1399 use->id = group->id;
1400 use->sub_id = 0;
1401 use->type = use_type;
1402 use->iv = iv;
1403 use->stmt = stmt;
1404 use->op_p = use_p;
1405 use->related_cands = NULL;
1406 use->addr_base = addr_base;
1407 use->addr_offset = addr_offset;
1408
1409 /* The sub use list is maintained in ascending order of offset. */
1410 if (addr_offset <= group->addr_offset)
1411 {
1412 use->related_cands = group->related_cands;
1413 group->related_cands = NULL;
1414 use->next = group;
1415 data->iv_uses[id_group] = use;
1416 }
1417 else
1418 {
1419 struct iv_use *pre;
1420 do
1421 {
1422 pre = group;
1423 group = group->next;
1424 }
1425 while (group && addr_offset > group->addr_offset);
1426 use->next = pre->next;
1427 pre->next = use;
1428 }
1429
1430 return use;
1431 }
1432
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1436
1437 static void
1438 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1439 {
1440 basic_block bb;
1441 struct version_info *info;
1442
1443 if (TREE_CODE (op) != SSA_NAME
1444 || virtual_operand_p (op))
1445 return;
1446
1447 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1448 if (bb
1449 && flow_bb_inside_loop_p (data->current_loop, bb))
1450 return;
1451
1452 info = name_info (data, op);
1453 info->name = op;
1454 info->has_nonlin_use |= nonlinear_use;
1455 if (!info->inv_id)
1456 info->inv_id = ++data->max_inv_id;
1457 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1458 }
1459
1460 /* Checks whether the use OP is interesting and if so, records it. */
1461
1462 static struct iv_use *
1463 find_interesting_uses_op (struct ivopts_data *data, tree op)
1464 {
1465 struct iv *iv;
1466 gimple *stmt;
1467 struct iv_use *use;
1468
1469 if (TREE_CODE (op) != SSA_NAME)
1470 return NULL;
1471
1472 iv = get_iv (data, op);
1473 if (!iv)
1474 return NULL;
1475
1476 if (iv->have_use_for)
1477 {
1478 use = iv_use (data, iv->use_id);
1479
1480 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1481 return use;
1482 }
1483
1484 if (integer_zerop (iv->step))
1485 {
1486 record_invariant (data, op, true);
1487 return NULL;
1488 }
1489 iv->have_use_for = true;
1490
1491 stmt = SSA_NAME_DEF_STMT (op);
1492 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1493 || is_gimple_assign (stmt));
1494
1495 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1496 iv->use_id = use->id;
1497
1498 return use;
1499 }
1500
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1508
1509 static bool
1510 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1511 tree **control_var, tree **bound,
1512 struct iv **iv_var, struct iv **iv_bound)
1513 {
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv;
1516 static tree zero;
1517 tree *op0 = &zero, *op1 = &zero;
1518 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1519 bool ret = false;
1520
1521 if (gimple_code (stmt) == GIMPLE_COND)
1522 {
1523 gcond *cond_stmt = as_a <gcond *> (stmt);
1524 op0 = gimple_cond_lhs_ptr (cond_stmt);
1525 op1 = gimple_cond_rhs_ptr (cond_stmt);
1526 }
1527 else
1528 {
1529 op0 = gimple_assign_rhs1_ptr (stmt);
1530 op1 = gimple_assign_rhs2_ptr (stmt);
1531 }
1532
1533 zero = integer_zero_node;
1534 const_iv.step = integer_zero_node;
1535
1536 if (TREE_CODE (*op0) == SSA_NAME)
1537 iv0 = get_iv (data, *op0);
1538 if (TREE_CODE (*op1) == SSA_NAME)
1539 iv1 = get_iv (data, *op1);
1540
1541 /* Exactly one of the compared values must be an iv, and the other one must
1542 be an invariant. */
1543 if (!iv0 || !iv1)
1544 goto end;
1545
1546 if (integer_zerop (iv0->step))
1547 {
1548 /* Control variable may be on the other side. */
1549 std::swap (op0, op1);
1550 std::swap (iv0, iv1);
1551 }
1552 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1553
1554 end:
1555 if (control_var)
1556 *control_var = op0;
1557 if (iv_var)
1558 *iv_var = iv0;
1559 if (bound)
1560 *bound = op1;
1561 if (iv_bound)
1562 *iv_bound = iv1;
1563
1564 return ret;
1565 }
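/* For example (illustrative), for an exit test i_1 < n_2 where i_1 is an
   iv with nonzero step and n_2 is loop invariant, *CONTROL_VAR is set to
   the location of i_1, *BOUND to that of n_2, and true is returned; when
   both operands are invariant, false is returned.  */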
1566
1567 /* Checks whether the condition in STMT is interesting and if so,
1568 records it. */
1569
1570 static void
1571 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1572 {
1573 tree *var_p, *bound_p;
1574 struct iv *var_iv;
1575
1576 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1577 {
1578 find_interesting_uses_op (data, *var_p);
1579 find_interesting_uses_op (data, *bound_p);
1580 return;
1581 }
1582
1583 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1584 }
1585
1586 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1587 obviously invariant, i.e. such that all operands of EXPR are defined
1588 outside of the returned loop. Returns NULL if EXPR is not
1589 even obviously invariant in LOOP. */
1590
1591 struct loop *
1592 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 if (is_gimple_min_invariant (expr))
1598 return current_loops->tree_root;
1599
1600 if (TREE_CODE (expr) == SSA_NAME)
1601 {
1602 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1603 if (def_bb)
1604 {
1605 if (flow_bb_inside_loop_p (loop, def_bb))
1606 return NULL;
1607 return superloop_at_depth (loop,
1608 loop_depth (def_bb->loop_father) + 1);
1609 }
1610
1611 return current_loops->tree_root;
1612 }
1613
1614 if (!EXPR_P (expr))
1615 return NULL;
1616
1617 unsigned maxdepth = 0;
1618 len = TREE_OPERAND_LENGTH (expr);
1619 for (i = 0; i < len; i++)
1620 {
1621 struct loop *ivloop;
1622 if (!TREE_OPERAND (expr, i))
1623 continue;
1624
1625 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1626 if (!ivloop)
1627 return NULL;
1628 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1629 }
1630
1631 return superloop_at_depth (loop, maxdepth);
1632 }
1633
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1637
1638 bool
1639 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1640 {
1641 basic_block def_bb;
1642 unsigned i, len;
1643
1644 gcc_assert (loop_depth (loop) > 0);
1645
1646 if (is_gimple_min_invariant (expr))
1647 return true;
1648
1649 if (TREE_CODE (expr) == SSA_NAME)
1650 {
1651 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1652 if (def_bb
1653 && flow_bb_inside_loop_p (loop, def_bb))
1654 return false;
1655
1656 return true;
1657 }
1658
1659 if (!EXPR_P (expr))
1660 return false;
1661
1662 len = TREE_OPERAND_LENGTH (expr);
1663 for (i = 0; i < len; i++)
1664 if (TREE_OPERAND (expr, i)
1665 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1666 return false;
1667
1668 return true;
1669 }
1670
1671 /* Given expression EXPR which computes inductive values with respect
1672 to the loop recorded in DATA, this function returns the biv from which
1673 EXPR is derived by tracing definition chains of ssa variables in EXPR. */
1674
1675 static struct iv*
1676 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1677 {
1678 struct iv *iv;
1679 unsigned i, n;
1680 tree e2, e1;
1681 enum tree_code code;
1682 gimple *stmt;
1683
1684 if (expr == NULL_TREE)
1685 return NULL;
1686
1687 if (is_gimple_min_invariant (expr))
1688 return NULL;
1689
1690 code = TREE_CODE (expr);
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1692 {
1693 n = TREE_OPERAND_LENGTH (expr);
1694 for (i = 0; i < n; i++)
1695 {
1696 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1697 if (iv)
1698 return iv;
1699 }
1700 }
1701
1702 /* Stop if it's not ssa name. */
1703 if (code != SSA_NAME)
1704 return NULL;
1705
1706 iv = get_iv (data, expr);
1707 if (!iv || integer_zerop (iv->step))
1708 return NULL;
1709 else if (iv->biv_p)
1710 return iv;
1711
1712 stmt = SSA_NAME_DEF_STMT (expr);
1713 if (gphi *phi = dyn_cast <gphi *> (stmt))
1714 {
1715 ssa_op_iter iter;
1716 use_operand_p use_p;
1717
1718 if (virtual_operand_p (gimple_phi_result (phi)))
1719 return NULL;
1720
1721 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1722 {
1723 tree use = USE_FROM_PTR (use_p);
1724 iv = find_deriving_biv_for_expr (data, use);
1725 if (iv)
1726 return iv;
1727 }
1728 return NULL;
1729 }
1730 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1731 return NULL;
1732
1733 e1 = gimple_assign_rhs1 (stmt);
1734 code = gimple_assign_rhs_code (stmt);
1735 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1736 return find_deriving_biv_for_expr (data, e1);
1737
1738 switch (code)
1739 {
1740 case MULT_EXPR:
1741 case PLUS_EXPR:
1742 case MINUS_EXPR:
1743 case POINTER_PLUS_EXPR:
1744 /* Increments, decrements and multiplications by a constant
1745 are simple. */
1746 e2 = gimple_assign_rhs2 (stmt);
1747 iv = find_deriving_biv_for_expr (data, e2);
1748 if (iv)
1749 return iv;
1750
1751 /* Fallthru. */
1752 CASE_CONVERT:
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data, e1);
1755
1756 default:
1757 break;
1758 }
1759
1760 return NULL;
1761 }
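/* For example (illustrative), if EXPR is i_7 defined by i_7 = i_2 * 4
   where i_2 is a biv, the recursion through the operands of the defining
   statement reaches i_2 and returns its iv.  */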
1762
1763 /* Record that BIV and the bivs that are its predecessor or successor are
1764 used in address type uses. */
1765
1766 static void
1767 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1768 {
1769 unsigned i;
1770 tree type, base_1, base_2;
1771 bitmap_iterator bi;
1772
1773 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1774 || biv->have_address_use || !biv->no_overflow)
1775 return;
1776
1777 type = TREE_TYPE (biv->base);
1778 if (!INTEGRAL_TYPE_P (type))
1779 return;
1780
1781 biv->have_address_use = true;
1782 data->bivs_not_used_in_addr--;
1783 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1784 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1785 {
1786 struct iv *iv = ver_info (data, i)->iv;
1787
1788 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1789 || iv->have_address_use || !iv->no_overflow)
1790 continue;
1791
1792 if (type != TREE_TYPE (iv->base)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1794 continue;
1795
1796 if (!operand_equal_p (biv->step, iv->step, 0))
1797 continue;
1798
1799 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1800 if (operand_equal_p (base_1, iv->base, 0)
1801 || operand_equal_p (base_2, biv->base, 0))
1802 {
1803 iv->have_address_use = true;
1804 data->bivs_not_used_in_addr--;
1805 }
1806 }
1807 }
1808
1809 /* Accumulates the steps of indices into DATA and replaces their values with
1810 the initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1812
1813 struct ifs_ivopts_data
1814 {
1815 struct ivopts_data *ivopts_data;
1816 gimple *stmt;
1817 tree step;
1818 };
1819
1820 static bool
1821 idx_find_step (tree base, tree *idx, void *data)
1822 {
1823 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1824 struct iv *iv;
1825 bool use_overflow_semantics = false;
1826 tree step, iv_base, iv_step, lbound, off;
1827 struct loop *loop = dta->ivopts_data->current_loop;
1828
1829 /* If base is a component ref, require that the offset of the reference
1830 be invariant. */
1831 if (TREE_CODE (base) == COMPONENT_REF)
1832 {
1833 off = component_ref_field_offset (base);
1834 return expr_invariant_in_loop_p (loop, off);
1835 }
1836
1837 /* If base is an array reference, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1842 {
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1846 return false;
1847
1848 step = array_ref_element_size (base);
1849 lbound = array_ref_low_bound (base);
1850
1851 if (!expr_invariant_in_loop_p (loop, step)
1852 || !expr_invariant_in_loop_p (loop, lbound))
1853 return false;
1854 }
1855
1856 if (TREE_CODE (*idx) != SSA_NAME)
1857 return true;
1858
1859 iv = get_iv (dta->ivopts_data, *idx);
1860 if (!iv)
1861 return false;
1862
1863 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1864 *&x[0], which is not folded and does not trigger the
1865 ARRAY_REF path below. */
1866 *idx = iv->base;
1867
1868 if (integer_zerop (iv->step))
1869 return true;
1870
1871 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1872 {
1873 step = array_ref_element_size (base);
1874
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step) != INTEGER_CST)
1877 return false;
1878 }
1879 else
1880 /* The step for pointer arithmetic is already 1 byte. */
1881 step = size_one_node;
1882
1883 iv_base = iv->base;
1884 iv_step = iv->step;
1885 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1886 use_overflow_semantics = true;
1887
1888 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1889 sizetype, &iv_base, &iv_step, dta->stmt,
1890 use_overflow_semantics))
1891 {
1892 /* The index might wrap. */
1893 return false;
1894 }
1895
1896 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1897 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1898
1899 if (dta->ivopts_data->bivs_not_used_in_addr)
1900 {
1901 if (!iv->biv_p)
1902 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1903
1904 record_biv_for_address_use (dta->ivopts_data, iv);
1905 }
1906 return true;
1907 }
1908
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1911
1912 static bool
1913 idx_record_use (tree base, tree *idx,
1914 void *vdata)
1915 {
1916 struct ivopts_data *data = (struct ivopts_data *) vdata;
1917 find_interesting_uses_op (data, *idx);
1918 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1919 {
1920 find_interesting_uses_op (data, array_ref_element_size (base));
1921 find_interesting_uses_op (data, array_ref_low_bound (base));
1922 }
1923 return true;
1924 }
1925
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1930
1931 static bool
1932 constant_multiple_of (tree top, tree bot, widest_int *mul)
1933 {
1934 tree mby;
1935 enum tree_code code;
1936 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1937 widest_int res, p0, p1;
1938
1939 STRIP_NOPS (top);
1940 STRIP_NOPS (bot);
1941
1942 if (operand_equal_p (top, bot, 0))
1943 {
1944 *mul = 1;
1945 return true;
1946 }
1947
1948 code = TREE_CODE (top);
1949 switch (code)
1950 {
1951 case MULT_EXPR:
1952 mby = TREE_OPERAND (top, 1);
1953 if (TREE_CODE (mby) != INTEGER_CST)
1954 return false;
1955
1956 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1957 return false;
1958
1959 *mul = wi::sext (res * wi::to_widest (mby), precision);
1960 return true;
1961
1962 case PLUS_EXPR:
1963 case MINUS_EXPR:
1964 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1965 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1966 return false;
1967
1968 if (code == MINUS_EXPR)
1969 p1 = -p1;
1970 *mul = wi::sext (p0 + p1, precision);
1971 return true;
1972
1973 case INTEGER_CST:
1974 if (TREE_CODE (bot) != INTEGER_CST)
1975 return false;
1976
1977 p0 = widest_int::from (top, SIGNED);
1978 p1 = widest_int::from (bot, SIGNED);
1979 if (p1 == 0)
1980 return false;
1981 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1982 return res == 0;
1983
1984 default:
1985 return false;
1986 }
1987 }
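/* Examples (illustrative): for TOP = (i_1 + 4) * 8 and BOT = i_1 + 4 the
   function stores 8 to *MUL and returns true; for TOP = 12 and BOT = 8 it
   returns false, since 12 is not an integral multiple of 8.  */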
1988
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1990
1991 static bool
1992 may_be_unaligned_p (tree ref, tree step)
1993 {
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref) == TARGET_MEM_REF)
1997 return false;
1998
1999 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2001 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2002
2003 unsigned HOST_WIDE_INT bitpos;
2004 unsigned int ref_align;
2005 get_object_alignment_1 (ref, &ref_align, &bitpos);
2006 if (ref_align < align
2007 || (bitpos % align) != 0
2008 || (bitpos % BITS_PER_UNIT) != 0)
2009 return true;
2010
2011 unsigned int trailing_zeros = tree_ctz (step);
2012 if (trailing_zeros < HOST_BITS_PER_INT
2013 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2014 return true;
2015
2016 return false;
2017 }
2018
2019 /* Return true if EXPR may be non-addressable. */
2020
2021 bool
2022 may_be_nonaddressable_p (tree expr)
2023 {
2024 switch (TREE_CODE (expr))
2025 {
2026 case TARGET_MEM_REF:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2029 return false;
2030
2031 case MEM_REF:
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr);
2034
2035 case BIT_FIELD_REF:
2036 if (REF_REVERSE_STORAGE_ORDER (expr))
2037 return true;
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2039
2040 case COMPONENT_REF:
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2042 return true;
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2045
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2049 return true;
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2051
2052 case VIEW_CONVERT_EXPR:
2053 /* This kind of view-conversion may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2059 return true;
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2061
2062 CASE_CONVERT:
2063 return true;
2064
2065 default:
2066 break;
2067 }
2068
2069 return false;
2070 }
2071
2072 static tree
2073 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2074
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076 If there is an existing use which has the same stripped iv base and step,
2077 this function records this one as a sub use of that one; otherwise it
2078 records it as a normal use. */
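/* A sketch with hypothetical ivs: two address uses whose ivs are
   {base p_1 + 4*i, step 4} and {base p_1 + 4*i + 16, step 4} share the
   same base object, step and stripped base, so the second one is
   recorded as a sub use of the first; only the constant offsets
   (0 and 16) differ.  */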
2079
2080 static struct iv_use *
2081 record_group_use (struct ivopts_data *data, tree *use_p,
2082 struct iv *iv, gimple *stmt, enum use_type use_type)
2083 {
2084 unsigned int i;
2085 struct iv_use *use;
2086 tree addr_base;
2087 unsigned HOST_WIDE_INT addr_offset;
2088
2089 /* Only support sub uses for address type uses, that is, uses with a
2090 base object. */
2091 if (!iv->base_object)
2092 return record_use (data, use_p, iv, stmt, use_type);
2093
2094 addr_base = strip_offset (iv->base, &addr_offset);
2095 for (i = 0; i < n_iv_uses (data); i++)
2096 {
2097 use = iv_use (data, i);
2098 if (use->type != USE_ADDRESS || !use->iv->base_object)
2099 continue;
2100
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2103 && operand_equal_p (iv->step, use->iv->step, 0)
2104 && operand_equal_p (addr_base, use->addr_base, 0))
2105 break;
2106 }
2107
2108 if (i == n_iv_uses (data))
2109 return record_use (data, use_p, iv, stmt,
2110 use_type, addr_base, addr_offset);
2111 else
2112 return record_sub_use (data, use_p, iv, stmt,
2113 use_type, addr_base, addr_offset, i);
2114 }
2115
2116 /* Finds addresses in *OP_P inside STMT. */
2117
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2121 {
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2125
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2130
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2135
2136 base = unshare_expr (base);
2137
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2139 {
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2142
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2145 {
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2149
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2152 }
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2155 {
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2159
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2162 }
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2165 {
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2169
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2172
2173 if (astep)
2174 {
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2177
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2179 }
2180 }
2181
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2185 }
2186 else
2187 {
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2195
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2200
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2205
2206 base = build_fold_addr_expr (base);
2207
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2211 {
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2216 {
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2222 }
2223 }
2224 }
2225
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2229
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
2232 }
2233
2234 /* Finds and records invariants used in STMT. */
2235
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2238 {
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2242
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2244 {
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2247 }
2248 }
2249
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2251
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2254 {
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2260
2261 find_invariants_stmt (data, stmt);
2262
2263 if (gimple_code (stmt) == GIMPLE_COND)
2264 {
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2267 }
2268
2269 if (is_gimple_assign (stmt))
2270 {
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2273
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2275 {
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2278
2279 iv = get_iv (data, *lhs);
2280
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2283 }
2284
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2289 {
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2294
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2298 }
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2300 {
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2303 }
2304
2305 /* TODO -- we should also handle address uses of type
2306
2307 memory = call (whatever);
2308
2309 and
2310
2311 call (memory). */
2312 }
2313
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2316 {
2317 iv = get_iv (data, PHI_RESULT (stmt));
2318
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2321 }
2322
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2324 {
2325 op = USE_FROM_PTR (use_p);
2326
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2329
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2333
2334 find_interesting_uses_op (data, op);
2335 }
2336 }
2337
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2340
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2343 {
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2347
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2349 {
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2354 }
2355 }
2356
2357 /* Finds uses of the induction variables that are interesting. */
2358
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2361 {
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2368
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2371
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2373 {
2374 edge_iterator ei;
2375 bb = body[i];
2376
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2381
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2387 }
2388
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 {
2391 bitmap_iterator bi;
2392
2393 fprintf (dump_file, "\n");
2394
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2396 {
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2399 {
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2404 }
2405 }
2406
2407 fprintf (dump_file, "\n");
2408 }
2409
2410 free (body);
2411 }
2412
2413 /* Compute maximum offset of [base + offset] addressing mode
2414 for memory reference represented by USE. */
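/* For example, on a hypothetical target whose [base + offset] form
   accepts 13-bit signed offsets for the given mode, the search below
   stops at i = 12 and caches 4095.  Results are remembered per address
   space and memory mode in the static max_offset_list vector.  */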
2415
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2418 {
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2426
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2429
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2433 {
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2437 }
2438
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2442
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2446
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2450
2451 for (i = width; i > 0; i--)
2452 {
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2457
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2462 {
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2467 }
2468 }
2469 if (i == 0)
2470 off = 0;
2471
2472 max_offset_list[list_index] = off;
2473 return off;
2474 }
2475
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2478
2479 1) At least one group contains two uses with different offsets.
2480 2) No group contains more than two uses with different offsets.
2481
2482 Return false otherwise. We want to split such groups because:
2483
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) The problem size with only small groups is usually small and the
2487 general algorithm can handle it well.
2488
2489 TODO -- Above claim may not hold when auto increment is supported. */
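/* For instance, a group whose sub uses have offsets {0, 8} yields
   distinct == 2 and votes for splitting, while one with offsets
   {0, 8, 16} yields distinct == 3 and makes the function return
   false immediately.  */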
2490
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2493 {
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2497
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2500 {
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2504
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2508 {
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2511
2512 if (distinct > 2)
2513 return false;
2514 }
2515 if (distinct == 2)
2516 split_p = true;
2517 }
2518
2519 return split_p;
2520 }
2521
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by target's
2524 [base + offset] addressing mode. */
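/* A possible outcome, assuming a maximum supported offset of 4095 and
   sub uses listed in increasing offset order: uses at offsets 0, 16 and
   8192 relative to the first use end up in two groups -- {0, 16} stays
   together, while the use at offset 8192 starts a new group with a
   fresh id.  */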
2525
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2528 {
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2533
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2537
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2540 {
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2544
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2548
2549 while (use)
2550 {
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part of the addressing mode can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2559 {
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2562 }
2563
2564 /* Break the list and create new group. */
2565 if (use)
2566 {
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2571 }
2572 }
2573 }
2574
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2577 }
2578
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
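/* Illustrative examples (exact trees depend on the front end):
     strip_offset_1 (p_1 + 20, false, false, &off)  yields p_1, off = 20;
     strip_offset_1 (&a[i_2 + 3], false, false, &off)  with 4-byte array
     elements yields &a[i_2], off = 12.  */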
2582
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2586 {
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2592
2593 STRIP_NOPS (expr);
2594
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2598
2599 switch (code)
2600 {
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2605
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2608
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2614
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2617
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2622
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2626 {
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2631 }
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2634
2635 return fold_convert (orig_type, expr);
2636
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2641
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2646
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2652
2653 return fold_convert (orig_type, expr);
2654
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2659
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2663
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2668
2669 if (top_compref
2670 && integer_zerop (op1))
2671 {
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2677 }
2678 break;
2679
2680 case COMPONENT_REF:
2681 {
2682 tree field;
2683
2684 if (!inside_addr)
2685 return orig_expr;
2686
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2692 {
2693 HOST_WIDE_INT boffset, abs_off;
2694
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2702
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2705 }
2706 }
2707 break;
2708
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2713
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2716
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2719
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2724
2725 default:
2726 return orig_expr;
2727 }
2728
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2734
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2738
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2743
2744 /* Inside address, we might strip the top level component references,
2745 thus changing type of the expression. Handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2748
2749 return expr;
2750 }
2751
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2753
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2756 {
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
2761 }
2762
2763 /* Returns a variant of TYPE that can be used as the base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
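/* E.g. "int" becomes "unsigned int" and "unsigned long" is returned
   unchanged; for pointer types, unsigned_type_for is expected to give
   an unsigned integer type of the same precision.  */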
2766
2767 static tree
2768 generic_type_for (tree type)
2769 {
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2772
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2775
2776 return unsigned_type_for (type);
2777 }
2778
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2781
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2785 {
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2788
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2792
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2795
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2799
2800 return NULL_TREE;
2801 }
2802
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2807
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2813 {
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2817
2818 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2819 live, but the ivopts code may replace a real pointer with one
2820 pointing before or after the memory block that is then adjusted
2821 into the memory block during the loop. FIXME: It would likely be
2822 better to actually force the pointer live and still use ivopts;
2823 for example, it would be enough to write the pointer into memory
2824 and keep it there until after the loop. */
2825 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2826 return NULL;
2827
2828 /* For non-original variables, make sure their values are computed in a type
2829 that does not invoke undefined behavior on overflows (since in general,
2830 we cannot prove that these induction variables are non-wrapping). */
2831 if (pos != IP_ORIGINAL)
2832 {
2833 orig_type = TREE_TYPE (base);
2834 type = generic_type_for (orig_type);
2835 if (type != orig_type)
2836 {
2837 base = fold_convert (type, base);
2838 step = fold_convert (type, step);
2839 }
2840 }
2841
2842 for (i = 0; i < n_iv_cands (data); i++)
2843 {
2844 cand = iv_cand (data, i);
2845
2846 if (cand->pos != pos)
2847 continue;
2848
2849 if (cand->incremented_at != incremented_at
2850 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2851 && cand->ainc_use != use))
2852 continue;
2853
2854 if (!cand->iv)
2855 {
2856 if (!base && !step)
2857 break;
2858
2859 continue;
2860 }
2861
2862 if (!base && !step)
2863 continue;
2864
2865 if (operand_equal_p (base, cand->iv->base, 0)
2866 && operand_equal_p (step, cand->iv->step, 0)
2867 && (TYPE_PRECISION (TREE_TYPE (base))
2868 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2869 break;
2870 }
2871
2872 if (i == n_iv_cands (data))
2873 {
2874 cand = XCNEW (struct iv_cand);
2875 cand->id = i;
2876
2877 if (!base && !step)
2878 cand->iv = NULL;
2879 else
2880 cand->iv = alloc_iv (data, base, step);
2881
2882 cand->pos = pos;
2883 if (pos != IP_ORIGINAL && cand->iv)
2884 {
2885 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2886 cand->var_after = cand->var_before;
2887 }
2888 cand->important = important;
2889 cand->incremented_at = incremented_at;
2890 data->iv_candidates.safe_push (cand);
2891
2892 if (step
2893 && TREE_CODE (step) != INTEGER_CST)
2894 {
2895 fd_ivopts_data = data;
2896 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2897 }
2898
2899 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2900 cand->ainc_use = use;
2901 else
2902 cand->ainc_use = NULL;
2903
2904 cand->orig_iv = orig_iv;
2905 if (dump_file && (dump_flags & TDF_DETAILS))
2906 dump_cand (dump_file, cand);
2907 }
2908
2909 if (important && !cand->important)
2910 {
2911 cand->important = true;
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2914 }
2915
2916 if (use)
2917 {
2918 bitmap_set_bit (use->related_cands, i);
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2920 fprintf (dump_file, "Candidate %d is related to use %d\n",
2921 cand->id, use->id);
2922 }
2923
2924 return cand;
2925 }
2926
2927 /* Returns true if incrementing the induction variable at the end of the LOOP
2928 is allowed.
2929
2930 The purpose is to avoid splitting latch edge with a biv increment, thus
2931 creating a jump, possibly confusing other optimization passes and leaving
2932 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2933 is not available (so we do not have a better alternative), or if the latch
2934 edge is already nonempty. */
2935
2936 static bool
2937 allow_ip_end_pos_p (struct loop *loop)
2938 {
2939 if (!ip_normal_pos (loop))
2940 return true;
2941
2942 if (!empty_block_p (ip_end_pos (loop)))
2943 return true;
2944
2945 return false;
2946 }
2947
2948 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2949 Important field is set to IMPORTANT. */
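/* A sketch of the effect on a hypothetical target with post-increment
   addressing: for an address use *p_1 whose step equals the mode size
   of the access, a candidate with base p_1 and position IP_AFTER_USE is
   added, so that the increment can later be combined with the memory
   access.  */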
2950
2951 static void
2952 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2953 bool important, struct iv_use *use)
2954 {
2955 basic_block use_bb = gimple_bb (use->stmt);
2956 machine_mode mem_mode;
2957 unsigned HOST_WIDE_INT cstepi;
2958
2959 /* If we insert the increment in any position other than the standard
2960 ones, we must ensure that it is incremented once per iteration.
2961 It must not be in an inner nested loop, or one side of an if
2962 statement. */
2963 if (use_bb->loop_father != data->current_loop
2964 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2965 || stmt_could_throw_p (use->stmt)
2966 || !cst_and_fits_in_hwi (step))
2967 return;
2968
2969 cstepi = int_cst_value (step);
2970
2971 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2972 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2973 || USE_STORE_PRE_INCREMENT (mem_mode))
2974 && GET_MODE_SIZE (mem_mode) == cstepi)
2975 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2976 || USE_STORE_PRE_DECREMENT (mem_mode))
2977 && GET_MODE_SIZE (mem_mode) == -cstepi))
2978 {
2979 enum tree_code code = MINUS_EXPR;
2980 tree new_base;
2981 tree new_step = step;
2982
2983 if (POINTER_TYPE_P (TREE_TYPE (base)))
2984 {
2985 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2986 code = POINTER_PLUS_EXPR;
2987 }
2988 else
2989 new_step = fold_convert (TREE_TYPE (base), new_step);
2990 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2991 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2992 use->stmt);
2993 }
2994 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2995 || USE_STORE_POST_INCREMENT (mem_mode))
2996 && GET_MODE_SIZE (mem_mode) == cstepi)
2997 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2998 || USE_STORE_POST_DECREMENT (mem_mode))
2999 && GET_MODE_SIZE (mem_mode) == -cstepi))
3000 {
3001 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3002 use->stmt);
3003 }
3004 }
3005
3006 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3007 position to POS. If USE is not NULL, the candidate is set as related to
3008 it. The candidate computation is scheduled before exit condition and at
3009 the end of loop. */
3010
3011 static void
3012 add_candidate (struct ivopts_data *data,
3013 tree base, tree step, bool important, struct iv_use *use,
3014 struct iv *orig_iv = NULL)
3015 {
3016 gcc_assert (use == NULL || use->sub_id == 0);
3017
3018 if (ip_normal_pos (data->current_loop))
3019 add_candidate_1 (data, base, step, important,
3020 IP_NORMAL, use, NULL, orig_iv);
3021 if (ip_end_pos (data->current_loop)
3022 && allow_ip_end_pos_p (data->current_loop))
3023 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3024 }
3025
3026 /* Adds standard iv candidates. */
3027
3028 static void
3029 add_standard_iv_candidates (struct ivopts_data *data)
3030 {
3031 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3032
3033 /* The same for the "long" type if it is still fast enough. */
3034 if (TYPE_PRECISION
3035 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3036 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3037 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3038 build_int_cst (long_integer_type_node, 1), true, NULL);
3039
3040 /* The same for the "long long" type if it is still fast enough. */
3041 if (TYPE_PRECISION
3042 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3043 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3044 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3045 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3046 }
3047
3048
3049 /* Adds candidates based on the old induction variable IV. */
3050
3051 static void
3052 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3053 {
3054 gimple *phi;
3055 tree def;
3056 struct iv_cand *cand;
3057
3058 /* Check if this biv is used in an address type use. */
3059 if (iv->no_overflow && iv->have_address_use
3060 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3061 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3062 {
3063 tree base = fold_convert (sizetype, iv->base);
3064 tree step = fold_convert (sizetype, iv->step);
3065
3066 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3067 add_candidate (data, base, step, true, NULL, iv);
3068 /* Add iv cand of the original type only if it has nonlinear use. */
3069 if (iv->have_use_for)
3070 add_candidate (data, iv->base, iv->step, true, NULL);
3071 }
3072 else
3073 add_candidate (data, iv->base, iv->step, true, NULL);
3074
3075 /* The same, but with initial value zero. */
3076 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3077 add_candidate (data, size_int (0), iv->step, true, NULL);
3078 else
3079 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3080 iv->step, true, NULL);
3081
3082 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3083 if (gimple_code (phi) == GIMPLE_PHI)
3084 {
3085 /* Additionally record the possibility of leaving the original iv
3086 untouched. */
3087 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3088 /* Don't add candidate if it's from another PHI node because
3089 it's an affine iv appearing in the form of PEELED_CHREC. */
3090 phi = SSA_NAME_DEF_STMT (def);
3091 if (gimple_code (phi) != GIMPLE_PHI)
3092 {
3093 cand = add_candidate_1 (data,
3094 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3095 SSA_NAME_DEF_STMT (def));
3096 if (cand)
3097 {
3098 cand->var_before = iv->ssa_name;
3099 cand->var_after = def;
3100 }
3101 }
3102 else
3103 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3104 }
3105 }
3106
3107 /* Adds candidates based on the old induction variables. */
3108
3109 static void
3110 add_iv_candidate_for_bivs (struct ivopts_data *data)
3111 {
3112 unsigned i;
3113 struct iv *iv;
3114 bitmap_iterator bi;
3115
3116 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3117 {
3118 iv = ver_info (data, i)->iv;
3119 if (iv && iv->biv_p && !integer_zerop (iv->step))
3120 add_iv_candidate_for_biv (data, iv);
3121 }
3122 }
3123
3124 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3125
3126 static void
3127 record_common_cand (struct ivopts_data *data, tree base,
3128 tree step, struct iv_use *use)
3129 {
3130 struct iv_common_cand ent;
3131 struct iv_common_cand **slot;
3132
3133 gcc_assert (use != NULL);
3134
3135 ent.base = base;
3136 ent.step = step;
3137 ent.hash = iterative_hash_expr (base, 0);
3138 ent.hash = iterative_hash_expr (step, ent.hash);
3139
3140 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3141 if (*slot == NULL)
3142 {
3143 *slot = new iv_common_cand ();
3144 (*slot)->base = base;
3145 (*slot)->step = step;
3146 (*slot)->uses.create (8);
3147 (*slot)->hash = ent.hash;
3148 data->iv_common_cands.safe_push ((*slot));
3149 }
3150 (*slot)->uses.safe_push (use);
3151 return;
3152 }
3153
3154 /* Comparison function used to sort common candidates. */
3155
3156 static int
3157 common_cand_cmp (const void *p1, const void *p2)
3158 {
3159 unsigned n1, n2;
3160 const struct iv_common_cand *const *const ccand1
3161 = (const struct iv_common_cand *const *)p1;
3162 const struct iv_common_cand *const *const ccand2
3163 = (const struct iv_common_cand *const *)p2;
3164
3165 n1 = (*ccand1)->uses.length ();
3166 n2 = (*ccand2)->uses.length ();
3167 return n2 - n1;
3168 }
3169
3170 /* Adds IV candidates based on the common candidates recorded. */
3171
3172 static void
3173 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3174 {
3175 unsigned i, j;
3176 struct iv_cand *cand_1, *cand_2;
3177
3178 data->iv_common_cands.qsort (common_cand_cmp);
3179 for (i = 0; i < data->iv_common_cands.length (); i++)
3180 {
3181 struct iv_common_cand *ptr = data->iv_common_cands[i];
3182
3183 /* Only add IV candidate if it's derived from multiple uses. */
3184 if (ptr->uses.length () <= 1)
3185 break;
3186
3187 cand_1 = NULL;
3188 cand_2 = NULL;
3189 if (ip_normal_pos (data->current_loop))
3190 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3191 false, IP_NORMAL, NULL, NULL);
3192
3193 if (ip_end_pos (data->current_loop)
3194 && allow_ip_end_pos_p (data->current_loop))
3195 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3196 false, IP_END, NULL, NULL);
3197
3198 /* Bind deriving uses and the new candidates. */
3199 for (j = 0; j < ptr->uses.length (); j++)
3200 {
3201 struct iv_use *use = ptr->uses[j];
3202 if (cand_1)
3203 bitmap_set_bit (use->related_cands, cand_1->id);
3204 if (cand_2)
3205 bitmap_set_bit (use->related_cands, cand_2->id);
3206 }
3207 }
3208
3209 /* Release data since it is useless from this point. */
3210 data->iv_common_cand_tab->empty ();
3211 data->iv_common_cands.truncate (0);
3212 }
3213
3214 /* Adds candidates based on the value of USE's iv. */
3215
3216 static void
3217 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3218 {
3219 unsigned HOST_WIDE_INT offset;
3220 tree base;
3221 tree basetype;
3222 struct iv *iv = use->iv;
3223
3224 add_candidate (data, iv->base, iv->step, false, use);
3225
3226 /* Record common candidate for use in case it can be shared by others. */
3227 record_common_cand (data, iv->base, iv->step, use);
3228
3229 /* Record common candidate with initial value zero. */
3230 basetype = TREE_TYPE (iv->base);
3231 if (POINTER_TYPE_P (basetype))
3232 basetype = sizetype;
3233 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3234
3235 /* Record common candidate with constant offset stripped in base.
3236 Like the use itself, we also add candidate directly for it. */
3237 base = strip_offset (iv->base, &offset);
3238 if (offset || base != iv->base)
3239 {
3240 record_common_cand (data, base, iv->step, use);
3241 add_candidate (data, base, iv->step, false, use);
3242 }
3243
3244 /* Record common candidate with base_object removed in base. */
3245 if (iv->base_object != NULL)
3246 {
3247 unsigned i;
3248 aff_tree aff_base;
3249 tree step, base_object = iv->base_object;
3250
3251 base = iv->base;
3252 step = iv->step;
3253 STRIP_NOPS (base);
3254 STRIP_NOPS (step);
3255 STRIP_NOPS (base_object);
3256 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3257 for (i = 0; i < aff_base.n; i++)
3258 {
3259 if (aff_base.elts[i].coef != 1)
3260 continue;
3261
3262 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3263 break;
3264 }
3265 if (i < aff_base.n)
3266 {
3267 aff_combination_remove_elt (&aff_base, i);
3268 base = aff_combination_to_tree (&aff_base);
3269 basetype = TREE_TYPE (base);
3270 if (POINTER_TYPE_P (basetype))
3271 basetype = sizetype;
3272
3273 step = fold_convert (basetype, step);
3274 record_common_cand (data, base, step, use);
3275 /* Also record common candidate with offset stripped. */
3276 base = strip_offset (base, &offset);
3277 if (offset)
3278 record_common_cand (data, base, step, use);
3279 }
3280 }
3281
3282 /* Finally, add autoincrement candidates. Make such variables
3283 important since other iv uses with the same base object may be based
3284 on them. */
3285 if (use != NULL && use->type == USE_ADDRESS)
3286 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3287 }
3288
3289 /* Adds candidates based on the uses. */
3290
3291 static void
3292 add_iv_candidate_for_uses (struct ivopts_data *data)
3293 {
3294 unsigned i;
3295
3296 for (i = 0; i < n_iv_uses (data); i++)
3297 {
3298 struct iv_use *use = iv_use (data, i);
3299
3300 if (!use)
3301 continue;
3302
3303 switch (use->type)
3304 {
3305 case USE_NONLINEAR_EXPR:
3306 case USE_COMPARE:
3307 case USE_ADDRESS:
3308 /* Just add the ivs based on the value of the iv used here. */
3309 add_iv_candidate_for_use (data, use);
3310 break;
3311
3312 default:
3313 gcc_unreachable ();
3314 }
3315 }
3316 add_iv_candidate_derived_from_uses (data);
3317 }
3318
3319 /* Record important candidates and add them to related_cands bitmaps. */
3320
3321 static void
3322 record_important_candidates (struct ivopts_data *data)
3323 {
3324 unsigned i;
3325 struct iv_use *use;
3326
3327 for (i = 0; i < n_iv_cands (data); i++)
3328 {
3329 struct iv_cand *cand = iv_cand (data, i);
3330
3331 if (cand->important)
3332 bitmap_set_bit (data->important_candidates, i);
3333 }
3334
3335 data->consider_all_candidates = (n_iv_cands (data)
3336 <= CONSIDER_ALL_CANDIDATES_BOUND);
3337
3338 /* Add important candidates to uses' related_cands bitmaps. */
3339 for (i = 0; i < n_iv_uses (data); i++)
3340 {
3341 use = iv_use (data, i);
3342 bitmap_ior_into (use->related_cands, data->important_candidates);
3343 }
3344 }
3345
3346 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3347 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3348 we allocate a simple list to every use. */
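/* E.g. a use related to 5 candidates gets a cost map of size 8; the
   power-of-two size lets set_use_iv_cost and get_use_iv_cost find the
   slot with "cand->id & (n_map_members - 1)".  */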
3349
3350 static void
3351 alloc_use_cost_map (struct ivopts_data *data)
3352 {
3353 unsigned i, size, s;
3354
3355 for (i = 0; i < n_iv_uses (data); i++)
3356 {
3357 struct iv_use *use = iv_use (data, i);
3358
3359 if (data->consider_all_candidates)
3360 size = n_iv_cands (data);
3361 else
3362 {
3363 s = bitmap_count_bits (use->related_cands);
3364
3365 /* Round up to a power of two, so that computing the modulus is fast. */
3366 size = s ? (1 << ceil_log2 (s)) : 1;
3367 }
3368
3369 use->n_map_members = size;
3370 use->cost_map = XCNEWVEC (struct cost_pair, size);
3371 }
3372 }
3373
3374 /* Returns description of computation cost of expression whose runtime
3375 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3376
3377 static comp_cost
3378 new_cost (unsigned runtime, unsigned complexity)
3379 {
3380 comp_cost cost;
3381
3382 cost.cost = runtime;
3383 cost.complexity = complexity;
3384
3385 return cost;
3386 }
3387
3388 /* Returns true if COST is infinite. */
3389
3390 static bool
3391 infinite_cost_p (comp_cost cost)
3392 {
3393 return cost.cost == INFTY;
3394 }
3395
3396 /* Adds costs COST1 and COST2. */
3397
3398 static comp_cost
3399 add_costs (comp_cost cost1, comp_cost cost2)
3400 {
3401 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3402 return infinite_cost;
3403
3404 cost1.cost += cost2.cost;
3405 cost1.complexity += cost2.complexity;
3406
3407 return cost1;
3408 }
3409 /* Subtracts costs COST1 and COST2. */
3410
3411 static comp_cost
3412 sub_costs (comp_cost cost1, comp_cost cost2)
3413 {
3414 cost1.cost -= cost2.cost;
3415 cost1.complexity -= cost2.complexity;
3416
3417 return cost1;
3418 }
3419
3420 /* Returns a negative number if COST1 < COST2, a positive number if
3421 COST1 > COST2, and 0 if COST1 = COST2. */
3422
3423 static int
3424 compare_costs (comp_cost cost1, comp_cost cost2)
3425 {
3426 if (cost1.cost == cost2.cost)
3427 return cost1.complexity - cost2.complexity;
3428
3429 return cost1.cost - cost2.cost;
3430 }
3431
3432 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3433 on invariants DEPENDS_ON and that the value used in expressing it
3434 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3435
3436 static void
3437 set_use_iv_cost (struct ivopts_data *data,
3438 struct iv_use *use, struct iv_cand *cand,
3439 comp_cost cost, bitmap depends_on, tree value,
3440 enum tree_code comp, int inv_expr_id)
3441 {
3442 unsigned i, s;
3443
3444 if (infinite_cost_p (cost))
3445 {
3446 BITMAP_FREE (depends_on);
3447 return;
3448 }
3449
3450 if (data->consider_all_candidates)
3451 {
3452 use->cost_map[cand->id].cand = cand;
3453 use->cost_map[cand->id].cost = cost;
3454 use->cost_map[cand->id].depends_on = depends_on;
3455 use->cost_map[cand->id].value = value;
3456 use->cost_map[cand->id].comp = comp;
3457 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3458 return;
3459 }
3460
3461 /* n_map_members is a power of two, so this computes modulo. */
3462 s = cand->id & (use->n_map_members - 1);
3463 for (i = s; i < use->n_map_members; i++)
3464 if (!use->cost_map[i].cand)
3465 goto found;
3466 for (i = 0; i < s; i++)
3467 if (!use->cost_map[i].cand)
3468 goto found;
3469
3470 gcc_unreachable ();
3471
3472 found:
3473 use->cost_map[i].cand = cand;
3474 use->cost_map[i].cost = cost;
3475 use->cost_map[i].depends_on = depends_on;
3476 use->cost_map[i].value = value;
3477 use->cost_map[i].comp = comp;
3478 use->cost_map[i].inv_expr_id = inv_expr_id;
3479 }
3480
3481 /* Gets cost of (USE, CANDIDATE) pair. */
3482
3483 static struct cost_pair *
3484 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3485 struct iv_cand *cand)
3486 {
3487 unsigned i, s;
3488 struct cost_pair *ret;
3489
3490 if (!cand)
3491 return NULL;
3492
3493 if (data->consider_all_candidates)
3494 {
3495 ret = use->cost_map + cand->id;
3496 if (!ret->cand)
3497 return NULL;
3498
3499 return ret;
3500 }
3501
3502 /* n_map_members is a power of two, so this computes modulo. */
3503 s = cand->id & (use->n_map_members - 1);
3504 for (i = s; i < use->n_map_members; i++)
3505 if (use->cost_map[i].cand == cand)
3506 return use->cost_map + i;
3507 else if (use->cost_map[i].cand == NULL)
3508 return NULL;
3509 for (i = 0; i < s; i++)
3510 if (use->cost_map[i].cand == cand)
3511 return use->cost_map + i;
3512 else if (use->cost_map[i].cand == NULL)
3513 return NULL;
3514
3515 return NULL;
3516 }
3517
3518 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3519 static rtx
3520 produce_memory_decl_rtl (tree obj, int *regno)
3521 {
3522 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3523 machine_mode address_mode = targetm.addr_space.address_mode (as);
3524 rtx x;
3525
3526 gcc_assert (obj);
3527 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3528 {
3529 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3530 x = gen_rtx_SYMBOL_REF (address_mode, name);
3531 SET_SYMBOL_REF_DECL (x, obj);
3532 x = gen_rtx_MEM (DECL_MODE (obj), x);
3533 set_mem_addr_space (x, as);
3534 targetm.encode_section_info (obj, x, true);
3535 }
3536 else
3537 {
3538 x = gen_raw_REG (address_mode, (*regno)++);
3539 x = gen_rtx_MEM (DECL_MODE (obj), x);
3540 set_mem_addr_space (x, as);
3541 }
3542
3543 return x;
3544 }
3545
3546 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3547 walk_tree. DATA contains the actual fake register number. */
3548
3549 static tree
3550 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3551 {
3552 tree obj = NULL_TREE;
3553 rtx x = NULL_RTX;
3554 int *regno = (int *) data;
3555
3556 switch (TREE_CODE (*expr_p))
3557 {
3558 case ADDR_EXPR:
3559 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3560 handled_component_p (*expr_p);
3561 expr_p = &TREE_OPERAND (*expr_p, 0))
3562 continue;
3563 obj = *expr_p;
3564 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3565 x = produce_memory_decl_rtl (obj, regno);
3566 break;
3567
3568 case SSA_NAME:
3569 *ws = 0;
3570 obj = SSA_NAME_VAR (*expr_p);
3571 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3572 if (!obj)
3573 return NULL_TREE;
3574 if (!DECL_RTL_SET_P (obj))
3575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3576 break;
3577
3578 case VAR_DECL:
3579 case PARM_DECL:
3580 case RESULT_DECL:
3581 *ws = 0;
3582 obj = *expr_p;
3583
3584 if (DECL_RTL_SET_P (obj))
3585 break;
3586
3587 if (DECL_MODE (obj) == BLKmode)
3588 x = produce_memory_decl_rtl (obj, regno);
3589 else
3590 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3591
3592 break;
3593
3594 default:
3595 break;
3596 }
3597
3598 if (x)
3599 {
3600 decl_rtl_to_reset.safe_push (obj);
3601 SET_DECL_RTL (obj, x);
3602 }
3603
3604 return NULL_TREE;
3605 }
3606
3607 /* Determines cost of the computation of EXPR. */
3608
3609 static unsigned
3610 computation_cost (tree expr, bool speed)
3611 {
3612 rtx_insn *seq;
3613 rtx rslt;
3614 tree type = TREE_TYPE (expr);
3615 unsigned cost;
3616 /* Avoid using hard regs in ways which may be unsupported. */
3617 int regno = LAST_VIRTUAL_REGISTER + 1;
3618 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3619 enum node_frequency real_frequency = node->frequency;
3620
3621 node->frequency = NODE_FREQUENCY_NORMAL;
3622 crtl->maybe_hot_insn_p = speed;
3623 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3624 start_sequence ();
3625 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3626 seq = get_insns ();
3627 end_sequence ();
3628 default_rtl_profile ();
3629 node->frequency = real_frequency;
3630
3631 cost = seq_cost (seq, speed);
3632 if (MEM_P (rslt))
3633 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3634 TYPE_ADDR_SPACE (type), speed);
3635 else if (!REG_P (rslt))
3636 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3637
3638 return cost;
3639 }
3640
3641 /* Returns variable containing the value of candidate CAND at statement AT. */
3642
3643 static tree
3644 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3645 {
3646 if (stmt_after_increment (loop, cand, stmt))
3647 return cand->var_after;
3648 else
3649 return cand->var_before;
3650 }
3651
3652 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3653 same precision that is at least as wide as the precision of TYPE, stores
3654 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3655 type of A and B. */
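/* For instance, on a target where "long" is wider than "unsigned int",
   if *A is (unsigned int) a_1 and *B is (unsigned int) b_2 with a_1 and
   b_2 of type "long", the function stores a_1 to *A, b_2 to *B and
   returns "long"; otherwise both are left untouched and the type of *A
   is returned.  */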
3656
3657 static tree
3658 determine_common_wider_type (tree *a, tree *b)
3659 {
3660 tree wider_type = NULL;
3661 tree suba, subb;
3662 tree atype = TREE_TYPE (*a);
3663
3664 if (CONVERT_EXPR_P (*a))
3665 {
3666 suba = TREE_OPERAND (*a, 0);
3667 wider_type = TREE_TYPE (suba);
3668 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3669 return atype;
3670 }
3671 else
3672 return atype;
3673
3674 if (CONVERT_EXPR_P (*b))
3675 {
3676 subb = TREE_OPERAND (*b, 0);
3677 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3678 return atype;
3679 }
3680 else
3681 return atype;
3682
3683 *a = suba;
3684 *b = subb;
3685 return wider_type;
3686 }
3687
3688 /* Determines the expression by that USE is expressed from induction variable
3689 CAND at statement AT in LOOP. The expression is stored in a decomposed
3690 form into AFF. Returns false if USE cannot be expressed using CAND. */
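/* With ratio = ustep / cstep the value of the use is rebuilt as

     use = ubase - ratio * cbase + ratio * var

   e.g. for a use {base 16, step 4} and a candidate {base 0, step 2}
   the ratio is 2 and the use is expressed as 16 + 2 * var.  */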
3691
3692 static bool
3693 get_computation_aff (struct loop *loop,
3694 struct iv_use *use, struct iv_cand *cand, gimple *at,
3695 struct aff_tree *aff)
3696 {
3697 tree ubase = use->iv->base;
3698 tree ustep = use->iv->step;
3699 tree cbase = cand->iv->base;
3700 tree cstep = cand->iv->step, cstep_common;
3701 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3702 tree common_type, var;
3703 tree uutype;
3704 aff_tree cbase_aff, var_aff;
3705 widest_int rat;
3706
3707 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3708 {
3709 /* We do not have enough precision to express the values of the use. */
3710 return false;
3711 }
3712
3713 var = var_at_stmt (loop, cand, at);
3714 uutype = unsigned_type_for (utype);
3715
3716 /* If the conversion is not noop, perform it. */
3717 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3718 {
3719 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3720 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3721 {
3722 tree inner_base, inner_step, inner_type;
3723 inner_base = TREE_OPERAND (cbase, 0);
3724 if (CONVERT_EXPR_P (cstep))
3725 inner_step = TREE_OPERAND (cstep, 0);
3726 else
3727 inner_step = cstep;
3728
3729 inner_type = TREE_TYPE (inner_base);
3730 /* If candidate is added from a biv whose type is smaller than
3731 ctype, we know both candidate and the biv won't overflow.
3732 In this case, it's safe to skip the conversion in the candidate.
3733 As an example, (unsigned short)((unsigned long)A) equals
3734 (unsigned short)A, if A has a type no larger than short. */
3735 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3736 {
3737 cbase = inner_base;
3738 cstep = inner_step;
3739 }
3740 }
3741 cstep = fold_convert (uutype, cstep);
3742 cbase = fold_convert (uutype, cbase);
3743 var = fold_convert (uutype, var);
3744 }
3745
3746 /* Ratio is 1 when computing the value of biv cand by itself.
3747 We can't rely on constant_multiple_of in this case because the
3748 use is created after the original biv is selected. The call
3749 could fail because of inconsistent fold behavior. See PR68021
3750 for more information. */
3751 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3752 {
3753 gcc_assert (is_gimple_assign (use->stmt));
3754 gcc_assert (use->iv->ssa_name == cand->var_after);
3755 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3756 rat = 1;
3757 }
3758 else if (!constant_multiple_of (ustep, cstep, &rat))
3759 return false;
3760
3761 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3762 type, we achieve better folding by computing their difference in this
3763 wider type, and cast the result to UUTYPE. We do not need to worry about
3764 overflows, as all the arithmetic will in the end be performed in UUTYPE
3765 anyway. */
3766 common_type = determine_common_wider_type (&ubase, &cbase);
3767
3768 /* use = ubase - ratio * cbase + ratio * var. */
3769 tree_to_aff_combination (ubase, common_type, aff);
3770 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3771 tree_to_aff_combination (var, uutype, &var_aff);
3772
3773 /* We need to shift the value if we are after the increment. */
3774 if (stmt_after_increment (loop, cand, at))
3775 {
3776 aff_tree cstep_aff;
3777
3778 if (common_type != uutype)
3779 cstep_common = fold_convert (common_type, cstep);
3780 else
3781 cstep_common = cstep;
3782
3783 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3784 aff_combination_add (&cbase_aff, &cstep_aff);
3785 }
3786
3787 aff_combination_scale (&cbase_aff, -rat);
3788 aff_combination_add (aff, &cbase_aff);
3789 if (common_type != uutype)
3790 aff_combination_convert (aff, uutype);
3791
3792 aff_combination_scale (&var_aff, rat);
3793 aff_combination_add (aff, &var_aff);
3794
3795 return true;
3796 }
3797
3798 /* Return the type of USE. */
3799
3800 static tree
3801 get_use_type (struct iv_use *use)
3802 {
3803 tree base_type = TREE_TYPE (use->iv->base);
3804 tree type;
3805
3806 if (use->type == USE_ADDRESS)
3807 {
3808 /* The base_type may be a void pointer. Create a pointer type based on
3809 the mem_ref instead. */
3810 type = build_pointer_type (TREE_TYPE (*use->op_p));
3811 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3812 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3813 }
3814 else
3815 type = base_type;
3816
3817 return type;
3818 }
3819
3820 /* Determines the expression by that USE is expressed from induction variable
3821 CAND at statement AT in LOOP. The computation is unshared. */
3822
3823 static tree
3824 get_computation_at (struct loop *loop,
3825 struct iv_use *use, struct iv_cand *cand, gimple *at)
3826 {
3827 aff_tree aff;
3828 tree type = get_use_type (use);
3829
3830 if (!get_computation_aff (loop, use, cand, at, &aff))
3831 return NULL_TREE;
3832 unshare_aff_combination (&aff);
3833 return fold_convert (type, aff_combination_to_tree (&aff));
3834 }
3835
3836 /* Determines the expression by that USE is expressed from induction variable
3837 CAND in LOOP. The computation is unshared. */
3838
3839 static tree
3840 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3841 {
3842 return get_computation_at (loop, use, cand, use->stmt);
3843 }
3844
3845 /* Adjust the cost COST for being in loop setup rather than loop body.
3846 If we're optimizing for space, the loop setup overhead is constant;
3847 if we're optimizing for speed, amortize it over the per-iteration cost. */
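/* E.g. a setup cost of 20 in a loop averaging 10 iterations contributes
   only 2 when optimizing for speed, but the full 20 when optimizing
   for size.  */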
3848 static unsigned
3849 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3850 {
3851 if (cost == INFTY)
3852 return cost;
3853 else if (optimize_loop_for_speed_p (data->current_loop))
3854 return cost / avg_loop_niter (data->current_loop);
3855 else
3856 return cost;
3857 }
3858
3859 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3860 validity for a memory reference accessing memory of mode MODE in
3861 address space AS. */
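/* A hypothetical example: on a target whose scaled addressing accepts
   index scales of 1, 2, 4 and 8 for the given mode, only those ratios
   (within [-MAX_RATIO, MAX_RATIO]) are marked valid in the cached
   bitmap below.  */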
3862
3863
3864 bool
3865 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3866 addr_space_t as)
3867 {
3868 #define MAX_RATIO 128
3869 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3870 static vec<sbitmap> valid_mult_list;
3871 sbitmap valid_mult;
3872
3873 if (data_index >= valid_mult_list.length ())
3874 valid_mult_list.safe_grow_cleared (data_index + 1);
3875
3876 valid_mult = valid_mult_list[data_index];
3877 if (!valid_mult)
3878 {
3879 machine_mode address_mode = targetm.addr_space.address_mode (as);
3880 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3881 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3882 rtx addr, scaled;
3883 HOST_WIDE_INT i;
3884
3885 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3886 bitmap_clear (valid_mult);
3887 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3888 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3889 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3890 {
3891 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3892 if (memory_address_addr_space_p (mode, addr, as)
3893 || memory_address_addr_space_p (mode, scaled, as))
3894 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3895 }
3896
3897 if (dump_file && (dump_flags & TDF_DETAILS))
3898 {
3899 fprintf (dump_file, " allowed multipliers:");
3900 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3901 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3902 fprintf (dump_file, " %d", (int) i);
3903 fprintf (dump_file, "\n");
3904 fprintf (dump_file, "\n");
3905 }
3906
3907 valid_mult_list[data_index] = valid_mult;
3908 }
3909
3910 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3911 return false;
3912
3913 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3914 }
3915
3916 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3917 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3918 variable is omitted. Compute the cost for a memory reference that accesses
3919 a memory location of mode MEM_MODE in address space AS.
3920
3921 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3922 size of MEM_MODE / RATIO) is available. To make this determination, we
3923 look at the size of the increment to be made, which is given in CSTEP.
3924 CSTEP may be zero if the step is unknown.
3925 STMT_AFTER_INC is true iff the statement we're looking at is after the
3926 increment of the original biv.
3927
3928 TODO -- there must be some better way. This all is quite crude. */
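/* As a concrete, purely illustrative example: for a use a[i] on a target
   with base + index * scale + displacement addressing, the address splits
   into symbol = &a, index = i, RATIO = sizeof (*a) and OFFSET = 0; the cost
   computed below estimates how expensive such an address is to form.  */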
3929
3930 enum ainc_type
3931 {
3932 AINC_PRE_INC, /* Pre increment. */
3933 AINC_PRE_DEC, /* Pre decrement. */
3934 AINC_POST_INC, /* Post increment. */
3935 AINC_POST_DEC, /* Post decrement. */
3936 AINC_NONE /* Also the number of auto increment types. */
3937 };
3938
3939 struct address_cost_data
3940 {
3941 HOST_WIDE_INT min_offset, max_offset;
3942 unsigned costs[2][2][2][2];
3943 unsigned ainc_costs[AINC_NONE];
3944 };
3945
3946
3947 static comp_cost
3948 get_address_cost (bool symbol_present, bool var_present,
3949 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3950 HOST_WIDE_INT cstep, machine_mode mem_mode,
3951 addr_space_t as, bool speed,
3952 bool stmt_after_inc, bool *may_autoinc)
3953 {
3954 machine_mode address_mode = targetm.addr_space.address_mode (as);
3955 static vec<address_cost_data *> address_cost_data_list;
3956 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3957 address_cost_data *data;
3958 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3959 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3960 unsigned cost, acost, complexity;
3961 enum ainc_type autoinc_type;
3962 bool offset_p, ratio_p, autoinc;
3963 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3964 unsigned HOST_WIDE_INT mask;
3965 unsigned bits;
3966
3967 if (data_index >= address_cost_data_list.length ())
3968 address_cost_data_list.safe_grow_cleared (data_index + 1);
3969
3970 data = address_cost_data_list[data_index];
3971 if (!data)
3972 {
3973 HOST_WIDE_INT i;
3974 HOST_WIDE_INT rat, off = 0;
3975 int old_cse_not_expected, width;
3976 unsigned sym_p, var_p, off_p, rat_p, add_c;
3977 rtx_insn *seq;
3978 rtx addr, base;
3979 rtx reg0, reg1;
3980
3981 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3982
3983 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3984
3985 width = GET_MODE_BITSIZE (address_mode) - 1;
3986 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3987 width = HOST_BITS_PER_WIDE_INT - 1;
3988 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3989
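      /* Find the most negative power-of-two displacement that is still
	 accepted in a "reg + offset" address; it serves as a conservative
	 lower bound on the offsets usable in addresses of MEM_MODE.  */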
3990 for (i = width; i >= 0; i--)
3991 {
3992 off = -((unsigned HOST_WIDE_INT) 1 << i);
3993 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3994 if (memory_address_addr_space_p (mem_mode, addr, as))
3995 break;
3996 }
3997 data->min_offset = (i == -1? 0 : off);
3998
3999 for (i = width; i >= 0; i--)
4000 {
4001 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4002 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4003 if (memory_address_addr_space_p (mem_mode, addr, as))
4004 break;
4005 /* For some strict-alignment targets, the offset must be naturally
4006 aligned. Try an aligned offset if mem_mode is not QImode. */
4007 off = mem_mode != QImode
4008 ? ((unsigned HOST_WIDE_INT) 1 << i)
4009 - GET_MODE_SIZE (mem_mode)
4010 : 0;
4011 if (off > 0)
4012 {
4013 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4014 if (memory_address_addr_space_p (mem_mode, addr, as))
4015 break;
4016 }
4017 }
4018 if (i == -1)
4019 off = 0;
4020 data->max_offset = off;
4021
4022 if (dump_file && (dump_flags & TDF_DETAILS))
4023 {
4024 fprintf (dump_file, "get_address_cost:\n");
4025 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4026 GET_MODE_NAME (mem_mode),
4027 data->min_offset);
4028 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4029 GET_MODE_NAME (mem_mode),
4030 data->max_offset);
4031 }
4032
4033 rat = 1;
4034 for (i = 2; i <= MAX_RATIO; i++)
4035 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4036 {
4037 rat = i;
4038 break;
4039 }
4040
4041 /* Compute the cost of various addressing modes. */
4042 acost = 0;
4043 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4044 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4045
4046 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4047 || USE_STORE_PRE_DECREMENT (mem_mode))
4048 {
4049 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4050 has_predec[mem_mode]
4051 = memory_address_addr_space_p (mem_mode, addr, as);
4052
4053 if (has_predec[mem_mode])
4054 data->ainc_costs[AINC_PRE_DEC]
4055 = address_cost (addr, mem_mode, as, speed);
4056 }
4057 if (USE_LOAD_POST_DECREMENT (mem_mode)
4058 || USE_STORE_POST_DECREMENT (mem_mode))
4059 {
4060 addr = gen_rtx_POST_DEC (address_mode, reg0);
4061 has_postdec[mem_mode]
4062 = memory_address_addr_space_p (mem_mode, addr, as);
4063
4064 if (has_postdec[mem_mode])
4065 data->ainc_costs[AINC_POST_DEC]
4066 = address_cost (addr, mem_mode, as, speed);
4067 }
4068 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4069 || USE_STORE_PRE_INCREMENT (mem_mode))
4070 {
4071 addr = gen_rtx_PRE_INC (address_mode, reg0);
4072 has_preinc[mem_mode]
4073 = memory_address_addr_space_p (mem_mode, addr, as);
4074
4075 if (has_preinc[mem_mode])
4076 data->ainc_costs[AINC_PRE_INC]
4077 = address_cost (addr, mem_mode, as, speed);
4078 }
4079 if (USE_LOAD_POST_INCREMENT (mem_mode)
4080 || USE_STORE_POST_INCREMENT (mem_mode))
4081 {
4082 addr = gen_rtx_POST_INC (address_mode, reg0);
4083 has_postinc[mem_mode]
4084 = memory_address_addr_space_p (mem_mode, addr, as);
4085
4086 if (has_postinc[mem_mode])
4087 data->ainc_costs[AINC_POST_INC]
4088 = address_cost (addr, mem_mode, as, speed);
4089 }
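      /* Now compute the cost of each of the 16 combinations of the
	 { symbol, var, offset, ratio } address parts by building a
	 representative address RTX, legitimizing it and measuring the cost
	 of the resulting insn sequence plus the address itself.  */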
4090 for (i = 0; i < 16; i++)
4091 {
4092 sym_p = i & 1;
4093 var_p = (i >> 1) & 1;
4094 off_p = (i >> 2) & 1;
4095 rat_p = (i >> 3) & 1;
4096
4097 addr = reg0;
4098 if (rat_p)
4099 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4100 gen_int_mode (rat, address_mode));
4101
4102 if (var_p)
4103 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4104
4105 if (sym_p)
4106 {
4107 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4108 /* ??? We can run into trouble with some backends by presenting
4109 it with symbols which haven't been properly passed through
4110 targetm.encode_section_info. By setting the local bit, we
4111 enhance the probability of things working. */
4112 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4113
4114 if (off_p)
4115 base = gen_rtx_fmt_e (CONST, address_mode,
4116 gen_rtx_fmt_ee
4117 (PLUS, address_mode, base,
4118 gen_int_mode (off, address_mode)));
4119 }
4120 else if (off_p)
4121 base = gen_int_mode (off, address_mode);
4122 else
4123 base = NULL_RTX;
4124
4125 if (base)
4126 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4127
4128 start_sequence ();
4129 /* To avoid splitting addressing modes, pretend that no cse will
4130 follow. */
4131 old_cse_not_expected = cse_not_expected;
4132 cse_not_expected = true;
4133 addr = memory_address_addr_space (mem_mode, addr, as);
4134 cse_not_expected = old_cse_not_expected;
4135 seq = get_insns ();
4136 end_sequence ();
4137
4138 acost = seq_cost (seq, speed);
4139 acost += address_cost (addr, mem_mode, as, speed);
4140
4141 if (!acost)
4142 acost = 1;
4143 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4144 }
4145
4146 /* On some targets, it is quite expensive to load a symbol into a register,
4147 which makes addresses that contain symbols look much more expensive.
4148 However, the symbol will have to be loaded in any case before the
4149 loop (and quite likely we have it in register already), so it does not
4150 make much sense to penalize them too heavily. So make some final
4151 tweaks for the SYMBOL_PRESENT modes:
4152
4153 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4154 var is cheaper, use this mode with small penalty.
4155 If VAR_PRESENT is true, try whether the mode with
4156 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4157 if this is the case, use it. */
4158 add_c = add_cost (speed, address_mode);
4159 for (i = 0; i < 8; i++)
4160 {
4161 var_p = i & 1;
4162 off_p = (i >> 1) & 1;
4163 rat_p = (i >> 2) & 1;
4164
4165 acost = data->costs[0][1][off_p][rat_p] + 1;
4166 if (var_p)
4167 acost += add_c;
4168
4169 if (acost < data->costs[1][var_p][off_p][rat_p])
4170 data->costs[1][var_p][off_p][rat_p] = acost;
4171 }
4172
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4174 {
4175 fprintf (dump_file, "Address costs:\n");
4176
4177 for (i = 0; i < 16; i++)
4178 {
4179 sym_p = i & 1;
4180 var_p = (i >> 1) & 1;
4181 off_p = (i >> 2) & 1;
4182 rat_p = (i >> 3) & 1;
4183
4184 fprintf (dump_file, " ");
4185 if (sym_p)
4186 fprintf (dump_file, "sym + ");
4187 if (var_p)
4188 fprintf (dump_file, "var + ");
4189 if (off_p)
4190 fprintf (dump_file, "cst + ");
4191 if (rat_p)
4192 fprintf (dump_file, "rat * ");
4193
4194 acost = data->costs[sym_p][var_p][off_p][rat_p];
4195 fprintf (dump_file, "index costs %d\n", acost);
4196 }
4197 if (has_predec[mem_mode] || has_postdec[mem_mode]
4198 || has_preinc[mem_mode] || has_postinc[mem_mode])
4199 fprintf (dump_file, " May include autoinc/dec\n");
4200 fprintf (dump_file, "\n");
4201 }
4202
4203 address_cost_data_list[data_index] = data;
4204 }
4205
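  /* Addresses are computed modulo the width of the address mode, so
     sign-extend OFFSET from that width: a large unsigned OFFSET may in fact
     represent a small negative displacement.  */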
4206 bits = GET_MODE_BITSIZE (address_mode);
4207 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4208 offset &= mask;
4209 if ((offset >> (bits - 1) & 1))
4210 offset |= ~mask;
4211 s_offset = offset;
4212
4213 autoinc = false;
4214 autoinc_type = AINC_NONE;
4215 msize = GET_MODE_SIZE (mem_mode);
4216 autoinc_offset = offset;
4217 if (stmt_after_inc)
4218 autoinc_offset += ratio * cstep;
4219 if (symbol_present || var_present || ratio != 1)
4220 autoinc = false;
4221 else
4222 {
4223 if (has_postinc[mem_mode] && autoinc_offset == 0
4224 && msize == cstep)
4225 autoinc_type = AINC_POST_INC;
4226 else if (has_postdec[mem_mode] && autoinc_offset == 0
4227 && msize == -cstep)
4228 autoinc_type = AINC_POST_DEC;
4229 else if (has_preinc[mem_mode] && autoinc_offset == msize
4230 && msize == cstep)
4231 autoinc_type = AINC_PRE_INC;
4232 else if (has_predec[mem_mode] && autoinc_offset == -msize
4233 && msize == -cstep)
4234 autoinc_type = AINC_PRE_DEC;
4235
4236 if (autoinc_type != AINC_NONE)
4237 autoinc = true;
4238 }
4239
4240 cost = 0;
4241 offset_p = (s_offset != 0
4242 && data->min_offset <= s_offset
4243 && s_offset <= data->max_offset);
4244 ratio_p = (ratio != 1
4245 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4246
4247 if (ratio != 1 && !ratio_p)
4248 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4249
4250 if (s_offset && !offset_p && !symbol_present)
4251 cost += add_cost (speed, address_mode);
4252
4253 if (may_autoinc)
4254 *may_autoinc = autoinc;
4255 if (autoinc)
4256 acost = data->ainc_costs[autoinc_type];
4257 else
4258 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4259 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4260 return new_cost (cost + acost, complexity);
4261 }
4262
4263 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4264 EXPR operand holding the shift. COST0 and COST1 are the costs for
4265 calculating the operands of EXPR. Returns true if successful, and returns
4266 the cost in COST. */
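/* For example (illustrative): for EXPR = a + b * 8, MULT is the b * 8
   operand and m = 3; we compare the cost of a shift followed by an add with
   the target's combined shift-and-add cost and take the cheaper one.  */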
4267
4268 static bool
4269 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4270 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4271 {
4272 comp_cost res;
4273 tree op1 = TREE_OPERAND (expr, 1);
4274 tree cst = TREE_OPERAND (mult, 1);
4275 tree multop = TREE_OPERAND (mult, 0);
4276 int m = exact_log2 (int_cst_value (cst));
4277 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4278 int as_cost, sa_cost;
4279 bool mult_in_op1;
4280
4281 if (!(m >= 0 && m < maxm))
4282 return false;
4283
4284 STRIP_NOPS (op1);
4285 mult_in_op1 = operand_equal_p (op1, mult, 0);
4286
4287 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4288
4289 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4290 use that in preference to a shift insn followed by an add insn. */
4291 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4292 ? shiftadd_cost (speed, mode, m)
4293 : (mult_in_op1
4294 ? shiftsub1_cost (speed, mode, m)
4295 : shiftsub0_cost (speed, mode, m)));
4296
4297 res = new_cost (MIN (as_cost, sa_cost), 0);
4298 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4299
4300 STRIP_NOPS (multop);
4301 if (!is_gimple_val (multop))
4302 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4303
4304 *cost = res;
4305 return true;
4306 }
4307
4308 /* Estimates cost of forcing expression EXPR into a variable. */
4309
4310 static comp_cost
4311 force_expr_to_var_cost (tree expr, bool speed)
4312 {
4313 static bool costs_initialized = false;
4314 static unsigned integer_cost [2];
4315 static unsigned symbol_cost [2];
4316 static unsigned address_cost [2];
4317 tree op0, op1;
4318 comp_cost cost0, cost1, cost;
4319 machine_mode mode;
4320
4321 if (!costs_initialized)
4322 {
4323 tree type = build_pointer_type (integer_type_node);
4324 tree var, addr;
4325 rtx x;
4326 int i;
4327
4328 var = create_tmp_var_raw (integer_type_node, "test_var");
4329 TREE_STATIC (var) = 1;
4330 x = produce_memory_decl_rtl (var, NULL);
4331 SET_DECL_RTL (var, x);
4332
4333 addr = build1 (ADDR_EXPR, type, var);
4334
4335
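      /* Measure, once for size (i == 0) and once for speed (i == 1), the
	 cost of materializing a representative integer constant, a symbol
	 address and a symbol + offset address; these serve as estimates for
	 forcing similar expressions into variables below.  */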
4336 for (i = 0; i < 2; i++)
4337 {
4338 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4339 2000), i);
4340
4341 symbol_cost[i] = computation_cost (addr, i) + 1;
4342
4343 address_cost[i]
4344 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4345 if (dump_file && (dump_flags & TDF_DETAILS))
4346 {
4347 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4348 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4349 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4350 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4351 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4352 fprintf (dump_file, "\n");
4353 }
4354 }
4355
4356 costs_initialized = true;
4357 }
4358
4359 STRIP_NOPS (expr);
4360
4361 if (SSA_VAR_P (expr))
4362 return no_cost;
4363
4364 if (is_gimple_min_invariant (expr))
4365 {
4366 if (TREE_CODE (expr) == INTEGER_CST)
4367 return new_cost (integer_cost [speed], 0);
4368
4369 if (TREE_CODE (expr) == ADDR_EXPR)
4370 {
4371 tree obj = TREE_OPERAND (expr, 0);
4372
4373 if (TREE_CODE (obj) == VAR_DECL
4374 || TREE_CODE (obj) == PARM_DECL
4375 || TREE_CODE (obj) == RESULT_DECL)
4376 return new_cost (symbol_cost [speed], 0);
4377 }
4378
4379 return new_cost (address_cost [speed], 0);
4380 }
4381
4382 switch (TREE_CODE (expr))
4383 {
4384 case POINTER_PLUS_EXPR:
4385 case PLUS_EXPR:
4386 case MINUS_EXPR:
4387 case MULT_EXPR:
4388 op0 = TREE_OPERAND (expr, 0);
4389 op1 = TREE_OPERAND (expr, 1);
4390 STRIP_NOPS (op0);
4391 STRIP_NOPS (op1);
4392 break;
4393
4394 CASE_CONVERT:
4395 case NEGATE_EXPR:
4396 op0 = TREE_OPERAND (expr, 0);
4397 STRIP_NOPS (op0);
4398 op1 = NULL_TREE;
4399 break;
4400
4401 default:
4402 /* Just an arbitrary value, FIXME. */
4403 return new_cost (target_spill_cost[speed], 0);
4404 }
4405
4406 if (op0 == NULL_TREE
4407 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4408 cost0 = no_cost;
4409 else
4410 cost0 = force_expr_to_var_cost (op0, speed);
4411
4412 if (op1 == NULL_TREE
4413 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4414 cost1 = no_cost;
4415 else
4416 cost1 = force_expr_to_var_cost (op1, speed);
4417
4418 mode = TYPE_MODE (TREE_TYPE (expr));
4419 switch (TREE_CODE (expr))
4420 {
4421 case POINTER_PLUS_EXPR:
4422 case PLUS_EXPR:
4423 case MINUS_EXPR:
4424 case NEGATE_EXPR:
4425 cost = new_cost (add_cost (speed, mode), 0);
4426 if (TREE_CODE (expr) != NEGATE_EXPR)
4427 {
4428 tree mult = NULL_TREE;
4429 comp_cost sa_cost;
4430 if (TREE_CODE (op1) == MULT_EXPR)
4431 mult = op1;
4432 else if (TREE_CODE (op0) == MULT_EXPR)
4433 mult = op0;
4434
4435 if (mult != NULL_TREE
4436 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4437 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4438 speed, &sa_cost))
4439 return sa_cost;
4440 }
4441 break;
4442
4443 CASE_CONVERT:
4444 {
4445 tree inner_mode, outer_mode;
4446 outer_mode = TREE_TYPE (expr);
4447 inner_mode = TREE_TYPE (op0);
4448 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4449 TYPE_MODE (inner_mode), speed), 0);
4450 }
4451 break;
4452
4453 case MULT_EXPR:
4454 if (cst_and_fits_in_hwi (op0))
4455 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4456 mode, speed), 0);
4457 else if (cst_and_fits_in_hwi (op1))
4458 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4459 mode, speed), 0);
4460 else
4461 return new_cost (target_spill_cost [speed], 0);
4462 break;
4463
4464 default:
4465 gcc_unreachable ();
4466 }
4467
4468 cost = add_costs (cost, cost0);
4469 cost = add_costs (cost, cost1);
4470
4471 /* Bound the cost by target_spill_cost. The parts of complicated
4472 computations often are either loop invariant or at least can
4473 be shared between several iv uses, so letting this grow without
4474 limits would not give reasonable results. */
4475 if (cost.cost > (int) target_spill_cost [speed])
4476 cost.cost = target_spill_cost [speed];
4477
4478 return cost;
4479 }
4480
4481 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4482 invariants the computation depends on. */
4483
4484 static comp_cost
4485 force_var_cost (struct ivopts_data *data,
4486 tree expr, bitmap *depends_on)
4487 {
4488 if (depends_on)
4489 {
4490 fd_ivopts_data = data;
4491 walk_tree (&expr, find_depends, depends_on, NULL);
4492 }
4493
4494 return force_expr_to_var_cost (expr, data->speed);
4495 }
4496
4497 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4498 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4499 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4500 invariants the computation depends on. */
4501
4502 static comp_cost
4503 split_address_cost (struct ivopts_data *data,
4504 tree addr, bool *symbol_present, bool *var_present,
4505 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4506 {
4507 tree core;
4508 HOST_WIDE_INT bitsize;
4509 HOST_WIDE_INT bitpos;
4510 tree toffset;
4511 machine_mode mode;
4512 int unsignedp, reversep, volatilep;
4513
4514 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4515 &unsignedp, &reversep, &volatilep, false);
4516
4517 if (toffset != 0
4518 || bitpos % BITS_PER_UNIT != 0
4519 || reversep
4520 || TREE_CODE (core) != VAR_DECL)
4521 {
4522 *symbol_present = false;
4523 *var_present = true;
4524 fd_ivopts_data = data;
4525 if (depends_on)
4526 walk_tree (&addr, find_depends, depends_on, NULL);
4527
4528 return new_cost (target_spill_cost[data->speed], 0);
4529 }
4530
4531 *offset += bitpos / BITS_PER_UNIT;
4532 if (TREE_STATIC (core)
4533 || DECL_EXTERNAL (core))
4534 {
4535 *symbol_present = true;
4536 *var_present = false;
4537 return no_cost;
4538 }
4539
4540 *symbol_present = false;
4541 *var_present = true;
4542 return no_cost;
4543 }
4544
4545 /* Estimates cost of expressing difference of addresses E1 - E2 as
4546 var + symbol + offset. The value of offset is added to OFFSET,
4547 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4548 part is missing. DEPENDS_ON is a set of the invariants the computation
4549 depends on. */
4550
4551 static comp_cost
4552 ptr_difference_cost (struct ivopts_data *data,
4553 tree e1, tree e2, bool *symbol_present, bool *var_present,
4554 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4555 {
4556 HOST_WIDE_INT diff = 0;
4557 aff_tree aff_e1, aff_e2;
4558 tree type;
4559
4560 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4561
4562 if (ptr_difference_const (e1, e2, &diff))
4563 {
4564 *offset += diff;
4565 *symbol_present = false;
4566 *var_present = false;
4567 return no_cost;
4568 }
4569
4570 if (integer_zerop (e2))
4571 return split_address_cost (data, TREE_OPERAND (e1, 0),
4572 symbol_present, var_present, offset, depends_on);
4573
4574 *symbol_present = false;
4575 *var_present = true;
4576
4577 type = signed_type_for (TREE_TYPE (e1));
4578 tree_to_aff_combination (e1, type, &aff_e1);
4579 tree_to_aff_combination (e2, type, &aff_e2);
4580 aff_combination_scale (&aff_e2, -1);
4581 aff_combination_add (&aff_e1, &aff_e2);
4582
4583 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4584 }
4585
4586 /* Estimates cost of expressing difference E1 - E2 as
4587 var + symbol + offset. The value of offset is added to OFFSET,
4588 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4589 part is missing. DEPENDS_ON is a set of the invariants the computation
4590 depends on. */
4591
4592 static comp_cost
4593 difference_cost (struct ivopts_data *data,
4594 tree e1, tree e2, bool *symbol_present, bool *var_present,
4595 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4596 {
4597 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4598 unsigned HOST_WIDE_INT off1, off2;
4599 aff_tree aff_e1, aff_e2;
4600 tree type;
4601
4602 e1 = strip_offset (e1, &off1);
4603 e2 = strip_offset (e2, &off2);
4604 *offset += off1 - off2;
4605
4606 STRIP_NOPS (e1);
4607 STRIP_NOPS (e2);
4608
4609 if (TREE_CODE (e1) == ADDR_EXPR)
4610 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4611 offset, depends_on);
4612 *symbol_present = false;
4613
4614 if (operand_equal_p (e1, e2, 0))
4615 {
4616 *var_present = false;
4617 return no_cost;
4618 }
4619
4620 *var_present = true;
4621
4622 if (integer_zerop (e2))
4623 return force_var_cost (data, e1, depends_on);
4624
4625 if (integer_zerop (e1))
4626 {
4627 comp_cost cost = force_var_cost (data, e2, depends_on);
4628 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4629 return cost;
4630 }
4631
4632 type = signed_type_for (TREE_TYPE (e1));
4633 tree_to_aff_combination (e1, type, &aff_e1);
4634 tree_to_aff_combination (e2, type, &aff_e2);
4635 aff_combination_scale (&aff_e2, -1);
4636 aff_combination_add (&aff_e1, &aff_e2);
4637
4638 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4639 }
4640
4641 /* Returns true if AFF1 and AFF2 are identical. */
4642
4643 static bool
4644 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4645 {
4646 unsigned i;
4647
4648 if (aff1->n != aff2->n)
4649 return false;
4650
4651 for (i = 0; i < aff1->n; i++)
4652 {
4653 if (aff1->elts[i].coef != aff2->elts[i].coef)
4654 return false;
4655
4656 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4657 return false;
4658 }
4659 return true;
4660 }
4661
4662 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4663
4664 static int
4665 get_expr_id (struct ivopts_data *data, tree expr)
4666 {
4667 struct iv_inv_expr_ent ent;
4668 struct iv_inv_expr_ent **slot;
4669
4670 ent.expr = expr;
4671 ent.hash = iterative_hash_expr (expr, 0);
4672 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4673 if (*slot)
4674 return (*slot)->id;
4675
4676 *slot = XNEW (struct iv_inv_expr_ent);
4677 (*slot)->expr = expr;
4678 (*slot)->hash = ent.hash;
4679 (*slot)->id = data->inv_expr_id++;
4680 return (*slot)->id;
4681 }
4682
4683 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4684 requires a new compiler generated temporary. Returns -1 otherwise.
4685 ADDRESS_P is a flag indicating if the expression is for address
4686 computation. */
4687
4688 static int
4689 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4690 tree cbase, HOST_WIDE_INT ratio,
4691 bool address_p)
4692 {
4693 aff_tree ubase_aff, cbase_aff;
4694 tree expr, ub, cb;
4695
4696 STRIP_NOPS (ubase);
4697 STRIP_NOPS (cbase);
4698 ub = ubase;
4699 cb = cbase;
4700
4701 if ((TREE_CODE (ubase) == INTEGER_CST)
4702 && (TREE_CODE (cbase) == INTEGER_CST))
4703 return -1;
4704
4705 /* Strips the constant part. */
4706 if (TREE_CODE (ubase) == PLUS_EXPR
4707 || TREE_CODE (ubase) == MINUS_EXPR
4708 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4709 {
4710 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4711 ubase = TREE_OPERAND (ubase, 0);
4712 }
4713
4714 /* Strips the constant part. */
4715 if (TREE_CODE (cbase) == PLUS_EXPR
4716 || TREE_CODE (cbase) == MINUS_EXPR
4717 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4718 {
4719 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4720 cbase = TREE_OPERAND (cbase, 0);
4721 }
4722
4723 if (address_p)
4724 {
4725 if (((TREE_CODE (ubase) == SSA_NAME)
4726 || (TREE_CODE (ubase) == ADDR_EXPR
4727 && is_gimple_min_invariant (ubase)))
4728 && (TREE_CODE (cbase) == INTEGER_CST))
4729 return -1;
4730
4731 if (((TREE_CODE (cbase) == SSA_NAME)
4732 || (TREE_CODE (cbase) == ADDR_EXPR
4733 && is_gimple_min_invariant (cbase)))
4734 && (TREE_CODE (ubase) == INTEGER_CST))
4735 return -1;
4736 }
4737
4738 if (ratio == 1)
4739 {
4740 if (operand_equal_p (ubase, cbase, 0))
4741 return -1;
4742
4743 if (TREE_CODE (ubase) == ADDR_EXPR
4744 && TREE_CODE (cbase) == ADDR_EXPR)
4745 {
4746 tree usym, csym;
4747
4748 usym = TREE_OPERAND (ubase, 0);
4749 csym = TREE_OPERAND (cbase, 0);
4750 if (TREE_CODE (usym) == ARRAY_REF)
4751 {
4752 tree ind = TREE_OPERAND (usym, 1);
4753 if (TREE_CODE (ind) == INTEGER_CST
4754 && tree_fits_shwi_p (ind)
4755 && tree_to_shwi (ind) == 0)
4756 usym = TREE_OPERAND (usym, 0);
4757 }
4758 if (TREE_CODE (csym) == ARRAY_REF)
4759 {
4760 tree ind = TREE_OPERAND (csym, 1);
4761 if (TREE_CODE (ind) == INTEGER_CST
4762 && tree_fits_shwi_p (ind)
4763 && tree_to_shwi (ind) == 0)
4764 csym = TREE_OPERAND (csym, 0);
4765 }
4766 if (operand_equal_p (usym, csym, 0))
4767 return -1;
4768 }
4769 /* Now do a more complex comparison.  */
4770 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4771 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4772 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4773 return -1;
4774 }
4775
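  /* Build UB - RATIO * CB as an affine combination and record it in the
     invariant expression table, returning its id.  */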
4776 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4777 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4778
4779 aff_combination_scale (&cbase_aff, -1 * ratio);
4780 aff_combination_add (&ubase_aff, &cbase_aff);
4781 expr = aff_combination_to_tree (&ubase_aff);
4782 return get_expr_id (data, expr);
4783 }
4784
4785
4786
4787 /* Determines the cost of the computation by which USE is expressed
4788 from induction variable CAND. If ADDRESS_P is true, we just need
4789 to create an address from it, otherwise we want to get it into
4790 register. A set of invariants we depend on is stored in
4791 DEPENDS_ON. AT is the statement at that the value is computed.
4792 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4793 addressing is likely. */
4794
4795 static comp_cost
4796 get_computation_cost_at (struct ivopts_data *data,
4797 struct iv_use *use, struct iv_cand *cand,
4798 bool address_p, bitmap *depends_on, gimple *at,
4799 bool *can_autoinc,
4800 int *inv_expr_id)
4801 {
4802 tree ubase = use->iv->base, ustep = use->iv->step;
4803 tree cbase, cstep;
4804 tree utype = TREE_TYPE (ubase), ctype;
4805 unsigned HOST_WIDE_INT cstepi, offset = 0;
4806 HOST_WIDE_INT ratio, aratio;
4807 bool var_present, symbol_present, stmt_is_after_inc;
4808 comp_cost cost;
4809 widest_int rat;
4810 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4811 machine_mode mem_mode = (address_p
4812 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4813 : VOIDmode);
4814
4815 if (depends_on)
4816 *depends_on = NULL;
4817
4818 /* Only consider real candidates. */
4819 if (!cand->iv)
4820 return infinite_cost;
4821
4822 cbase = cand->iv->base;
4823 cstep = cand->iv->step;
4824 ctype = TREE_TYPE (cbase);
4825
4826 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4827 {
4828 /* We do not have a precision to express the values of use. */
4829 return infinite_cost;
4830 }
4831
4832 if (address_p
4833 || (use->iv->base_object
4834 && cand->iv->base_object
4835 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4836 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4837 {
4838 /* Do not try to express address of an object with computation based
4839 on address of a different object. This may cause problems in rtl
4840 level alias analysis (that does not expect this to be happening,
4841 as this is illegal in C), and would be unlikely to be useful
4842 anyway. */
4843 if (use->iv->base_object
4844 && cand->iv->base_object
4845 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4846 return infinite_cost;
4847 }
4848
4849 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4850 {
4851 /* TODO -- add direct handling of this case. */
4852 goto fallback;
4853 }
4854
4855 /* CSTEPI is removed from the offset in case statement is after the
4856 increment. If the step is not constant, we use zero instead.
4857 This is a bit imprecise (there is the extra addition), but
4858 redundancy elimination is likely to transform the code so that
4859 it uses the value of the variable before the increment anyway,
4860 so this is not too unrealistic. */
4861 if (cst_and_fits_in_hwi (cstep))
4862 cstepi = int_cst_value (cstep);
4863 else
4864 cstepi = 0;
4865
4866 if (!constant_multiple_of (ustep, cstep, &rat))
4867 return infinite_cost;
4868
4869 if (wi::fits_shwi_p (rat))
4870 ratio = rat.to_shwi ();
4871 else
4872 return infinite_cost;
4873
4874 STRIP_NOPS (cbase);
4875 ctype = TREE_TYPE (cbase);
4876
4877 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4878
4879 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4880 or ratio == 1, it is better to handle this like
4881
4882 ubase - ratio * cbase + ratio * var
4883
4884 (this also holds in the case ratio == -1, TODO).  */
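  /* For instance (illustrative only): with ubase = b + 16, cbase = 4 and
     ratio = 4, the use is rewritten as (b + 16 - 4 * 4) + 4 * var
     = b + 4 * var, so only the cost of b and of the multiplication by 4
     remains per iteration.  */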
4885
4886 if (cst_and_fits_in_hwi (cbase))
4887 {
4888 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4889 cost = difference_cost (data,
4890 ubase, build_int_cst (utype, 0),
4891 &symbol_present, &var_present, &offset,
4892 depends_on);
4893 cost.cost /= avg_loop_niter (data->current_loop);
4894 }
4895 else if (ratio == 1)
4896 {
4897 tree real_cbase = cbase;
4898
4899 /* Check to see if any adjustment is needed. */
4900 if (cstepi == 0 && stmt_is_after_inc)
4901 {
4902 aff_tree real_cbase_aff;
4903 aff_tree cstep_aff;
4904
4905 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4906 &real_cbase_aff);
4907 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4908
4909 aff_combination_add (&real_cbase_aff, &cstep_aff);
4910 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4911 }
4912
4913 cost = difference_cost (data,
4914 ubase, real_cbase,
4915 &symbol_present, &var_present, &offset,
4916 depends_on);
4917 cost.cost /= avg_loop_niter (data->current_loop);
4918 }
4919 else if (address_p
4920 && !POINTER_TYPE_P (ctype)
4921 && multiplier_allowed_in_address_p
4922 (ratio, mem_mode,
4923 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4924 {
4925 if (cstepi == 0 && stmt_is_after_inc)
4926 {
4927 if (POINTER_TYPE_P (ctype))
4928 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4929 else
4930 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4931 }
4932 cbase
4933 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4934 cost = difference_cost (data,
4935 ubase, cbase,
4936 &symbol_present, &var_present, &offset,
4937 depends_on);
4938 cost.cost /= avg_loop_niter (data->current_loop);
4939 }
4940 else
4941 {
4942 cost = force_var_cost (data, cbase, depends_on);
4943 cost = add_costs (cost,
4944 difference_cost (data,
4945 ubase, build_int_cst (utype, 0),
4946 &symbol_present, &var_present,
4947 &offset, depends_on));
4948 cost.cost /= avg_loop_niter (data->current_loop);
4949 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4950 }
4951
4952 /* The set of invariants depended on by a sub-use has already been computed
4953 for the first use in the group. */
4954 if (use->sub_id)
4955 {
4956 cost.cost = 0;
4957 if (depends_on && *depends_on)
4958 bitmap_clear (*depends_on);
4959 }
4960 else if (inv_expr_id)
4961 {
4962 *inv_expr_id =
4963 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4964 /* Clear depends on. */
4965 if (*inv_expr_id != -1 && depends_on && *depends_on)
4966 bitmap_clear (*depends_on);
4967 }
4968
4969 /* If we are after the increment, the value of the candidate is higher by
4970 one iteration. */
4971 if (stmt_is_after_inc)
4972 offset -= ratio * cstepi;
4973
4974 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4975 (symbol/var1/const parts may be omitted). If we are looking for an
4976 address, find the cost of addressing this. */
4977 if (address_p)
4978 return add_costs (cost,
4979 get_address_cost (symbol_present, var_present,
4980 offset, ratio, cstepi,
4981 mem_mode,
4982 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4983 speed, stmt_is_after_inc,
4984 can_autoinc));
4985
4986 /* Otherwise estimate the costs for computing the expression. */
4987 if (!symbol_present && !var_present && !offset)
4988 {
4989 if (ratio != 1)
4990 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4991 return cost;
4992 }
4993
4994 /* Symbol + offset should be compile-time computable so consider that they
4995 are added once to the variable, if present. */
4996 if (var_present && (symbol_present || offset))
4997 cost.cost += adjust_setup_cost (data,
4998 add_cost (speed, TYPE_MODE (ctype)));
4999
5000 /* Having offset does not affect runtime cost in case it is added to
5001 symbol, but it increases complexity. */
5002 if (offset)
5003 cost.complexity++;
5004
5005 cost.cost += add_cost (speed, TYPE_MODE (ctype));
5006
5007 aratio = ratio > 0 ? ratio : -ratio;
5008 if (aratio != 1)
5009 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5010 return cost;
5011
5012 fallback:
5013 if (can_autoinc)
5014 *can_autoinc = false;
5015
5016 {
5017 /* Just get the expression, expand it and measure the cost. */
5018 tree comp = get_computation_at (data->current_loop, use, cand, at);
5019
5020 if (!comp)
5021 return infinite_cost;
5022
5023 if (address_p)
5024 comp = build_simple_mem_ref (comp);
5025
5026 return new_cost (computation_cost (comp, speed), 0);
5027 }
5028 }
5029
5030 /* Determines the cost of the computation by which USE is expressed
5031 from induction variable CAND. If ADDRESS_P is true, we just need
5032 to create an address from it, otherwise we want to get it into
5033 register. A set of invariants we depend on is stored in
5034 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5035 autoinc addressing is likely. */
5036
5037 static comp_cost
5038 get_computation_cost (struct ivopts_data *data,
5039 struct iv_use *use, struct iv_cand *cand,
5040 bool address_p, bitmap *depends_on,
5041 bool *can_autoinc, int *inv_expr_id)
5042 {
5043 return get_computation_cost_at (data,
5044 use, cand, address_p, depends_on, use->stmt,
5045 can_autoinc, inv_expr_id);
5046 }
5047
5048 /* Determines cost of basing replacement of USE on CAND in a generic
5049 expression. */
5050
5051 static bool
5052 determine_use_iv_cost_generic (struct ivopts_data *data,
5053 struct iv_use *use, struct iv_cand *cand)
5054 {
5055 bitmap depends_on;
5056 comp_cost cost;
5057 int inv_expr_id = -1;
5058
5059 /* The simple case first -- if we need to express the value of the preserved
5060 original biv, the cost is 0. This also prevents us from counting the
5061 cost of increment twice -- once at this use and once in the cost of
5062 the candidate. */
5063 if (cand->pos == IP_ORIGINAL
5064 && cand->incremented_at == use->stmt)
5065 {
5066 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5067 ERROR_MARK, -1);
5068 return true;
5069 }
5070
5071 cost = get_computation_cost (data, use, cand, false, &depends_on,
5072 NULL, &inv_expr_id);
5073
5074 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5075 inv_expr_id);
5076
5077 return !infinite_cost_p (cost);
5078 }
5079
5080 /* Determines cost of basing replacement of USE on CAND in an address. */
5081
5082 static bool
5083 determine_use_iv_cost_address (struct ivopts_data *data,
5084 struct iv_use *use, struct iv_cand *cand)
5085 {
5086 bitmap depends_on;
5087 bool can_autoinc;
5088 int inv_expr_id = -1;
5089 struct iv_use *sub_use;
5090 comp_cost sub_cost;
5091 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5092 &can_autoinc, &inv_expr_id);
5093
5094 if (cand->ainc_use == use)
5095 {
5096 if (can_autoinc)
5097 cost.cost -= cand->cost_step;
5098 /* If we generated the candidate solely for exploiting autoincrement
5099 opportunities, and it turns out it can't be used, set the cost to
5100 infinity to make sure we ignore it. */
5101 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5102 cost = infinite_cost;
5103 }
5104 for (sub_use = use->next;
5105 sub_use && !infinite_cost_p (cost);
5106 sub_use = sub_use->next)
5107 {
5108 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5109 &can_autoinc, NULL);
5110 cost = add_costs (cost, sub_cost);
5111 }
5112
5113 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5114 inv_expr_id);
5115
5116 return !infinite_cost_p (cost);
5117 }
5118
5119 /* Computes value of candidate CAND at position AT in iteration NITER, and
5120 stores it to VAL. */
5121
5122 static void
5123 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5124 aff_tree *val)
5125 {
5126 aff_tree step, delta, nit;
5127 struct iv *iv = cand->iv;
5128 tree type = TREE_TYPE (iv->base);
5129 tree steptype = type;
5130 if (POINTER_TYPE_P (type))
5131 steptype = sizetype;
5132 steptype = unsigned_type_for (type);
5133
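  /* Compute VAL = BASE + NITER * STEP (plus one extra STEP if AT is after
     the candidate's increment), doing the arithmetic in an unsigned type so
     that the multiplication cannot cause undefined overflow.  */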
5134 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5135 aff_combination_convert (&step, steptype);
5136 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5137 aff_combination_convert (&nit, steptype);
5138 aff_combination_mult (&nit, &step, &delta);
5139 if (stmt_after_increment (loop, cand, at))
5140 aff_combination_add (&delta, &step);
5141
5142 tree_to_aff_combination (iv->base, type, val);
5143 if (!POINTER_TYPE_P (type))
5144 aff_combination_convert (val, steptype);
5145 aff_combination_add (val, &delta);
5146 }
5147
5148 /* Returns period of induction variable iv. */
5149
5150 static tree
5151 iv_period (struct iv *iv)
5152 {
5153 tree step = iv->step, period, type;
5154 tree pow2div;
5155
5156 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5157
5158 type = unsigned_type_for (TREE_TYPE (step));
5159 /* Period of the iv is lcm (step, type_range)/step -1,
5160 i.e., N*type_range/step - 1. Since type range is power
5161 of two, N == step >> num_of_ending_zeros_binary (step),
5162 so the final result is
5163
5164 (type_range >> num_of_ending_zeros_binary (step)) - 1
5165
5166 */
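  /* E.g. (illustrative): for a 32-bit unsigned iv with step 8, the step has
     three trailing zero bits, so the period is (2^32 >> 3) - 1 = 2^29 - 1
     iterations before the iv value repeats.  */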
5167 pow2div = num_ending_zeros (step);
5168
5169 period = build_low_bits_mask (type,
5170 (TYPE_PRECISION (type)
5171 - tree_to_uhwi (pow2div)));
5172
5173 return period;
5174 }
5175
5176 /* Returns the comparison operator used when eliminating the iv USE. */
5177
5178 static enum tree_code
5179 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5180 {
5181 struct loop *loop = data->current_loop;
5182 basic_block ex_bb;
5183 edge exit;
5184
5185 ex_bb = gimple_bb (use->stmt);
5186 exit = EDGE_SUCC (ex_bb, 0);
5187 if (flow_bb_inside_loop_p (loop, exit->dest))
5188 exit = EDGE_SUCC (ex_bb, 1);
5189
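  /* If the exit edge is taken when the condition is true, the rewritten
     condition must become true exactly when the bound is reached, so use
     EQ_EXPR; otherwise the loop keeps iterating while the condition holds
     and NE_EXPR is the right comparison.  */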
5190 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5191 }
5192
5193 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5194 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5195 calculation is performed in non-wrapping type.
5196
5197 TODO: More generally, we could test for the situation that
5198 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5199 This would require knowing the sign of OFFSET. */
5200
5201 static bool
5202 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5203 {
5204 enum tree_code code;
5205 tree e1, e2;
5206 aff_tree aff_e1, aff_e2, aff_offset;
5207
5208 if (!nowrap_type_p (TREE_TYPE (base)))
5209 return false;
5210
5211 base = expand_simple_operations (base);
5212
5213 if (TREE_CODE (base) == SSA_NAME)
5214 {
5215 gimple *stmt = SSA_NAME_DEF_STMT (base);
5216
5217 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5218 return false;
5219
5220 code = gimple_assign_rhs_code (stmt);
5221 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5222 return false;
5223
5224 e1 = gimple_assign_rhs1 (stmt);
5225 e2 = gimple_assign_rhs2 (stmt);
5226 }
5227 else
5228 {
5229 code = TREE_CODE (base);
5230 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5231 return false;
5232 e1 = TREE_OPERAND (base, 0);
5233 e2 = TREE_OPERAND (base, 1);
5234 }
5235
5236 /* Use affine expansion as deeper inspection to prove the equality. */
5237 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5238 &aff_e2, &data->name_expansion_cache);
5239 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5240 &aff_offset, &data->name_expansion_cache);
5241 aff_combination_scale (&aff_offset, -1);
5242 switch (code)
5243 {
5244 case PLUS_EXPR:
5245 aff_combination_add (&aff_e2, &aff_offset);
5246 if (aff_combination_zero_p (&aff_e2))
5247 return true;
5248
5249 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5250 &aff_e1, &data->name_expansion_cache);
5251 aff_combination_add (&aff_e1, &aff_offset);
5252 return aff_combination_zero_p (&aff_e1);
5253
5254 case POINTER_PLUS_EXPR:
5255 aff_combination_add (&aff_e2, &aff_offset);
5256 return aff_combination_zero_p (&aff_e2);
5257
5258 default:
5259 return false;
5260 }
5261 }
5262
5263 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5264 comparison with CAND. NITER describes the number of iterations of
5265 the loops. If successful, the comparison in COMP_P is altered accordingly.
5266
5267 We aim to handle the following situation:
5268
5269 sometype *base, *p;
5270 int a, b, i;
5271
5272 i = a;
5273 p = p_0 = base + a;
5274
5275 do
5276 {
5277 bla (*p);
5278 p++;
5279 i++;
5280 }
5281 while (i < b);
5282
5283 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5284 We aim to optimize this to
5285
5286 p = p_0 = base + a;
5287 do
5288 {
5289 bla (*p);
5290 p++;
5291 }
5292 while (p < p_0 - a + b);
5293
5294 This preserves the correctness, since the pointer arithmetics does not
5295 overflow. More precisely:
5296
5297 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5298 overflow in computing it or the values of p.
5299 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5300 overflow. To prove this, we use the fact that p_0 = base + a. */
5301
5302 static bool
5303 iv_elimination_compare_lt (struct ivopts_data *data,
5304 struct iv_cand *cand, enum tree_code *comp_p,
5305 struct tree_niter_desc *niter)
5306 {
5307 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5308 struct aff_tree nit, tmpa, tmpb;
5309 enum tree_code comp;
5310 HOST_WIDE_INT step;
5311
5312 /* We need to know that the candidate induction variable does not overflow.
5313 While more complex analysis may be used to prove this, for now just
5314 check that the variable appears in the original program and that it
5315 is computed in a type that guarantees no overflows. */
5316 cand_type = TREE_TYPE (cand->iv->base);
5317 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5318 return false;
5319
5320 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5321 the calculation of the BOUND could overflow, making the comparison
5322 invalid. */
5323 if (!data->loop_single_exit_p)
5324 return false;
5325
5326 /* We need to be able to decide whether candidate is increasing or decreasing
5327 in order to choose the right comparison operator. */
5328 if (!cst_and_fits_in_hwi (cand->iv->step))
5329 return false;
5330 step = int_cst_value (cand->iv->step);
5331
5332 /* Check that the number of iterations matches the expected pattern:
5333 a + 1 > b ? 0 : b - a - 1. */
5334 mbz = niter->may_be_zero;
5335 if (TREE_CODE (mbz) == GT_EXPR)
5336 {
5337 /* Handle a + 1 > b. */
5338 tree op0 = TREE_OPERAND (mbz, 0);
5339 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5340 {
5341 a = TREE_OPERAND (op0, 0);
5342 b = TREE_OPERAND (mbz, 1);
5343 }
5344 else
5345 return false;
5346 }
5347 else if (TREE_CODE (mbz) == LT_EXPR)
5348 {
5349 tree op1 = TREE_OPERAND (mbz, 1);
5350
5351 /* Handle b < a + 1. */
5352 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5353 {
5354 a = TREE_OPERAND (op1, 0);
5355 b = TREE_OPERAND (mbz, 0);
5356 }
5357 else
5358 return false;
5359 }
5360 else
5361 return false;
5362
5363 /* Expected number of iterations is B - A - 1. Check that it matches
5364 the actual number, i.e., that B - A - NITER = 1. */
5365 tree_to_aff_combination (niter->niter, nit_type, &nit);
5366 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5367 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5368 aff_combination_scale (&nit, -1);
5369 aff_combination_scale (&tmpa, -1);
5370 aff_combination_add (&tmpb, &tmpa);
5371 aff_combination_add (&tmpb, &nit);
5372 if (tmpb.n != 0 || tmpb.offset != 1)
5373 return false;
5374
5375 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5376 overflow. */
5377 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5378 cand->iv->step,
5379 fold_convert (TREE_TYPE (cand->iv->step), a));
5380 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5381 return false;
5382
5383 /* Determine the new comparison operator. */
5384 comp = step < 0 ? GT_EXPR : LT_EXPR;
5385 if (*comp_p == NE_EXPR)
5386 *comp_p = comp;
5387 else if (*comp_p == EQ_EXPR)
5388 *comp_p = invert_tree_comparison (comp, false);
5389 else
5390 gcc_unreachable ();
5391
5392 return true;
5393 }
5394
5395 /* Check whether it is possible to express the condition in USE by comparison
5396 of candidate CAND. If so, store the value compared with to BOUND, and the
5397 comparison operator to COMP. */
5398
5399 static bool
5400 may_eliminate_iv (struct ivopts_data *data,
5401 struct iv_use *use, struct iv_cand *cand, tree *bound,
5402 enum tree_code *comp)
5403 {
5404 basic_block ex_bb;
5405 edge exit;
5406 tree period;
5407 struct loop *loop = data->current_loop;
5408 aff_tree bnd;
5409 struct tree_niter_desc *desc = NULL;
5410
5411 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5412 return false;
5413
5414 /* For now this works only for exits that dominate the loop latch.
5415 TODO: extend to other conditions inside loop body. */
5416 ex_bb = gimple_bb (use->stmt);
5417 if (use->stmt != last_stmt (ex_bb)
5418 || gimple_code (use->stmt) != GIMPLE_COND
5419 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5420 return false;
5421
5422 exit = EDGE_SUCC (ex_bb, 0);
5423 if (flow_bb_inside_loop_p (loop, exit->dest))
5424 exit = EDGE_SUCC (ex_bb, 1);
5425 if (flow_bb_inside_loop_p (loop, exit->dest))
5426 return false;
5427
5428 desc = niter_for_exit (data, exit);
5429 if (!desc)
5430 return false;
5431
5432 /* Determine whether we can use the variable to test the exit condition.
5433 This is the case iff the period of the induction variable is greater
5434 than the number of iterations for which the exit condition is true. */
5435 period = iv_period (cand->iv);
5436
5437 /* If the number of iterations is constant, compare against it directly. */
5438 if (TREE_CODE (desc->niter) == INTEGER_CST)
5439 {
5440 /* See cand_value_at. */
5441 if (stmt_after_increment (loop, cand, use->stmt))
5442 {
5443 if (!tree_int_cst_lt (desc->niter, period))
5444 return false;
5445 }
5446 else
5447 {
5448 if (tree_int_cst_lt (period, desc->niter))
5449 return false;
5450 }
5451 }
5452
5453 /* If not, and if this is the only possible exit of the loop, see whether
5454 we can get a conservative estimate on the number of iterations of the
5455 entire loop and compare against that instead. */
5456 else
5457 {
5458 widest_int period_value, max_niter;
5459
5460 max_niter = desc->max;
5461 if (stmt_after_increment (loop, cand, use->stmt))
5462 max_niter += 1;
5463 period_value = wi::to_widest (period);
5464 if (wi::gtu_p (max_niter, period_value))
5465 {
5466 /* See if we can take advantage of inferred loop bound information. */
5467 if (data->loop_single_exit_p)
5468 {
5469 if (!max_loop_iterations (loop, &max_niter))
5470 return false;
5471 /* The loop bound is already adjusted by adding 1. */
5472 if (wi::gtu_p (max_niter, period_value))
5473 return false;
5474 }
5475 else
5476 return false;
5477 }
5478 }
5479
5480 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5481
5482 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5483 aff_combination_to_tree (&bnd));
5484 *comp = iv_elimination_compare (data, use);
5485
5486 /* It is unlikely that computing the number of iterations using division
5487 would be more profitable than keeping the original induction variable. */
5488 if (expression_expensive_p (*bound))
5489 return false;
5490
5491 /* Sometimes, it is possible to handle the situation that the number of
5492 iterations may be zero unless additional assumptions hold, by using <
5493 instead of != in the exit condition.
5494
5495 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5496 base the exit condition on it. However, that is often too
5497 expensive. */
5498 if (!integer_zerop (desc->may_be_zero))
5499 return iv_elimination_compare_lt (data, cand, comp, desc);
5500
5501 return true;
5502 }
5503
5504 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5505 be copied, if it is used in the loop body and DATA->body_includes_call. */
5506
5507 static int
5508 parm_decl_cost (struct ivopts_data *data, tree bound)
5509 {
5510 tree sbound = bound;
5511 STRIP_NOPS (sbound);
5512
5513 if (TREE_CODE (sbound) == SSA_NAME
5514 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5515 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5516 && data->body_includes_call)
5517 return COSTS_N_INSNS (1);
5518
5519 return 0;
5520 }
5521
5522 /* Determines cost of basing replacement of USE on CAND in a condition. */
5523
5524 static bool
5525 determine_use_iv_cost_condition (struct ivopts_data *data,
5526 struct iv_use *use, struct iv_cand *cand)
5527 {
5528 tree bound = NULL_TREE;
5529 struct iv *cmp_iv;
5530 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5531 comp_cost elim_cost, express_cost, cost, bound_cost;
5532 bool ok;
5533 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5534 tree *control_var, *bound_cst;
5535 enum tree_code comp = ERROR_MARK;
5536
5537 /* Only consider real candidates. */
5538 if (!cand->iv)
5539 {
5540 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5541 ERROR_MARK, -1);
5542 return false;
5543 }
5544
5545 /* Try iv elimination. */
5546 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5547 {
5548 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5549 if (elim_cost.cost == 0)
5550 elim_cost.cost = parm_decl_cost (data, bound);
5551 else if (TREE_CODE (bound) == INTEGER_CST)
5552 elim_cost.cost = 0;
5553 /* If we replace a loop condition 'i < n' with 'p < base + n',
5554 depends_on_elim will have 'base' and 'n' set, which implies
5555 that both 'base' and 'n' will be live during the loop. More likely,
5556 'base + n' will be loop invariant, resulting in only one live value
5557 during the loop. So in that case we clear depends_on_elim and set
5558 elim_inv_expr_id instead. */
5559 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5560 {
5561 elim_inv_expr_id = get_expr_id (data, bound);
5562 bitmap_clear (depends_on_elim);
5563 }
5564 /* The bound is a loop invariant, so it will be only computed
5565 once. */
5566 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5567 }
5568 else
5569 elim_cost = infinite_cost;
5570
5571 /* Try expressing the original giv. If it is compared with an invariant,
5572 note that we cannot get rid of it. */
5573 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5574 NULL, &cmp_iv);
5575 gcc_assert (ok);
5576
5577 /* When the condition is a comparison of the candidate IV against
5578 zero, prefer this IV.
5579
5580 TODO: The constant that we're subtracting from the cost should
5581 be target-dependent. This information should be added to the
5582 target costs for each backend. */
5583 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5584 && integer_zerop (*bound_cst)
5585 && (operand_equal_p (*control_var, cand->var_after, 0)
5586 || operand_equal_p (*control_var, cand->var_before, 0)))
5587 elim_cost.cost -= 1;
5588
5589 express_cost = get_computation_cost (data, use, cand, false,
5590 &depends_on_express, NULL,
5591 &express_inv_expr_id);
5592 fd_ivopts_data = data;
5593 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5594
5595 /* Count the cost of the original bound as well. */
5596 bound_cost = force_var_cost (data, *bound_cst, NULL);
5597 if (bound_cost.cost == 0)
5598 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5599 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5600 bound_cost.cost = 0;
5601 express_cost.cost += bound_cost.cost;
5602
5603 /* Choose the better approach, preferring the eliminated IV. */
5604 if (compare_costs (elim_cost, express_cost) <= 0)
5605 {
5606 cost = elim_cost;
5607 depends_on = depends_on_elim;
5608 depends_on_elim = NULL;
5609 inv_expr_id = elim_inv_expr_id;
5610 }
5611 else
5612 {
5613 cost = express_cost;
5614 depends_on = depends_on_express;
5615 depends_on_express = NULL;
5616 bound = NULL_TREE;
5617 comp = ERROR_MARK;
5618 inv_expr_id = express_inv_expr_id;
5619 }
5620
5621 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5622
5623 if (depends_on_elim)
5624 BITMAP_FREE (depends_on_elim);
5625 if (depends_on_express)
5626 BITMAP_FREE (depends_on_express);
5627
5628 return !infinite_cost_p (cost);
5629 }
5630
5631 /* Determines cost of basing replacement of USE on CAND. Returns false
5632 if USE cannot be based on CAND. */
5633
5634 static bool
5635 determine_use_iv_cost (struct ivopts_data *data,
5636 struct iv_use *use, struct iv_cand *cand)
5637 {
5638 switch (use->type)
5639 {
5640 case USE_NONLINEAR_EXPR:
5641 return determine_use_iv_cost_generic (data, use, cand);
5642
5643 case USE_ADDRESS:
5644 return determine_use_iv_cost_address (data, use, cand);
5645
5646 case USE_COMPARE:
5647 return determine_use_iv_cost_condition (data, use, cand);
5648
5649 default:
5650 gcc_unreachable ();
5651 }
5652 }
5653
5654 /* Return true if get_computation_cost indicates that autoincrement is
5655 a possibility for the pair of USE and CAND, false otherwise. */
5656
5657 static bool
5658 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5659 struct iv_cand *cand)
5660 {
5661 bitmap depends_on;
5662 bool can_autoinc;
5663 comp_cost cost;
5664
5665 if (use->type != USE_ADDRESS)
5666 return false;
5667
5668 cost = get_computation_cost (data, use, cand, true, &depends_on,
5669 &can_autoinc, NULL);
5670
5671 BITMAP_FREE (depends_on);
5672
5673 return !infinite_cost_p (cost) && can_autoinc;
5674 }
5675
5676 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5677 use that allows autoincrement, and set their AINC_USE if possible. */
5678
5679 static void
5680 set_autoinc_for_original_candidates (struct ivopts_data *data)
5681 {
5682 unsigned i, j;
5683
5684 for (i = 0; i < n_iv_cands (data); i++)
5685 {
5686 struct iv_cand *cand = iv_cand (data, i);
5687 struct iv_use *closest_before = NULL;
5688 struct iv_use *closest_after = NULL;
5689 if (cand->pos != IP_ORIGINAL)
5690 continue;
5691
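      /* Find the uses nearest to the candidate's increment within the same
	 basic block, one before and one after it; those are the uses
	 considered for combining the increment with an autoincrement
	 addressing mode.  */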
5692 for (j = 0; j < n_iv_uses (data); j++)
5693 {
5694 struct iv_use *use = iv_use (data, j);
5695 unsigned uid = gimple_uid (use->stmt);
5696
5697 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5698 continue;
5699
5700 if (uid < gimple_uid (cand->incremented_at)
5701 && (closest_before == NULL
5702 || uid > gimple_uid (closest_before->stmt)))
5703 closest_before = use;
5704
5705 if (uid > gimple_uid (cand->incremented_at)
5706 && (closest_after == NULL
5707 || uid < gimple_uid (closest_after->stmt)))
5708 closest_after = use;
5709 }
5710
5711 if (closest_before != NULL
5712 && autoinc_possible_for_pair (data, closest_before, cand))
5713 cand->ainc_use = closest_before;
5714 else if (closest_after != NULL
5715 && autoinc_possible_for_pair (data, closest_after, cand))
5716 cand->ainc_use = closest_after;
5717 }
5718 }
5719
5720 /* Finds the candidates for the induction variables. */
5721
5722 static void
5723 find_iv_candidates (struct ivopts_data *data)
5724 {
5725 /* Add commonly used ivs. */
5726 add_standard_iv_candidates (data);
5727
5728 /* Add old induction variables. */
5729 add_iv_candidate_for_bivs (data);
5730
5731 /* Add induction variables derived from uses. */
5732 add_iv_candidate_for_uses (data);
5733
5734 set_autoinc_for_original_candidates (data);
5735
5736 /* Record the important candidates. */
5737 record_important_candidates (data);
5738 }
5739
5740 /* Determines the cost of basing each use on each iv candidate. */
5741
5742 static void
5743 determine_use_iv_costs (struct ivopts_data *data)
5744 {
5745 unsigned i, j;
5746 struct iv_use *use;
5747 struct iv_cand *cand;
5748 bitmap to_clear = BITMAP_ALLOC (NULL);
5749
5750 alloc_use_cost_map (data);
5751
5752 for (i = 0; i < n_iv_uses (data); i++)
5753 {
5754 use = iv_use (data, i);
5755
5756 if (data->consider_all_candidates)
5757 {
5758 for (j = 0; j < n_iv_cands (data); j++)
5759 {
5760 cand = iv_cand (data, j);
5761 determine_use_iv_cost (data, use, cand);
5762 }
5763 }
5764 else
5765 {
5766 bitmap_iterator bi;
5767
5768 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5769 {
5770 cand = iv_cand (data, j);
5771 if (!determine_use_iv_cost (data, use, cand))
5772 bitmap_set_bit (to_clear, j);
5773 }
5774
5775 /* Remove the candidates for which the cost is infinite from
5776 the list of related candidates. */
5777 bitmap_and_compl_into (use->related_cands, to_clear);
5778 bitmap_clear (to_clear);
5779 }
5780 }
5781
5782 BITMAP_FREE (to_clear);
5783
5784 if (dump_file && (dump_flags & TDF_DETAILS))
5785 {
5786 fprintf (dump_file, "Use-candidate costs:\n");
5787
5788 for (i = 0; i < n_iv_uses (data); i++)
5789 {
5790 use = iv_use (data, i);
5791
5792 fprintf (dump_file, "Use %d:\n", i);
5793 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5794 for (j = 0; j < use->n_map_members; j++)
5795 {
5796 if (!use->cost_map[j].cand
5797 || infinite_cost_p (use->cost_map[j].cost))
5798 continue;
5799
5800 fprintf (dump_file, " %d\t%d\t%d\t",
5801 use->cost_map[j].cand->id,
5802 use->cost_map[j].cost.cost,
5803 use->cost_map[j].cost.complexity);
5804 if (use->cost_map[j].depends_on)
5805 bitmap_print (dump_file,
5806 use->cost_map[j].depends_on, "","");
5807 if (use->cost_map[j].inv_expr_id != -1)
5808 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5809 fprintf (dump_file, "\n");
5810 }
5811
5812 fprintf (dump_file, "\n");
5813 }
5814 fprintf (dump_file, "\n");
5815 }
5816 }
5817
5818 /* Determines cost of the candidate CAND. */
5819
5820 static void
5821 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5822 {
5823 comp_cost cost_base;
5824 unsigned cost, cost_step;
5825 tree base;
5826
5827 if (!cand->iv)
5828 {
5829 cand->cost = 0;
5830 return;
5831 }
5832
5833 /* There are two costs associated with the candidate -- its increment
5834 and its initialization. The second is almost negligible for any loop
5835 that rolls enough, so we give it only a small weight. */
5836
5837 base = cand->iv->base;
5838 cost_base = force_var_cost (data, base, NULL);
5839 /* It will be exceptional that the iv register happens to be initialized with
5840 the proper value at no cost. In general, there will at least be a regcopy
5841 or a const set. */
5842 if (cost_base.cost == 0)
5843 cost_base.cost = COSTS_N_INSNS (1);
5844 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5845
5846 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5847
5848 /* Prefer the original ivs unless we may gain something by replacing them.
5849 The reason is to make debugging simpler, so this is not relevant for
5850 artificial ivs created by other optimization passes. */
5851 if (cand->pos != IP_ORIGINAL
5852 || !SSA_NAME_VAR (cand->var_before)
5853 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5854 cost++;
5855
5856 /* Prefer not to insert statements into the latch unless there are some
5857 already (so that we do not create unnecessary jumps). */
5858 if (cand->pos == IP_END
5859 && empty_block_p (ip_end_pos (data->current_loop)))
5860 cost++;
5861
5862 cand->cost = cost;
5863 cand->cost_step = cost_step;
5864 }
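
/* For a rough feel of the numbers (purely illustrative -- the real values
   come from the target hooks): with an increment costing one instruction
   and a setup consisting of a single constant load, a loop that is
   expected to roll many times pays essentially just the per-iteration
   increment for the candidate, plus the unit penalties added above for
   non-original ivs and for placing the increment into an otherwise empty
   latch.  */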
5865
5866 /* Determines the computation costs of the candidates. */
5867
5868 static void
5869 determine_iv_costs (struct ivopts_data *data)
5870 {
5871 unsigned i;
5872
5873 if (dump_file && (dump_flags & TDF_DETAILS))
5874 {
5875 fprintf (dump_file, "Candidate costs:\n");
5876 fprintf (dump_file, " cand\tcost\n");
5877 }
5878
5879 for (i = 0; i < n_iv_cands (data); i++)
5880 {
5881 struct iv_cand *cand = iv_cand (data, i);
5882
5883 determine_iv_cost (data, cand);
5884
5885 if (dump_file && (dump_flags & TDF_DETAILS))
5886 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5887 }
5888
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 fprintf (dump_file, "\n");
5891 }
5892
5893 /* Calculates cost for having SIZE induction variables. */
5894
5895 static unsigned
5896 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5897 {
5898 /* We add size to the cost, so that we prefer eliminating ivs
5899 if possible. */
5900 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5901 data->body_includes_call);
5902 }
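
/* A rough sketch of the intent: for small sizes the penalty grows only by
   the SIZE term added above, which biases the search towards fewer ivs;
   once SIZE together with the other registers already in use
   (DATA->regs_used) exceeds what the target provides,
   estimate_reg_pressure_cost starts charging spill costs as well, so
   large candidate sets quickly become unattractive.  */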
5903
5904 /* For each size of the induction variable set determine the penalty. */
5905
5906 static void
5907 determine_set_costs (struct ivopts_data *data)
5908 {
5909 unsigned j, n;
5910 gphi *phi;
5911 gphi_iterator psi;
5912 tree op;
5913 struct loop *loop = data->current_loop;
5914 bitmap_iterator bi;
5915
5916 if (dump_file && (dump_flags & TDF_DETAILS))
5917 {
5918 fprintf (dump_file, "Global costs:\n");
5919 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5920 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5921 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5922 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5923 }
5924
5925 n = 0;
5926 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5927 {
5928 phi = psi.phi ();
5929 op = PHI_RESULT (phi);
5930
5931 if (virtual_operand_p (op))
5932 continue;
5933
5934 if (get_iv (data, op))
5935 continue;
5936
5937 n++;
5938 }
5939
5940 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5941 {
5942 struct version_info *info = ver_info (data, j);
5943
5944 if (info->inv_id && info->has_nonlin_use)
5945 n++;
5946 }
5947
5948 data->regs_used = n;
5949 if (dump_file && (dump_flags & TDF_DETAILS))
5950 fprintf (dump_file, " regs_used %d\n", n);
5951
5952 if (dump_file && (dump_flags & TDF_DETAILS))
5953 {
5954 fprintf (dump_file, " cost for size:\n");
5955 fprintf (dump_file, " ivs\tcost\n");
5956 for (j = 0; j <= 2 * target_avail_regs; j++)
5957 fprintf (dump_file, " %d\t%d\n", j,
5958 ivopts_global_cost_for_size (data, j));
5959 fprintf (dump_file, "\n");
5960 }
5961 }
5962
5963 /* Returns true if A is a cheaper cost pair than B. */
5964
5965 static bool
5966 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5967 {
5968 int cmp;
5969
5970 if (!a)
5971 return false;
5972
5973 if (!b)
5974 return true;
5975
5976 cmp = compare_costs (a->cost, b->cost);
5977 if (cmp < 0)
5978 return true;
5979
5980 if (cmp > 0)
5981 return false;
5982
5983 /* In case the costs are the same, prefer the cheaper candidate. */
5984 if (a->cand->cost < b->cand->cost)
5985 return true;
5986
5987 return false;
5988 }
5989
5990
5991 /* Returns the cost pair for the candidate by which USE is expressed in IVS. */
5992
5993 static struct cost_pair *
5994 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5995 {
5996 return ivs->cand_for_use[use->id];
5997 }
5998
5999 /* Computes the cost field of IVS structure. */
6000
6001 static void
6002 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6003 {
6004 comp_cost cost = ivs->cand_use_cost;
6005
6006 cost.cost += ivs->cand_cost;
6007
6008 cost.cost += ivopts_global_cost_for_size (data,
6009 ivs->n_regs + ivs->num_used_inv_expr);
6010
6011 ivs->cost = cost;
6012 }
6013
6014 /* Remove invariants in set INVS from set IVS. */
6015
6016 static void
6017 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6018 {
6019 bitmap_iterator bi;
6020 unsigned iid;
6021
6022 if (!invs)
6023 return;
6024
6025 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6026 {
6027 ivs->n_invariant_uses[iid]--;
6028 if (ivs->n_invariant_uses[iid] == 0)
6029 ivs->n_regs--;
6030 }
6031 }
6032
6033 /* Set USE not to be expressed by any candidate in IVS. */
6034
6035 static void
6036 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6037 struct iv_use *use)
6038 {
6039 unsigned uid = use->id, cid;
6040 struct cost_pair *cp;
6041
6042 cp = ivs->cand_for_use[uid];
6043 if (!cp)
6044 return;
6045 cid = cp->cand->id;
6046
6047 ivs->bad_uses++;
6048 ivs->cand_for_use[uid] = NULL;
6049 ivs->n_cand_uses[cid]--;
6050
6051 if (ivs->n_cand_uses[cid] == 0)
6052 {
6053 bitmap_clear_bit (ivs->cands, cid);
6054 /* Do not count the pseudocandidates. */
6055 if (cp->cand->iv)
6056 ivs->n_regs--;
6057 ivs->n_cands--;
6058 ivs->cand_cost -= cp->cand->cost;
6059
6060 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6061 }
6062
6063 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6064
6065 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6066
6067 if (cp->inv_expr_id != -1)
6068 {
6069 ivs->used_inv_expr[cp->inv_expr_id]--;
6070 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6071 ivs->num_used_inv_expr--;
6072 }
6073 iv_ca_recount_cost (data, ivs);
6074 }
6075
6076 /* Add invariants in set INVS to set IVS. */
6077
6078 static void
6079 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6080 {
6081 bitmap_iterator bi;
6082 unsigned iid;
6083
6084 if (!invs)
6085 return;
6086
6087 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6088 {
6089 ivs->n_invariant_uses[iid]++;
6090 if (ivs->n_invariant_uses[iid] == 1)
6091 ivs->n_regs++;
6092 }
6093 }
6094
6095 /* Set cost pair for USE in set IVS to CP. */
6096
6097 static void
6098 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6099 struct iv_use *use, struct cost_pair *cp)
6100 {
6101 unsigned uid = use->id, cid;
6102
6103 if (ivs->cand_for_use[uid] == cp)
6104 return;
6105
6106 if (ivs->cand_for_use[uid])
6107 iv_ca_set_no_cp (data, ivs, use);
6108
6109 if (cp)
6110 {
6111 cid = cp->cand->id;
6112
6113 ivs->bad_uses--;
6114 ivs->cand_for_use[uid] = cp;
6115 ivs->n_cand_uses[cid]++;
6116 if (ivs->n_cand_uses[cid] == 1)
6117 {
6118 bitmap_set_bit (ivs->cands, cid);
6119 /* Do not count the pseudocandidates. */
6120 if (cp->cand->iv)
6121 ivs->n_regs++;
6122 ivs->n_cands++;
6123 ivs->cand_cost += cp->cand->cost;
6124
6125 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6126 }
6127
6128 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6129 iv_ca_set_add_invariants (ivs, cp->depends_on);
6130
6131 if (cp->inv_expr_id != -1)
6132 {
6133 ivs->used_inv_expr[cp->inv_expr_id]++;
6134 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6135 ivs->num_used_inv_expr++;
6136 }
6137 iv_ca_recount_cost (data, ivs);
6138 }
6139 }
6140
6141 /* Extend set IVS by expressing USE by some of the candidates in it
6142 if possible. Consider all important candidates if candidates in
6143 set IVS don't give any result. */
6144
6145 static void
6146 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6147 struct iv_use *use)
6148 {
6149 struct cost_pair *best_cp = NULL, *cp;
6150 bitmap_iterator bi;
6151 unsigned i;
6152 struct iv_cand *cand;
6153
6154 gcc_assert (ivs->upto >= use->id);
6155 ivs->upto++;
6156 ivs->bad_uses++;
6157
6158 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6159 {
6160 cand = iv_cand (data, i);
6161 cp = get_use_iv_cost (data, use, cand);
6162 if (cheaper_cost_pair (cp, best_cp))
6163 best_cp = cp;
6164 }
6165
6166 if (best_cp == NULL)
6167 {
6168 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6169 {
6170 cand = iv_cand (data, i);
6171 cp = get_use_iv_cost (data, use, cand);
6172 if (cheaper_cost_pair (cp, best_cp))
6173 best_cp = cp;
6174 }
6175 }
6176
6177 iv_ca_set_cp (data, ivs, use, best_cp);
6178 }
6179
6180 /* Get cost for assignment IVS. */
6181
6182 static comp_cost
6183 iv_ca_cost (struct iv_ca *ivs)
6184 {
6185 /* This was a conditional expression but it triggered a bug in
6186 Sun C 5.5. */
6187 if (ivs->bad_uses)
6188 return infinite_cost;
6189 else
6190 return ivs->cost;
6191 }
6192
6193 /* Returns true if all dependences of CP are among invariants in IVS. */
6194
6195 static bool
6196 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6197 {
6198 unsigned i;
6199 bitmap_iterator bi;
6200
6201 if (!cp->depends_on)
6202 return true;
6203
6204 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6205 {
6206 if (ivs->n_invariant_uses[i] == 0)
6207 return false;
6208 }
6209
6210 return true;
6211 }
6212
6213 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6214 it before NEXT_CHANGE. */
6215
6216 static struct iv_ca_delta *
6217 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6218 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6219 {
6220 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6221
6222 change->use = use;
6223 change->old_cp = old_cp;
6224 change->new_cp = new_cp;
6225 change->next_change = next_change;
6226
6227 return change;
6228 }
6229
6230 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6231 are rewritten. */
6232
6233 static struct iv_ca_delta *
6234 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6235 {
6236 struct iv_ca_delta *last;
6237
6238 if (!l2)
6239 return l1;
6240
6241 if (!l1)
6242 return l2;
6243
6244 for (last = l1; last->next_change; last = last->next_change)
6245 continue;
6246 last->next_change = l2;
6247
6248 return l1;
6249 }
6250
6251 /* Reverse the list of changes DELTA, forming the inverse to it. */
6252
6253 static struct iv_ca_delta *
6254 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6255 {
6256 struct iv_ca_delta *act, *next, *prev = NULL;
6257
6258 for (act = delta; act; act = next)
6259 {
6260 next = act->next_change;
6261 act->next_change = prev;
6262 prev = act;
6263
6264 std::swap (act->old_cp, act->new_cp);
6265 }
6266
6267 return prev;
6268 }
6269
6270 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6271 reverted instead. */
6272
6273 static void
6274 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6275 struct iv_ca_delta *delta, bool forward)
6276 {
6277 struct cost_pair *from, *to;
6278 struct iv_ca_delta *act;
6279
6280 if (!forward)
6281 delta = iv_ca_delta_reverse (delta);
6282
6283 for (act = delta; act; act = act->next_change)
6284 {
6285 from = act->old_cp;
6286 to = act->new_cp;
6287 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6288 iv_ca_set_cp (data, ivs, act->use, to);
6289 }
6290
6291 if (!forward)
6292 iv_ca_delta_reverse (delta);
6293 }
6294
6295 /* Returns true if CAND is used in IVS. */
6296
6297 static bool
6298 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6299 {
6300 return ivs->n_cand_uses[cand->id] > 0;
6301 }
6302
6303 /* Returns number of induction variable candidates in the set IVS. */
6304
6305 static unsigned
6306 iv_ca_n_cands (struct iv_ca *ivs)
6307 {
6308 return ivs->n_cands;
6309 }
6310
6311 /* Free the list of changes DELTA. */
6312
6313 static void
6314 iv_ca_delta_free (struct iv_ca_delta **delta)
6315 {
6316 struct iv_ca_delta *act, *next;
6317
6318 for (act = *delta; act; act = next)
6319 {
6320 next = act->next_change;
6321 free (act);
6322 }
6323
6324 *delta = NULL;
6325 }
6326
6327 /* Allocates a new iv candidate assignment. */
6328
6329 static struct iv_ca *
6330 iv_ca_new (struct ivopts_data *data)
6331 {
6332 struct iv_ca *nw = XNEW (struct iv_ca);
6333
6334 nw->upto = 0;
6335 nw->bad_uses = 0;
6336 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6337 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6338 nw->cands = BITMAP_ALLOC (NULL);
6339 nw->n_cands = 0;
6340 nw->n_regs = 0;
6341 nw->cand_use_cost = no_cost;
6342 nw->cand_cost = 0;
6343 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6344 nw->cost = no_cost;
6345 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6346 nw->num_used_inv_expr = 0;
6347
6348 return nw;
6349 }
6350
6351 /* Free memory occupied by the set IVS. */
6352
6353 static void
6354 iv_ca_free (struct iv_ca **ivs)
6355 {
6356 free ((*ivs)->cand_for_use);
6357 free ((*ivs)->n_cand_uses);
6358 BITMAP_FREE ((*ivs)->cands);
6359 free ((*ivs)->n_invariant_uses);
6360 free ((*ivs)->used_inv_expr);
6361 free (*ivs);
6362 *ivs = NULL;
6363 }
6364
6365 /* Dumps IVS to FILE. */
6366
6367 static void
6368 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6369 {
6370 const char *pref = " invariants ";
6371 unsigned i;
6372 comp_cost cost = iv_ca_cost (ivs);
6373
6374 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6375 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6376 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6377 bitmap_print (file, ivs->cands, " candidates: ","\n");
6378
6379 for (i = 0; i < ivs->upto; i++)
6380 {
6381 struct iv_use *use = iv_use (data, i);
6382 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6383 if (cp)
6384 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6385 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6386 else
6387 fprintf (file, " use:%d --> ??\n", use->id);
6388 }
6389
6390 for (i = 1; i <= data->max_inv_id; i++)
6391 if (ivs->n_invariant_uses[i])
6392 {
6393 fprintf (file, "%s%d", pref, i);
6394 pref = ", ";
6395 }
6396 fprintf (file, "\n\n");
6397 }
6398
6399 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6400 new set, and store differences in DELTA. Number of induction variables
6401 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6402 the function will try to find a solution with a minimal number of iv candidates. */
6403
6404 static comp_cost
6405 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6406 struct iv_cand *cand, struct iv_ca_delta **delta,
6407 unsigned *n_ivs, bool min_ncand)
6408 {
6409 unsigned i;
6410 comp_cost cost;
6411 struct iv_use *use;
6412 struct cost_pair *old_cp, *new_cp;
6413
6414 *delta = NULL;
6415 for (i = 0; i < ivs->upto; i++)
6416 {
6417 use = iv_use (data, i);
6418 old_cp = iv_ca_cand_for_use (ivs, use);
6419
6420 if (old_cp
6421 && old_cp->cand == cand)
6422 continue;
6423
6424 new_cp = get_use_iv_cost (data, use, cand);
6425 if (!new_cp)
6426 continue;
6427
6428 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6429 continue;
6430
6431 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6432 continue;
6433
6434 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6435 }
6436
6437 iv_ca_delta_commit (data, ivs, *delta, true);
6438 cost = iv_ca_cost (ivs);
6439 if (n_ivs)
6440 *n_ivs = iv_ca_n_cands (ivs);
6441 iv_ca_delta_commit (data, ivs, *delta, false);
6442
6443 return cost;
6444 }
6445
6446 /* Try narrowing set IVS by removing CAND. Return the cost of
6447 the new set and store the differences in DELTA. START is
6448 the candidate with which we start narrowing. */
6449
6450 static comp_cost
6451 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6452 struct iv_cand *cand, struct iv_cand *start,
6453 struct iv_ca_delta **delta)
6454 {
6455 unsigned i, ci;
6456 struct iv_use *use;
6457 struct cost_pair *old_cp, *new_cp, *cp;
6458 bitmap_iterator bi;
6459 struct iv_cand *cnd;
6460 comp_cost cost, best_cost, acost;
6461
6462 *delta = NULL;
6463 for (i = 0; i < n_iv_uses (data); i++)
6464 {
6465 use = iv_use (data, i);
6466
6467 old_cp = iv_ca_cand_for_use (ivs, use);
6468 if (old_cp->cand != cand)
6469 continue;
6470
6471 best_cost = iv_ca_cost (ivs);
6472 /* Start narrowing with START. */
6473 new_cp = get_use_iv_cost (data, use, start);
6474
6475 if (data->consider_all_candidates)
6476 {
6477 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6478 {
6479 if (ci == cand->id || (start && ci == start->id))
6480 continue;
6481
6482 cnd = iv_cand (data, ci);
6483
6484 cp = get_use_iv_cost (data, use, cnd);
6485 if (!cp)
6486 continue;
6487
6488 iv_ca_set_cp (data, ivs, use, cp);
6489 acost = iv_ca_cost (ivs);
6490
6491 if (compare_costs (acost, best_cost) < 0)
6492 {
6493 best_cost = acost;
6494 new_cp = cp;
6495 }
6496 }
6497 }
6498 else
6499 {
6500 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6501 {
6502 if (ci == cand->id || (start && ci == start->id))
6503 continue;
6504
6505 cnd = iv_cand (data, ci);
6506
6507 cp = get_use_iv_cost (data, use, cnd);
6508 if (!cp)
6509 continue;
6510
6511 iv_ca_set_cp (data, ivs, use, cp);
6512 acost = iv_ca_cost (ivs);
6513
6514 if (compare_costs (acost, best_cost) < 0)
6515 {
6516 best_cost = acost;
6517 new_cp = cp;
6518 }
6519 }
6520 }
6521 /* Restore to old cp for use. */
6522 iv_ca_set_cp (data, ivs, use, old_cp);
6523
6524 if (!new_cp)
6525 {
6526 iv_ca_delta_free (delta);
6527 return infinite_cost;
6528 }
6529
6530 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6531 }
6532
6533 iv_ca_delta_commit (data, ivs, *delta, true);
6534 cost = iv_ca_cost (ivs);
6535 iv_ca_delta_commit (data, ivs, *delta, false);
6536
6537 return cost;
6538 }
6539
6540 /* Try optimizing the set of candidates IVS by removing candidates different
6541 from EXCEPT_CAND from it. Return the cost of the new set, and store
6542 differences in DELTA. */
6543
6544 static comp_cost
6545 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6546 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6547 {
6548 bitmap_iterator bi;
6549 struct iv_ca_delta *act_delta, *best_delta;
6550 unsigned i;
6551 comp_cost best_cost, acost;
6552 struct iv_cand *cand;
6553
6554 best_delta = NULL;
6555 best_cost = iv_ca_cost (ivs);
6556
6557 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6558 {
6559 cand = iv_cand (data, i);
6560
6561 if (cand == except_cand)
6562 continue;
6563
6564 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6565
6566 if (compare_costs (acost, best_cost) < 0)
6567 {
6568 best_cost = acost;
6569 iv_ca_delta_free (&best_delta);
6570 best_delta = act_delta;
6571 }
6572 else
6573 iv_ca_delta_free (&act_delta);
6574 }
6575
6576 if (!best_delta)
6577 {
6578 *delta = NULL;
6579 return best_cost;
6580 }
6581
6582 /* Recurse to possibly remove other unnecessary ivs. */
6583 iv_ca_delta_commit (data, ivs, best_delta, true);
6584 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6585 iv_ca_delta_commit (data, ivs, best_delta, false);
6586 *delta = iv_ca_delta_join (best_delta, *delta);
6587 return best_cost;
6588 }
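
/* Note the recursion above: narrowing away one candidate reassigns its
   uses to other candidates, which can in turn leave some other candidate
   covering too few uses to pay for itself, so we keep pruning until no
   single removal improves the cost.  */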
6589
6590 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6591 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6592 the corresponding cost pair; otherwise just return BEST_CP. */
6593
6594 static struct cost_pair*
6595 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6596 unsigned int cand_idx, struct iv_cand *old_cand,
6597 struct cost_pair *best_cp)
6598 {
6599 struct iv_cand *cand;
6600 struct cost_pair *cp;
6601
6602 gcc_assert (old_cand != NULL && best_cp != NULL);
6603 if (cand_idx == old_cand->id)
6604 return best_cp;
6605
6606 cand = iv_cand (data, cand_idx);
6607 cp = get_use_iv_cost (data, use, cand);
6608 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6609 return cp;
6610
6611 return best_cp;
6612 }
6613
6614 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6615 that are used by more than one iv use. For each of those candidates,
6616 this function tries to represent the iv uses under that candidate using
6617 other ones with lower local cost, then tries to prune the new set.
6618 If the new set has a lower cost, it returns the new cost after recording
6619 the candidate replacement in list DELTA. */
6620
6621 static comp_cost
6622 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6623 struct iv_ca_delta **delta)
6624 {
6625 bitmap_iterator bi, bj;
6626 unsigned int i, j, k;
6627 struct iv_use *use;
6628 struct iv_cand *cand;
6629 comp_cost orig_cost, acost;
6630 struct iv_ca_delta *act_delta, *tmp_delta;
6631 struct cost_pair *old_cp, *best_cp = NULL;
6632
6633 *delta = NULL;
6634 orig_cost = iv_ca_cost (ivs);
6635
6636 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6637 {
6638 if (ivs->n_cand_uses[i] == 1
6639 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6640 continue;
6641
6642 cand = iv_cand (data, i);
6643
6644 act_delta = NULL;
6645 /* Represent uses under current candidate using other ones with
6646 lower local cost. */
6647 for (j = 0; j < ivs->upto; j++)
6648 {
6649 use = iv_use (data, j);
6650 old_cp = iv_ca_cand_for_use (ivs, use);
6651
6652 if (old_cp->cand != cand)
6653 continue;
6654
6655 best_cp = old_cp;
6656 if (data->consider_all_candidates)
6657 for (k = 0; k < n_iv_cands (data); k++)
6658 best_cp = cheaper_cost_with_cand (data, use, k,
6659 old_cp->cand, best_cp);
6660 else
6661 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6662 best_cp = cheaper_cost_with_cand (data, use, k,
6663 old_cp->cand, best_cp);
6664
6665 if (best_cp == old_cp)
6666 continue;
6667
6668 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6669 }
6670 /* No need for further pruning. */
6671 if (!act_delta)
6672 continue;
6673
6674 /* Prune the new candidate set. */
6675 iv_ca_delta_commit (data, ivs, act_delta, true);
6676 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6677 iv_ca_delta_commit (data, ivs, act_delta, false);
6678 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6679
6680 if (compare_costs (acost, orig_cost) < 0)
6681 {
6682 *delta = act_delta;
6683 return acost;
6684 }
6685 else
6686 iv_ca_delta_free (&act_delta);
6687 }
6688
6689 return orig_cost;
6690 }
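
/* A sketched example of the situation targeted above (C1, C2, C3, U1 and
   U2 are made-up names): suppose candidate C1 ended up covering uses U1
   and U2 because each greedy step in isolation found C1 acceptable.  If U1
   is locally cheaper on C2 and U2 on C3, rewriting both uses and then
   pruning can drop C1 entirely and reach a set whose overall cost,
   including register pressure, is lower than anything a single extension
   or removal step could find.  */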
6691
6692 /* Tries to extend the set IVS in the best possible way in order
6693 to express the USE. If ORIGINALP is true, prefer candidates from
6694 the original set of IVs, otherwise favor important candidates not
6695 based on any memory object. */
6696
6697 static bool
6698 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6699 struct iv_use *use, bool originalp)
6700 {
6701 comp_cost best_cost, act_cost;
6702 unsigned i;
6703 bitmap_iterator bi;
6704 struct iv_cand *cand;
6705 struct iv_ca_delta *best_delta = NULL, *act_delta;
6706 struct cost_pair *cp;
6707
6708 iv_ca_add_use (data, ivs, use);
6709 best_cost = iv_ca_cost (ivs);
6710 cp = iv_ca_cand_for_use (ivs, use);
6711 if (cp)
6712 {
6713 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6714 iv_ca_set_no_cp (data, ivs, use);
6715 }
6716
6717 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6718 first try important candidates not based on any memory object. Only if
6719 this fails, try the specific ones. Rationale -- in loops with many
6720 variables the best choice often is to use just one generic biv. If we
6721 added here many ivs specific to the uses, the optimization algorithm later
6722 would be likely to get stuck in a local minimum, thus causing us to create
6723 too many ivs. The approach from few ivs to more seems more likely to be
6724 successful -- starting from few ivs, replacing an expensive use by a
6725 specific iv should always be a win. */
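  /* For example, in a loop like

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     a single counting biv can usually express all three address uses as
     well as the exit test, whereas starting from three use-specific
     pointer ivs would make it hard for the later greedy search to
     converge back to the single-biv solution.  (Illustrative sketch only;
     the actual choice depends on the target costs.)  */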
6726 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6727 {
6728 cand = iv_cand (data, i);
6729
6730 if (originalp && cand->pos != IP_ORIGINAL)
6731 continue;
6732
6733 if (!originalp && cand->iv->base_object != NULL_TREE)
6734 continue;
6735
6736 if (iv_ca_cand_used_p (ivs, cand))
6737 continue;
6738
6739 cp = get_use_iv_cost (data, use, cand);
6740 if (!cp)
6741 continue;
6742
6743 iv_ca_set_cp (data, ivs, use, cp);
6744 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6745 true);
6746 iv_ca_set_no_cp (data, ivs, use);
6747 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6748
6749 if (compare_costs (act_cost, best_cost) < 0)
6750 {
6751 best_cost = act_cost;
6752
6753 iv_ca_delta_free (&best_delta);
6754 best_delta = act_delta;
6755 }
6756 else
6757 iv_ca_delta_free (&act_delta);
6758 }
6759
6760 if (infinite_cost_p (best_cost))
6761 {
6762 for (i = 0; i < use->n_map_members; i++)
6763 {
6764 cp = use->cost_map + i;
6765 cand = cp->cand;
6766 if (!cand)
6767 continue;
6768
6769 /* Already tried this. */
6770 if (cand->important)
6771 {
6772 if (originalp && cand->pos == IP_ORIGINAL)
6773 continue;
6774 if (!originalp && cand->iv->base_object == NULL_TREE)
6775 continue;
6776 }
6777
6778 if (iv_ca_cand_used_p (ivs, cand))
6779 continue;
6780
6781 act_delta = NULL;
6782 iv_ca_set_cp (data, ivs, use, cp);
6783 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6784 iv_ca_set_no_cp (data, ivs, use);
6785 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6786 cp, act_delta);
6787
6788 if (compare_costs (act_cost, best_cost) < 0)
6789 {
6790 best_cost = act_cost;
6791
6792 if (best_delta)
6793 iv_ca_delta_free (&best_delta);
6794 best_delta = act_delta;
6795 }
6796 else
6797 iv_ca_delta_free (&act_delta);
6798 }
6799 }
6800
6801 iv_ca_delta_commit (data, ivs, best_delta, true);
6802 iv_ca_delta_free (&best_delta);
6803
6804 return !infinite_cost_p (best_cost);
6805 }
6806
6807 /* Finds an initial assignment of candidates to uses. */
6808
6809 static struct iv_ca *
6810 get_initial_solution (struct ivopts_data *data, bool originalp)
6811 {
6812 struct iv_ca *ivs = iv_ca_new (data);
6813 unsigned i;
6814
6815 for (i = 0; i < n_iv_uses (data); i++)
6816 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6817 {
6818 iv_ca_free (&ivs);
6819 return NULL;
6820 }
6821
6822 return ivs;
6823 }
6824
6825 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6826 points to a bool variable; if it is true, this function tries to break
6827 the locally optimal fixed point by replacing candidates in IVS. */
6828
6829 static bool
6830 try_improve_iv_set (struct ivopts_data *data,
6831 struct iv_ca *ivs, bool *try_replace_p)
6832 {
6833 unsigned i, n_ivs;
6834 comp_cost acost, best_cost = iv_ca_cost (ivs);
6835 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6836 struct iv_cand *cand;
6837
6838 /* Try extending the set of induction variables by one. */
6839 for (i = 0; i < n_iv_cands (data); i++)
6840 {
6841 cand = iv_cand (data, i);
6842
6843 if (iv_ca_cand_used_p (ivs, cand))
6844 continue;
6845
6846 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6847 if (!act_delta)
6848 continue;
6849
6850 /* If we successfully added the candidate and the set is small enough,
6851 try optimizing it by removing other candidates. */
6852 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6853 {
6854 iv_ca_delta_commit (data, ivs, act_delta, true);
6855 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6856 iv_ca_delta_commit (data, ivs, act_delta, false);
6857 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6858 }
6859
6860 if (compare_costs (acost, best_cost) < 0)
6861 {
6862 best_cost = acost;
6863 iv_ca_delta_free (&best_delta);
6864 best_delta = act_delta;
6865 }
6866 else
6867 iv_ca_delta_free (&act_delta);
6868 }
6869
6870 if (!best_delta)
6871 {
6872 /* Try removing the candidates from the set instead. */
6873 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6874
6875 if (!best_delta && *try_replace_p)
6876 {
6877 *try_replace_p = false;
6878 /* So far the candidate selection algorithm tends to choose fewer IVs
6879 so that it can handle loops with many variables for which the best
6880 choice is often to use only one general biv. One weakness is that
6881 it can't handle the opposite case, in which different candidates
6882 should be chosen with respect to each use. To solve the problem,
6883 we replace candidates in the manner described by the comments of
6884 iv_ca_replace, thus giving the general algorithm a chance to break
6885 the locally optimal fixed point in these cases. */
6886 best_cost = iv_ca_replace (data, ivs, &best_delta);
6887 }
6888
6889 if (!best_delta)
6890 return false;
6891 }
6892
6893 iv_ca_delta_commit (data, ivs, best_delta, true);
6894 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6895 iv_ca_delta_free (&best_delta);
6896 return true;
6897 }
6898
6899 /* Attempts to find the optimal set of induction variables. We use a simple
6900 greedy heuristic -- we try to replace at most one candidate in the selected
6901 solution and remove the unused ivs while this improves the cost. */
6902
6903 static struct iv_ca *
6904 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6905 {
6906 struct iv_ca *set;
6907 bool try_replace_p = true;
6908
6909 /* Get the initial solution. */
6910 set = get_initial_solution (data, originalp);
6911 if (!set)
6912 {
6913 if (dump_file && (dump_flags & TDF_DETAILS))
6914 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6915 return NULL;
6916 }
6917
6918 if (dump_file && (dump_flags & TDF_DETAILS))
6919 {
6920 fprintf (dump_file, "Initial set of candidates:\n");
6921 iv_ca_dump (data, dump_file, set);
6922 }
6923
6924 while (try_improve_iv_set (data, set, &try_replace_p))
6925 {
6926 if (dump_file && (dump_flags & TDF_DETAILS))
6927 {
6928 fprintf (dump_file, "Improved to:\n");
6929 iv_ca_dump (data, dump_file, set);
6930 }
6931 }
6932
6933 return set;
6934 }
6935
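/* Attempts to find the optimal set of induction variables, trying both the
   strategy that prefers the original ivs and the one that prefers
   candidates not based on any memory object, and returning the cheaper of
   the two resulting sets.  Also records the selected candidate for each
   use.  */
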
6936 static struct iv_ca *
6937 find_optimal_iv_set (struct ivopts_data *data)
6938 {
6939 unsigned i;
6940 struct iv_ca *set, *origset;
6941 struct iv_use *use;
6942 comp_cost cost, origcost;
6943
6944 /* Determine the cost based on a strategy that starts with the original IVs,
6945 then try again using a strategy that prefers candidates not based
6946 on any memory object. */
6947 origset = find_optimal_iv_set_1 (data, true);
6948 set = find_optimal_iv_set_1 (data, false);
6949
6950 if (!origset && !set)
6951 return NULL;
6952
6953 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6954 cost = set ? iv_ca_cost (set) : infinite_cost;
6955
6956 if (dump_file && (dump_flags & TDF_DETAILS))
6957 {
6958 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6959 origcost.cost, origcost.complexity);
6960 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6961 cost.cost, cost.complexity);
6962 }
6963
6964 /* Choose the one with the best cost. */
6965 if (compare_costs (origcost, cost) <= 0)
6966 {
6967 if (set)
6968 iv_ca_free (&set);
6969 set = origset;
6970 }
6971 else if (origset)
6972 iv_ca_free (&origset);
6973
6974 for (i = 0; i < n_iv_uses (data); i++)
6975 {
6976 use = iv_use (data, i);
6977 use->selected = iv_ca_cand_for_use (set, use)->cand;
6978 }
6979
6980 return set;
6981 }
6982
6983 /* Creates a new induction variable corresponding to CAND. */
6984
6985 static void
6986 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6987 {
6988 gimple_stmt_iterator incr_pos;
6989 tree base;
6990 bool after = false;
6991
6992 if (!cand->iv)
6993 return;
6994
6995 switch (cand->pos)
6996 {
6997 case IP_NORMAL:
6998 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6999 break;
7000
7001 case IP_END:
7002 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7003 after = true;
7004 break;
7005
7006 case IP_AFTER_USE:
7007 after = true;
7008 /* fall through */
7009 case IP_BEFORE_USE:
7010 incr_pos = gsi_for_stmt (cand->incremented_at);
7011 break;
7012
7013 case IP_ORIGINAL:
7014 /* Mark that the iv is preserved. */
7015 name_info (data, cand->var_before)->preserve_biv = true;
7016 name_info (data, cand->var_after)->preserve_biv = true;
7017
7018 /* Rewrite the increment so that it uses var_before directly. */
7019 find_interesting_uses_op (data, cand->var_after)->selected = cand;
7020 return;
7021 }
7022
7023 gimple_add_tmp_var (cand->var_before);
7024
7025 base = unshare_expr (cand->iv->base);
7026
7027 create_iv (base, unshare_expr (cand->iv->step),
7028 cand->var_before, data->current_loop,
7029 &incr_pos, after, &cand->var_before, &cand->var_after);
7030 }
7031
7032 /* Creates new induction variables described in SET. */
7033
7034 static void
7035 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7036 {
7037 unsigned i;
7038 struct iv_cand *cand;
7039 bitmap_iterator bi;
7040
7041 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7042 {
7043 cand = iv_cand (data, i);
7044 create_new_iv (data, cand);
7045 }
7046
7047 if (dump_file && (dump_flags & TDF_DETAILS))
7048 {
7049 fprintf (dump_file, "Selected IV set for loop %d",
7050 data->current_loop->num);
7051 if (data->loop_loc != UNKNOWN_LOCATION)
7052 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7053 LOCATION_LINE (data->loop_loc));
7054 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7055 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7056 {
7057 cand = iv_cand (data, i);
7058 dump_cand (dump_file, cand);
7059 }
7060 fprintf (dump_file, "\n");
7061 }
7062 }
7063
7064 /* Rewrites USE (definition of iv used in a nonlinear expression)
7065 using candidate CAND. */
7066
7067 static void
7068 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7069 struct iv_use *use, struct iv_cand *cand)
7070 {
7071 tree comp;
7072 tree op, tgt;
7073 gassign *ass;
7074 gimple_stmt_iterator bsi;
7075
7076 /* An important special case -- if we are asked to express the value of
7077 the original iv by itself, just exit; there is no need to
7078 introduce a new computation (that might also need casting the
7079 variable to unsigned and back). */
7080 if (cand->pos == IP_ORIGINAL
7081 && cand->incremented_at == use->stmt)
7082 {
7083 enum tree_code stmt_code;
7084
7085 gcc_assert (is_gimple_assign (use->stmt));
7086 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7087
7088 /* Check whether we may leave the computation unchanged.
7089 This is the case only if it does not rely on other
7090 computations in the loop -- otherwise, the computation
7091 we rely upon may be removed in remove_unused_ivs,
7092 thus leading to an ICE. */
7093 stmt_code = gimple_assign_rhs_code (use->stmt);
7094 if (stmt_code == PLUS_EXPR
7095 || stmt_code == MINUS_EXPR
7096 || stmt_code == POINTER_PLUS_EXPR)
7097 {
7098 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7099 op = gimple_assign_rhs2 (use->stmt);
7100 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7101 op = gimple_assign_rhs1 (use->stmt);
7102 else
7103 op = NULL_TREE;
7104 }
7105 else
7106 op = NULL_TREE;
7107
7108 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7109 return;
7110 }
7111
7112 comp = get_computation (data->current_loop, use, cand);
7113 gcc_assert (comp != NULL_TREE);
7114
7115 switch (gimple_code (use->stmt))
7116 {
7117 case GIMPLE_PHI:
7118 tgt = PHI_RESULT (use->stmt);
7119
7120 /* If we should keep the biv, do not replace it. */
7121 if (name_info (data, tgt)->preserve_biv)
7122 return;
7123
7124 bsi = gsi_after_labels (gimple_bb (use->stmt));
7125 break;
7126
7127 case GIMPLE_ASSIGN:
7128 tgt = gimple_assign_lhs (use->stmt);
7129 bsi = gsi_for_stmt (use->stmt);
7130 break;
7131
7132 default:
7133 gcc_unreachable ();
7134 }
7135
7136 if (!valid_gimple_rhs_p (comp)
7137 || (gimple_code (use->stmt) != GIMPLE_PHI
7138 /* We can't allow re-allocating the stmt as it might be pointed
7139 to still. */
7140 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7141 >= gimple_num_ops (gsi_stmt (bsi)))))
7142 {
7143 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7144 true, GSI_SAME_STMT);
7145 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7146 {
7147 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7148 /* As this isn't a plain copy we have to reset alignment
7149 information. */
7150 if (SSA_NAME_PTR_INFO (comp))
7151 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7152 }
7153 }
7154
7155 if (gimple_code (use->stmt) == GIMPLE_PHI)
7156 {
7157 ass = gimple_build_assign (tgt, comp);
7158 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7159
7160 bsi = gsi_for_stmt (use->stmt);
7161 remove_phi_node (&bsi, false);
7162 }
7163 else
7164 {
7165 gimple_assign_set_rhs_from_tree (&bsi, comp);
7166 use->stmt = gsi_stmt (bsi);
7167 }
7168 }
7169
7170 /* Performs a peephole optimization to reorder the iv update statement with
7171 a mem ref to enable instruction combining in later phases. The mem ref uses
7172 the iv value before the update, so the reordering transformation requires
7173 adjustment of the offset. CAND is the selected IV_CAND.
7174
7175 Example:
7176
7177 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7178 iv2 = iv1 + 1;
7179
7180 if (t < val) (1)
7181 goto L;
7182 goto Head;
7183
7184
7185 directly propagating t over to (1) will introduce an overlapping live range
7186 and thus increase register pressure. This peephole transforms it into:
7187
7188
7189 iv2 = iv1 + 1;
7190 t = MEM_REF (base, iv2, 8, 8);
7191 if (t < val)
7192 goto L;
7193 goto Head;
7194 */
7195
7196 static void
7197 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7198 {
7199 tree var_after;
7200 gimple *iv_update, *stmt;
7201 basic_block bb;
7202 gimple_stmt_iterator gsi, gsi_iv;
7203
7204 if (cand->pos != IP_NORMAL)
7205 return;
7206
7207 var_after = cand->var_after;
7208 iv_update = SSA_NAME_DEF_STMT (var_after);
7209
7210 bb = gimple_bb (iv_update);
7211 gsi = gsi_last_nondebug_bb (bb);
7212 stmt = gsi_stmt (gsi);
7213
7214 /* Only handle conditional statements for now. */
7215 if (gimple_code (stmt) != GIMPLE_COND)
7216 return;
7217
7218 gsi_prev_nondebug (&gsi);
7219 stmt = gsi_stmt (gsi);
7220 if (stmt != iv_update)
7221 return;
7222
7223 gsi_prev_nondebug (&gsi);
7224 if (gsi_end_p (gsi))
7225 return;
7226
7227 stmt = gsi_stmt (gsi);
7228 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7229 return;
7230
7231 if (stmt != use->stmt)
7232 return;
7233
7234 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7235 return;
7236
7237 if (dump_file && (dump_flags & TDF_DETAILS))
7238 {
7239 fprintf (dump_file, "Reordering \n");
7240 print_gimple_stmt (dump_file, iv_update, 0, 0);
7241 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7242 fprintf (dump_file, "\n");
7243 }
7244
7245 gsi = gsi_for_stmt (use->stmt);
7246 gsi_iv = gsi_for_stmt (iv_update);
7247 gsi_move_before (&gsi_iv, &gsi);
7248
7249 cand->pos = IP_BEFORE_USE;
7250 cand->incremented_at = use->stmt;
7251 }
7252
7253 /* Rewrites USE (address that is an iv) using candidate CAND. */
7254
7255 static void
7256 rewrite_use_address_1 (struct ivopts_data *data,
7257 struct iv_use *use, struct iv_cand *cand)
7258 {
7259 aff_tree aff;
7260 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7261 tree base_hint = NULL_TREE;
7262 tree ref, iv;
7263 bool ok;
7264
7265 adjust_iv_update_pos (cand, use);
7266 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7267 gcc_assert (ok);
7268 unshare_aff_combination (&aff);
7269
7270 /* To avoid undefined overflow problems, all IV candidates use unsigned
7271 integer types. The drawback is that this makes it impossible for
7272 create_mem_ref to distinguish an IV that is based on a memory object
7273 from one that represents simply an offset.
7274
7275 To work around this problem, we pass a hint to create_mem_ref that
7276 indicates which variable (if any) in aff is an IV based on a memory
7277 object. Note that we only consider the candidate. If this is not
7278 based on an object, the base of the reference is in some subexpression
7279 of the use -- but these will use pointer types, so they are recognized
7280 by the create_mem_ref heuristics anyway. */
7281 if (cand->iv->base_object)
7282 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7283
7284 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7285 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7286 reference_alias_ptr_type (*use->op_p),
7287 iv, base_hint, data->speed);
7288 copy_ref_info (ref, *use->op_p);
7289 *use->op_p = ref;
7290 }
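
/* For illustration (a sketch only -- the exact shape depends on the
   addressing modes create_mem_ref decides to use), an address use such as
   a[i_1], based on a candidate that tracks the access directly, may be
   rewritten into a TARGET_MEM_REF of roughly the form

     MEM[symbol: a, index: ivtmp_5, step: 4]

   so that the original biv i_1 is no longer needed for the access.  */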
7291
7292 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7293 first use of a group, rewrites sub uses in the group too. */
7294
7295 static void
7296 rewrite_use_address (struct ivopts_data *data,
7297 struct iv_use *use, struct iv_cand *cand)
7298 {
7299 struct iv_use *next;
7300
7301 gcc_assert (use->sub_id == 0);
7302 rewrite_use_address_1 (data, use, cand);
7303 update_stmt (use->stmt);
7304
7305 for (next = use->next; next != NULL; next = next->next)
7306 {
7307 rewrite_use_address_1 (data, next, cand);
7308 update_stmt (next->stmt);
7309 }
7310
7311 return;
7312 }
7313
7314 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7315 candidate CAND. */
7316
7317 static void
7318 rewrite_use_compare (struct ivopts_data *data,
7319 struct iv_use *use, struct iv_cand *cand)
7320 {
7321 tree comp, *var_p, op, bound;
7322 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7323 enum tree_code compare;
7324 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7325 bool ok;
7326
7327 bound = cp->value;
7328 if (bound)
7329 {
7330 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7331 tree var_type = TREE_TYPE (var);
7332 gimple_seq stmts;
7333
7334 if (dump_file && (dump_flags & TDF_DETAILS))
7335 {
7336 fprintf (dump_file, "Replacing exit test: ");
7337 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7338 }
7339 compare = cp->comp;
7340 bound = unshare_expr (fold_convert (var_type, bound));
7341 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7342 if (stmts)
7343 gsi_insert_seq_on_edge_immediate (
7344 loop_preheader_edge (data->current_loop),
7345 stmts);
7346
7347 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7348 gimple_cond_set_lhs (cond_stmt, var);
7349 gimple_cond_set_code (cond_stmt, compare);
7350 gimple_cond_set_rhs (cond_stmt, op);
7351 return;
7352 }
7353
7354 /* The induction variable elimination failed; just express the original
7355 giv. */
7356 comp = get_computation (data->current_loop, use, cand);
7357 gcc_assert (comp != NULL_TREE);
7358
7359 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7360 gcc_assert (ok);
7361
7362 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7363 true, GSI_SAME_STMT);
7364 }
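
/* As an example of the successful-elimination path above (all names are
   illustrative), an exit test

     if (i_1 < n_2)

   for which a bound could be computed is replaced by a comparison of the
   selected candidate against that bound, e.g.

     if (ivtmp_5 != _7)

   with the statements computing _7 inserted on the loop preheader edge.  */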
7365
7366 /* Rewrites USE using candidate CAND. */
7367
7368 static void
7369 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7370 {
7371 switch (use->type)
7372 {
7373 case USE_NONLINEAR_EXPR:
7374 rewrite_use_nonlinear_expr (data, use, cand);
7375 break;
7376
7377 case USE_ADDRESS:
7378 rewrite_use_address (data, use, cand);
7379 break;
7380
7381 case USE_COMPARE:
7382 rewrite_use_compare (data, use, cand);
7383 break;
7384
7385 default:
7386 gcc_unreachable ();
7387 }
7388
7389 update_stmt (use->stmt);
7390 }
7391
7392 /* Rewrite the uses using the selected induction variables. */
7393
7394 static void
7395 rewrite_uses (struct ivopts_data *data)
7396 {
7397 unsigned i;
7398 struct iv_cand *cand;
7399 struct iv_use *use;
7400
7401 for (i = 0; i < n_iv_uses (data); i++)
7402 {
7403 use = iv_use (data, i);
7404 cand = use->selected;
7405 gcc_assert (cand);
7406
7407 rewrite_use (data, use, cand);
7408 }
7409 }
7410
7411 /* Removes the ivs that are not used after rewriting. */
7412
7413 static void
7414 remove_unused_ivs (struct ivopts_data *data)
7415 {
7416 unsigned j;
7417 bitmap_iterator bi;
7418 bitmap toremove = BITMAP_ALLOC (NULL);
7419
7420 /* Figure out an order in which to release SSA DEFs so that we don't
7421 release something that we'd have to propagate into a debug stmt
7422 afterwards. */
7423 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7424 {
7425 struct version_info *info;
7426
7427 info = ver_info (data, j);
7428 if (info->iv
7429 && !integer_zerop (info->iv->step)
7430 && !info->inv_id
7431 && !info->iv->have_use_for
7432 && !info->preserve_biv)
7433 {
7434 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7435
7436 tree def = info->iv->ssa_name;
7437
7438 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7439 {
7440 imm_use_iterator imm_iter;
7441 use_operand_p use_p;
7442 gimple *stmt;
7443 int count = 0;
7444
7445 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7446 {
7447 if (!gimple_debug_bind_p (stmt))
7448 continue;
7449
7450 /* We just want to determine whether to do nothing
7451 (count == 0), to substitute the computed
7452 expression into a single use of the SSA DEF by
7453 itself (count == 1), or to use a debug temp
7454 because the SSA DEF is used multiple times or as
7455 part of a larger expression (count > 1). */
7456 count++;
7457 if (gimple_debug_bind_get_value (stmt) != def)
7458 count++;
7459
7460 if (count > 1)
7461 BREAK_FROM_IMM_USE_STMT (imm_iter);
7462 }
7463
7464 if (!count)
7465 continue;
7466
7467 struct iv_use dummy_use;
7468 struct iv_cand *best_cand = NULL, *cand;
7469 unsigned i, best_pref = 0, cand_pref;
7470
7471 memset (&dummy_use, 0, sizeof (dummy_use));
7472 dummy_use.iv = info->iv;
7473 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7474 {
7475 cand = iv_use (data, i)->selected;
7476 if (cand == best_cand)
7477 continue;
7478 cand_pref = operand_equal_p (cand->iv->step,
7479 info->iv->step, 0)
7480 ? 4 : 0;
7481 cand_pref
7482 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7483 == TYPE_MODE (TREE_TYPE (info->iv->base))
7484 ? 2 : 0;
7485 cand_pref
7486 += TREE_CODE (cand->iv->base) == INTEGER_CST
7487 ? 1 : 0;
7488 if (best_cand == NULL || best_pref < cand_pref)
7489 {
7490 best_cand = cand;
7491 best_pref = cand_pref;
7492 }
7493 }
7494
7495 if (!best_cand)
7496 continue;
7497
7498 tree comp = get_computation_at (data->current_loop,
7499 &dummy_use, best_cand,
7500 SSA_NAME_DEF_STMT (def));
7501 if (!comp)
7502 continue;
7503
7504 if (count > 1)
7505 {
7506 tree vexpr = make_node (DEBUG_EXPR_DECL);
7507 DECL_ARTIFICIAL (vexpr) = 1;
7508 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7509 if (SSA_NAME_VAR (def))
7510 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7511 else
7512 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7513 gdebug *def_temp
7514 = gimple_build_debug_bind (vexpr, comp, NULL);
7515 gimple_stmt_iterator gsi;
7516
7517 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7518 gsi = gsi_after_labels (gimple_bb
7519 (SSA_NAME_DEF_STMT (def)));
7520 else
7521 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7522
7523 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7524 comp = vexpr;
7525 }
7526
7527 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7528 {
7529 if (!gimple_debug_bind_p (stmt))
7530 continue;
7531
7532 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7533 SET_USE (use_p, comp);
7534
7535 update_stmt (stmt);
7536 }
7537 }
7538 }
7539 }
7540
7541 release_defs_bitset (toremove);
7542
7543 BITMAP_FREE (toremove);
7544 }
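
/* A sketch of the debug handling above (names and the expression are
   illustrative): if a removed biv i_1 is still referenced by debug
   statements, its value is re-expressed in terms of a selected candidate;
   with several debug uses this goes through a debug temporary, roughly

     # DEBUG D#1 => ivtmp_5 * 4 + start_3
     # DEBUG i => D#1

   so the debug information survives the removal of the iv.  */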
7545
7546 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7547 for hash_map::traverse. */
7548
7549 bool
7550 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7551 {
7552 free (value);
7553 return true;
7554 }
7555
7556 /* Frees data allocated by the optimization of a single loop. */
7557
7558 static void
7559 free_loop_data (struct ivopts_data *data)
7560 {
7561 unsigned i, j;
7562 bitmap_iterator bi;
7563 tree obj;
7564
7565 if (data->niters)
7566 {
7567 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7568 delete data->niters;
7569 data->niters = NULL;
7570 }
7571
7572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7573 {
7574 struct version_info *info;
7575
7576 info = ver_info (data, i);
7577 info->iv = NULL;
7578 info->has_nonlin_use = false;
7579 info->preserve_biv = false;
7580 info->inv_id = 0;
7581 }
7582 bitmap_clear (data->relevant);
7583 bitmap_clear (data->important_candidates);
7584
7585 for (i = 0; i < n_iv_uses (data); i++)
7586 {
7587 struct iv_use *use = iv_use (data, i);
7588 struct iv_use *pre = use, *sub = use->next;
7589
7590 while (sub)
7591 {
7592 gcc_assert (sub->related_cands == NULL);
7593 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7594
7595 pre = sub;
7596 sub = sub->next;
7597 free (pre);
7598 }
7599
7600 BITMAP_FREE (use->related_cands);
7601 for (j = 0; j < use->n_map_members; j++)
7602 if (use->cost_map[j].depends_on)
7603 BITMAP_FREE (use->cost_map[j].depends_on);
7604 free (use->cost_map);
7605 free (use);
7606 }
7607 data->iv_uses.truncate (0);
7608
7609 for (i = 0; i < n_iv_cands (data); i++)
7610 {
7611 struct iv_cand *cand = iv_cand (data, i);
7612
7613 if (cand->depends_on)
7614 BITMAP_FREE (cand->depends_on);
7615 free (cand);
7616 }
7617 data->iv_candidates.truncate (0);
7618
7619 if (data->version_info_size < num_ssa_names)
7620 {
7621 data->version_info_size = 2 * num_ssa_names;
7622 free (data->version_info);
7623 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7624 }
7625
7626 data->max_inv_id = 0;
7627
7628 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7629 SET_DECL_RTL (obj, NULL_RTX);
7630
7631 decl_rtl_to_reset.truncate (0);
7632
7633 data->inv_expr_tab->empty ();
7634 data->inv_expr_id = 0;
7635
7636 data->iv_common_cand_tab->empty ();
7637 data->iv_common_cands.truncate (0);
7638 }
7639
7640 /* Finalizes the data structures used by the iv optimization pass,
7641 freeing the memory allocated for them. */
7642
7643 static void
7644 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7645 {
7646 free_loop_data (data);
7647 free (data->version_info);
7648 BITMAP_FREE (data->relevant);
7649 BITMAP_FREE (data->important_candidates);
7650
7651 decl_rtl_to_reset.release ();
7652 data->iv_uses.release ();
7653 data->iv_candidates.release ();
7654 delete data->inv_expr_tab;
7655 data->inv_expr_tab = NULL;
7656 free_affine_expand_cache (&data->name_expansion_cache);
7657 delete data->iv_common_cand_tab;
7658 data->iv_common_cand_tab = NULL;
7659 data->iv_common_cands.release ();
7660 obstack_free (&data->iv_obstack, NULL);
7661 }
7662
7663 /* Returns true if the loop body BODY includes any function calls. */
7664
7665 static bool
7666 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7667 {
7668 gimple_stmt_iterator gsi;
7669 unsigned i;
7670
7671 for (i = 0; i < num_nodes; i++)
7672 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7673 {
7674 gimple *stmt = gsi_stmt (gsi);
7675 if (is_gimple_call (stmt)
7676 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7677 return true;
7678 }
7679 return false;
7680 }
7681
7682 /* Optimizes the LOOP. Returns true if anything changed. */
7683
7684 static bool
7685 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7686 {
7687 bool changed = false;
7688 struct iv_ca *iv_ca;
7689 edge exit = single_dom_exit (loop);
7690 basic_block *body;
7691
7692 gcc_assert (!data->niters);
7693 data->current_loop = loop;
7694 data->loop_loc = find_loop_location (loop);
7695 data->speed = optimize_loop_for_speed_p (loop);
7696
7697 if (dump_file && (dump_flags & TDF_DETAILS))
7698 {
7699 fprintf (dump_file, "Processing loop %d", loop->num);
7700 if (data->loop_loc != UNKNOWN_LOCATION)
7701 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7702 LOCATION_LINE (data->loop_loc));
7703 fprintf (dump_file, "\n");
7704
7705 if (exit)
7706 {
7707 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7708 exit->src->index, exit->dest->index);
7709 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7710 fprintf (dump_file, "\n");
7711 }
7712
7713 fprintf (dump_file, "\n");
7714 }
7715
7716 body = get_loop_body (loop);
7717 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7718 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7719 free (body);
7720
7721 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7722
7723 /* For each ssa name determines whether it behaves as an induction variable
7724 in some loop. */
7725 if (!find_induction_variables (data))
7726 goto finish;
7727
7728 /* Finds interesting uses (item 1). */
7729 find_interesting_uses (data);
7730 group_address_uses (data);
7731 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7732 goto finish;
7733
7734 /* Finds candidates for the induction variables (item 2). */
7735 find_iv_candidates (data);
7736
7737 /* Calculates the costs (item 3, part 1). */
7738 determine_iv_costs (data);
7739 determine_use_iv_costs (data);
7740 determine_set_costs (data);
7741
7742 /* Find the optimal set of induction variables (item 3, part 2). */
7743 iv_ca = find_optimal_iv_set (data);
7744 if (!iv_ca)
7745 goto finish;
7746 changed = true;
7747
7748 /* Create the new induction variables (item 4, part 1). */
7749 create_new_ivs (data, iv_ca);
7750 iv_ca_free (&iv_ca);
7751
7752 /* Rewrite the uses (item 4, part 2). */
7753 rewrite_uses (data);
7754
7755 /* Remove the ivs that are unused after rewriting. */
7756 remove_unused_ivs (data);
7757
7758 /* We have changed the structure of induction variables; it might happen
7759 that definitions in the scev database refer to some of them that were
7760 eliminated. */
7761 scev_reset ();
7762
7763 finish:
7764 free_loop_data (data);
7765
7766 return changed;
7767 }
7768
7769 /* Main entry point. Optimizes induction variables in loops. */
7770
7771 void
7772 tree_ssa_iv_optimize (void)
7773 {
7774 struct loop *loop;
7775 struct ivopts_data data;
7776
7777 tree_ssa_iv_optimize_init (&data);
7778
7779 /* Optimize the loops starting with the innermost ones. */
7780 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7781 {
7782 if (dump_file && (dump_flags & TDF_DETAILS))
7783 flow_loop_dump (loop, dump_file, NULL, 1);
7784
7785 tree_ssa_iv_optimize_loop (&data, loop);
7786 }
7787
7788 tree_ssa_iv_optimize_finalize (&data);
7789 }