/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

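/* For example (illustrative only), for a loop like

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address &a[i] is an interesting address use of the biv i.  Strength
   reduction may replace the separate counter and address computation by a
   single pointer induction variable, giving code equivalent to

     for (p = &a[0]; p != &a[100]; p++)
       *p = 0;

   which needs only one induction variable.  */
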
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing
     a use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

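/* For instance (illustrative), for an IP_NORMAL candidate the loop looks
   like

     body:
       ...                      <- uses here are before the increment
       cand += step;            <- the increment
       if (cond) goto body;     <- exit test

   so only the exit test and statements in the latch come after the
   increment, while for an IP_END candidate the increment is emitted at
   the end of the latch and nothing in the loop body follows it.  */
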
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so that we do
         not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

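/* For example (illustrative), for EXPR (char *) &a[i] the conversion is
   stripped first, the ADDR_EXPR case then finds the base address of a[i],
   and &a is returned; for EXPR p_1 + 4 the POINTER_PLUS_EXPR case recurses
   into p_1.  */
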
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

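/* A basic iv is recognized from a loop-header phi node together with its
   increment, e.g. (illustrative GIMPLE):

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   which yields a biv with base 0 and step 1.  */
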
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, to avoid
     showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

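/* For instance (illustrative), for an exit test

     if (i_1 < n_2)

   where i_1 is an iv and n_2 is loop invariant, *CONTROL_VAR is set to the
   location of i_1, *BOUND to that of n_2, and true is returned.  */
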
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

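/* For example (illustrative), for a reference a[i_1] where the elements are
   4 bytes wide and i_1 is an iv with step 1, idx_find_step replaces the
   index by the initial value of i_1 and adds 4 * 1 to DTA->step.  */
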
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

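/* For instance (illustrative), constant_multiple_of (n_1 * 8, n_1, &mul)
   sets *mul to 8 and returns true, while for TOP = n_1 * 8 + 1 and
   BOT = n_1 it returns false, since 1 is not a constant multiple of n_1.  */
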
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand? */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

2128   /* Default handling of expressions for which we want to recurse into
2129      the first operand.  */
2130 op0 = TREE_OPERAND (expr, 0);
2131 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2132 *offset += off0;
2133
2134 if (op0 == TREE_OPERAND (expr, 0)
2135 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2136 return orig_expr;
2137
2138 expr = copy_node (expr);
2139 TREE_OPERAND (expr, 0) = op0;
2140 if (op1)
2141 TREE_OPERAND (expr, 1) = op1;
2142
2143   /* Inside an address, we might strip the top-level component references,
2144      thus changing the type of the expression.  Handling of ADDR_EXPR
2145      will fix that.  */
2146 expr = fold_convert (orig_type, expr);
2147
2148 return expr;
2149 }
2150
2151 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2152
2153 static tree
2154 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2155 {
2156 return strip_offset_1 (expr, false, false, offset);
2157 }
2158
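/* For illustration (a hypothetical example): given the address &a[i + 4]
   with 4-byte array elements, strip_offset returns &a[i] and stores 16
   in *OFFSET.  */
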
2159 /* Returns a variant of TYPE that can be used as a base for different uses.
2160    We return an unsigned type with the same precision, which avoids problems
2161    with overflows.  */
2162
2163 static tree
2164 generic_type_for (tree type)
2165 {
2166 if (POINTER_TYPE_P (type))
2167 return unsigned_type_for (type);
2168
2169 if (TYPE_UNSIGNED (type))
2170 return type;
2171
2172 return unsigned_type_for (type);
2173 }
2174
2175 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2176    the bitmap in which we should store them.  */
2177
2178 static struct ivopts_data *fd_ivopts_data;
2179 static tree
2180 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2181 {
2182 bitmap *depends_on = (bitmap *) data;
2183 struct version_info *info;
2184
2185 if (TREE_CODE (*expr_p) != SSA_NAME)
2186 return NULL_TREE;
2187 info = name_info (fd_ivopts_data, *expr_p);
2188
2189 if (!info->inv_id || info->has_nonlin_use)
2190 return NULL_TREE;
2191
2192 if (!*depends_on)
2193 *depends_on = BITMAP_ALLOC (NULL);
2194 bitmap_set_bit (*depends_on, info->inv_id);
2195
2196 return NULL_TREE;
2197 }
2198
2199 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2200 position to POS. If USE is not NULL, the candidate is set as related to
2201 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2202 replacement of the final value of the iv by a direct computation. */
2203
2204 static struct iv_cand *
2205 add_candidate_1 (struct ivopts_data *data,
2206 tree base, tree step, bool important, enum iv_position pos,
2207 struct iv_use *use, gimple incremented_at)
2208 {
2209 unsigned i;
2210 struct iv_cand *cand = NULL;
2211 tree type, orig_type;
2212
2213 /* For non-original variables, make sure their values are computed in a type
2214 that does not invoke undefined behavior on overflows (since in general,
2215 we cannot prove that these induction variables are non-wrapping). */
2216 if (pos != IP_ORIGINAL)
2217 {
2218 orig_type = TREE_TYPE (base);
2219 type = generic_type_for (orig_type);
2220 if (type != orig_type)
2221 {
2222 base = fold_convert (type, base);
2223 step = fold_convert (type, step);
2224 }
2225 }
2226
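  /* Check whether an equivalent candidate has already been recorded,
     and reuse it if so.  */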
2227 for (i = 0; i < n_iv_cands (data); i++)
2228 {
2229 cand = iv_cand (data, i);
2230
2231 if (cand->pos != pos)
2232 continue;
2233
2234 if (cand->incremented_at != incremented_at
2235 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2236 && cand->ainc_use != use))
2237 continue;
2238
2239 if (!cand->iv)
2240 {
2241 if (!base && !step)
2242 break;
2243
2244 continue;
2245 }
2246
2247 if (!base && !step)
2248 continue;
2249
2250 if (operand_equal_p (base, cand->iv->base, 0)
2251 && operand_equal_p (step, cand->iv->step, 0)
2252 && (TYPE_PRECISION (TREE_TYPE (base))
2253 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2254 break;
2255 }
2256
2257 if (i == n_iv_cands (data))
2258 {
2259 cand = XCNEW (struct iv_cand);
2260 cand->id = i;
2261
2262 if (!base && !step)
2263 cand->iv = NULL;
2264 else
2265 cand->iv = alloc_iv (base, step);
2266
2267 cand->pos = pos;
2268 if (pos != IP_ORIGINAL && cand->iv)
2269 {
2270 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2271 cand->var_after = cand->var_before;
2272 }
2273 cand->important = important;
2274 cand->incremented_at = incremented_at;
2275 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2276
2277 if (step
2278 && TREE_CODE (step) != INTEGER_CST)
2279 {
2280 fd_ivopts_data = data;
2281 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2282 }
2283
2284 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2285 cand->ainc_use = use;
2286 else
2287 cand->ainc_use = NULL;
2288
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2290 dump_cand (dump_file, cand);
2291 }
2292
2293 if (important && !cand->important)
2294 {
2295 cand->important = true;
2296 if (dump_file && (dump_flags & TDF_DETAILS))
2297 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2298 }
2299
2300 if (use)
2301 {
2302 bitmap_set_bit (use->related_cands, i);
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "Candidate %d is related to use %d\n",
2305 cand->id, use->id);
2306 }
2307
2308 return cand;
2309 }
2310
2311 /* Returns true if incrementing the induction variable at the end of the LOOP
2312 is allowed.
2313
2314    The purpose is to avoid splitting the latch edge with a biv increment,
2315    thus creating a jump, possibly confusing other optimization passes and
2316    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2317    IP_NORMAL_POS is not available (so we do not have a better alternative),
2318    or if the latch edge is already nonempty.  */
2319
2320 static bool
2321 allow_ip_end_pos_p (struct loop *loop)
2322 {
2323 if (!ip_normal_pos (loop))
2324 return true;
2325
2326 if (!empty_block_p (ip_end_pos (loop)))
2327 return true;
2328
2329 return false;
2330 }
2331
2332 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2333 Important field is set to IMPORTANT. */
2334
2335 static void
2336 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2337 bool important, struct iv_use *use)
2338 {
2339 basic_block use_bb = gimple_bb (use->stmt);
2340 enum machine_mode mem_mode;
2341 unsigned HOST_WIDE_INT cstepi;
2342
2343 /* If we insert the increment in any position other than the standard
2344 ones, we must ensure that it is incremented once per iteration.
2345 It must not be in an inner nested loop, or one side of an if
2346 statement. */
2347 if (use_bb->loop_father != data->current_loop
2348 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2349 || stmt_could_throw_p (use->stmt)
2350 || !cst_and_fits_in_hwi (step))
2351 return;
2352
2353 cstepi = int_cst_value (step);
2354
2355 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2356 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2357 || USE_STORE_PRE_INCREMENT (mem_mode))
2358 && GET_MODE_SIZE (mem_mode) == cstepi)
2359 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2360 || USE_STORE_PRE_DECREMENT (mem_mode))
2361 && GET_MODE_SIZE (mem_mode) == -cstepi))
2362 {
2363 enum tree_code code = MINUS_EXPR;
2364 tree new_base;
2365 tree new_step = step;
2366
2367 if (POINTER_TYPE_P (TREE_TYPE (base)))
2368 {
2369 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2370 code = POINTER_PLUS_EXPR;
2371 }
2372 else
2373 new_step = fold_convert (TREE_TYPE (base), new_step);
2374 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2375 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2376 use->stmt);
2377 }
2378 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2379 || USE_STORE_POST_INCREMENT (mem_mode))
2380 && GET_MODE_SIZE (mem_mode) == cstepi)
2381 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2382 || USE_STORE_POST_DECREMENT (mem_mode))
2383 && GET_MODE_SIZE (mem_mode) == -cstepi))
2384 {
2385 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2386 use->stmt);
2387 }
2388 }
2389
2390 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2391 position to POS. If USE is not NULL, the candidate is set as related to
2392 it. The candidate computation is scheduled on all available positions. */
2393
2394 static void
2395 add_candidate (struct ivopts_data *data,
2396 tree base, tree step, bool important, struct iv_use *use)
2397 {
2398 if (ip_normal_pos (data->current_loop))
2399 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2400 if (ip_end_pos (data->current_loop)
2401 && allow_ip_end_pos_p (data->current_loop))
2402 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2403
2404 if (use != NULL && use->type == USE_ADDRESS)
2405 add_autoinc_candidates (data, base, step, important, use);
2406 }
2407
2408 /* Adds standard iv candidates. */
2409
2410 static void
2411 add_standard_iv_candidates (struct ivopts_data *data)
2412 {
2413 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2414
2415   /* The same for the long type if it is still fast enough.  */
2416 if (TYPE_PRECISION
2417 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2418 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2420 build_int_cst (long_integer_type_node, 1), true, NULL);
2421
2422   /* And for the long long type if it is still fast enough.  */
2423 if (TYPE_PRECISION
2424 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2425 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2426 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2427 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2428 }
2429
2430
2431 /* Adds candidates based on the old induction variable IV.  */
2432
2433 static void
2434 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2435 {
2436 gimple phi;
2437 tree def;
2438 struct iv_cand *cand;
2439
2440 add_candidate (data, iv->base, iv->step, true, NULL);
2441
2442 /* The same, but with initial value zero. */
2443 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2444 add_candidate (data, size_int (0), iv->step, true, NULL);
2445 else
2446 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2447 iv->step, true, NULL);
2448
2449 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2450 if (gimple_code (phi) == GIMPLE_PHI)
2451 {
2452 /* Additionally record the possibility of leaving the original iv
2453 untouched. */
2454 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2455 cand = add_candidate_1 (data,
2456 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2457 SSA_NAME_DEF_STMT (def));
2458 cand->var_before = iv->ssa_name;
2459 cand->var_after = def;
2460 }
2461 }
2462
2463 /* Adds candidates based on the old induction variables. */
2464
2465 static void
2466 add_old_ivs_candidates (struct ivopts_data *data)
2467 {
2468 unsigned i;
2469 struct iv *iv;
2470 bitmap_iterator bi;
2471
2472 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2473 {
2474 iv = ver_info (data, i)->iv;
2475 if (iv && iv->biv_p && !integer_zerop (iv->step))
2476 add_old_iv_candidates (data, iv);
2477 }
2478 }
2479
2480 /* Adds candidates based on the value of the induction variable IV and USE. */
2481
2482 static void
2483 add_iv_value_candidates (struct ivopts_data *data,
2484 struct iv *iv, struct iv_use *use)
2485 {
2486 unsigned HOST_WIDE_INT offset;
2487 tree base;
2488 tree basetype;
2489
2490 add_candidate (data, iv->base, iv->step, false, use);
2491
2492   /* The same, but with initial value zero.  Make such a variable important,
2493      since it is generic enough that possibly many uses may be based
2494      on it.  */
2495 basetype = TREE_TYPE (iv->base);
2496 if (POINTER_TYPE_P (basetype))
2497 basetype = sizetype;
2498 add_candidate (data, build_int_cst (basetype, 0),
2499 iv->step, true, use);
2500
2501 /* Third, try removing the constant offset. Make sure to even
2502 add a candidate for &a[0] vs. (T *)&a. */
2503 base = strip_offset (iv->base, &offset);
2504 if (offset
2505 || base != iv->base)
2506 add_candidate (data, base, iv->step, false, use);
2507 }
2508
2509 /* Adds candidates based on the uses. */
2510
2511 static void
2512 add_derived_ivs_candidates (struct ivopts_data *data)
2513 {
2514 unsigned i;
2515
2516 for (i = 0; i < n_iv_uses (data); i++)
2517 {
2518 struct iv_use *use = iv_use (data, i);
2519
2520 if (!use)
2521 continue;
2522
2523 switch (use->type)
2524 {
2525 case USE_NONLINEAR_EXPR:
2526 case USE_COMPARE:
2527 case USE_ADDRESS:
2528 /* Just add the ivs based on the value of the iv used here. */
2529 add_iv_value_candidates (data, use->iv, use);
2530 break;
2531
2532 default:
2533 gcc_unreachable ();
2534 }
2535 }
2536 }
2537
2538 /* Record important candidates and add them to related_cands bitmaps
2539 if needed. */
2540
2541 static void
2542 record_important_candidates (struct ivopts_data *data)
2543 {
2544 unsigned i;
2545 struct iv_use *use;
2546
2547 for (i = 0; i < n_iv_cands (data); i++)
2548 {
2549 struct iv_cand *cand = iv_cand (data, i);
2550
2551 if (cand->important)
2552 bitmap_set_bit (data->important_candidates, i);
2553 }
2554
2555 data->consider_all_candidates = (n_iv_cands (data)
2556 <= CONSIDER_ALL_CANDIDATES_BOUND);
2557
2558 if (data->consider_all_candidates)
2559 {
2560 /* We will not need "related_cands" bitmaps in this case,
2561 so release them to decrease peak memory consumption. */
2562 for (i = 0; i < n_iv_uses (data); i++)
2563 {
2564 use = iv_use (data, i);
2565 BITMAP_FREE (use->related_cands);
2566 }
2567 }
2568 else
2569 {
2570 /* Add important candidates to the related_cands bitmaps. */
2571 for (i = 0; i < n_iv_uses (data); i++)
2572 bitmap_ior_into (iv_use (data, i)->related_cands,
2573 data->important_candidates);
2574 }
2575 }
2576
2577 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2578 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2579    we allocate a simple list for every use.  */
2580
2581 static void
2582 alloc_use_cost_map (struct ivopts_data *data)
2583 {
2584 unsigned i, size, s, j;
2585
2586 for (i = 0; i < n_iv_uses (data); i++)
2587 {
2588 struct iv_use *use = iv_use (data, i);
2589 bitmap_iterator bi;
2590
2591 if (data->consider_all_candidates)
2592 size = n_iv_cands (data);
2593 else
2594 {
2595 s = 0;
2596 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2597 {
2598 s++;
2599 }
2600
2601 	  /* Round up to a power of two, so that computing the modulo by it is fast.  */
2602 for (size = 1; size < s; size <<= 1)
2603 continue;
2604 }
2605
2606 use->n_map_members = size;
2607 use->cost_map = XCNEWVEC (struct cost_pair, size);
2608 }
2609 }
2610
2611 /* Returns a description of the computation cost of an expression whose
2612    runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY.  */
2613
2614 static comp_cost
2615 new_cost (unsigned runtime, unsigned complexity)
2616 {
2617 comp_cost cost;
2618
2619 cost.cost = runtime;
2620 cost.complexity = complexity;
2621
2622 return cost;
2623 }
2624
2625 /* Adds costs COST1 and COST2. */
2626
2627 static comp_cost
2628 add_costs (comp_cost cost1, comp_cost cost2)
2629 {
2630 cost1.cost += cost2.cost;
2631 cost1.complexity += cost2.complexity;
2632
2633 return cost1;
2634 }

2635 /* Subtracts COST2 from COST1.  */
2636
2637 static comp_cost
2638 sub_costs (comp_cost cost1, comp_cost cost2)
2639 {
2640 cost1.cost -= cost2.cost;
2641 cost1.complexity -= cost2.complexity;
2642
2643 return cost1;
2644 }
2645
2646 /* Returns a negative number if COST1 < COST2, a positive number if
2647 COST1 > COST2, and 0 if COST1 = COST2. */
2648
2649 static int
2650 compare_costs (comp_cost cost1, comp_cost cost2)
2651 {
2652 if (cost1.cost == cost2.cost)
2653 return cost1.complexity - cost2.complexity;
2654
2655 return cost1.cost - cost2.cost;
2656 }
2657
2658 /* Returns true if COST is infinite. */
2659
2660 static bool
2661 infinite_cost_p (comp_cost cost)
2662 {
2663 return cost.cost == INFTY;
2664 }
2665
2666 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2667    depends on invariants DEPENDS_ON, that the value used in expressing it is
2668    VALUE, and that in case of iv elimination the comparison operator is COMP.  */
2669
2670 static void
2671 set_use_iv_cost (struct ivopts_data *data,
2672 struct iv_use *use, struct iv_cand *cand,
2673 comp_cost cost, bitmap depends_on, tree value,
2674 enum tree_code comp, int inv_expr_id)
2675 {
2676 unsigned i, s;
2677
2678 if (infinite_cost_p (cost))
2679 {
2680 BITMAP_FREE (depends_on);
2681 return;
2682 }
2683
2684 if (data->consider_all_candidates)
2685 {
2686 use->cost_map[cand->id].cand = cand;
2687 use->cost_map[cand->id].cost = cost;
2688 use->cost_map[cand->id].depends_on = depends_on;
2689 use->cost_map[cand->id].value = value;
2690 use->cost_map[cand->id].comp = comp;
2691 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2692 return;
2693 }
2694
2695 /* n_map_members is a power of two, so this computes modulo. */
2696 s = cand->id & (use->n_map_members - 1);
2697 for (i = s; i < use->n_map_members; i++)
2698 if (!use->cost_map[i].cand)
2699 goto found;
2700 for (i = 0; i < s; i++)
2701 if (!use->cost_map[i].cand)
2702 goto found;
2703
2704 gcc_unreachable ();
2705
2706 found:
2707 use->cost_map[i].cand = cand;
2708 use->cost_map[i].cost = cost;
2709 use->cost_map[i].depends_on = depends_on;
2710 use->cost_map[i].value = value;
2711 use->cost_map[i].comp = comp;
2712 use->cost_map[i].inv_expr_id = inv_expr_id;
2713 }
2714
2715 /* Gets cost of (USE, CANDIDATE) pair. */
2716
2717 static struct cost_pair *
2718 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2719 struct iv_cand *cand)
2720 {
2721 unsigned i, s;
2722 struct cost_pair *ret;
2723
2724 if (!cand)
2725 return NULL;
2726
2727 if (data->consider_all_candidates)
2728 {
2729 ret = use->cost_map + cand->id;
2730 if (!ret->cand)
2731 return NULL;
2732
2733 return ret;
2734 }
2735
2736 /* n_map_members is a power of two, so this computes modulo. */
2737 s = cand->id & (use->n_map_members - 1);
2738 for (i = s; i < use->n_map_members; i++)
2739 if (use->cost_map[i].cand == cand)
2740 return use->cost_map + i;
2741
2742 for (i = 0; i < s; i++)
2743 if (use->cost_map[i].cand == cand)
2744 return use->cost_map + i;
2745
2746 return NULL;
2747 }
2748
2749 /* Returns an estimate of the cost of computing SEQ.  */
2750
2751 static unsigned
2752 seq_cost (rtx seq, bool speed)
2753 {
2754 unsigned cost = 0;
2755 rtx set;
2756
2757 for (; seq; seq = NEXT_INSN (seq))
2758 {
2759 set = single_set (seq);
2760 if (set)
2761 cost += set_src_cost (SET_SRC (set), speed);
2762 else
2763 cost++;
2764 }
2765
2766 return cost;
2767 }
2768
2769 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2770 static rtx
2771 produce_memory_decl_rtl (tree obj, int *regno)
2772 {
2773 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2774 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2775 rtx x;
2776
2777 gcc_assert (obj);
2778 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2779 {
2780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2781 x = gen_rtx_SYMBOL_REF (address_mode, name);
2782 SET_SYMBOL_REF_DECL (x, obj);
2783 x = gen_rtx_MEM (DECL_MODE (obj), x);
2784 set_mem_addr_space (x, as);
2785 targetm.encode_section_info (obj, x, true);
2786 }
2787 else
2788 {
2789 x = gen_raw_REG (address_mode, (*regno)++);
2790 x = gen_rtx_MEM (DECL_MODE (obj), x);
2791 set_mem_addr_space (x, as);
2792 }
2793
2794 return x;
2795 }
2796
2797 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2798    walk_tree.  DATA contains the next fake register number to use.  */
2799
2800 static tree
2801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2802 {
2803 tree obj = NULL_TREE;
2804 rtx x = NULL_RTX;
2805 int *regno = (int *) data;
2806
2807 switch (TREE_CODE (*expr_p))
2808 {
2809 case ADDR_EXPR:
2810 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2811 handled_component_p (*expr_p);
2812 expr_p = &TREE_OPERAND (*expr_p, 0))
2813 continue;
2814 obj = *expr_p;
2815 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2816 x = produce_memory_decl_rtl (obj, regno);
2817 break;
2818
2819 case SSA_NAME:
2820 *ws = 0;
2821 obj = SSA_NAME_VAR (*expr_p);
2822 if (!DECL_RTL_SET_P (obj))
2823 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2824 break;
2825
2826 case VAR_DECL:
2827 case PARM_DECL:
2828 case RESULT_DECL:
2829 *ws = 0;
2830 obj = *expr_p;
2831
2832 if (DECL_RTL_SET_P (obj))
2833 break;
2834
2835 if (DECL_MODE (obj) == BLKmode)
2836 x = produce_memory_decl_rtl (obj, regno);
2837 else
2838 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2839
2840 break;
2841
2842 default:
2843 break;
2844 }
2845
2846 if (x)
2847 {
2848 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2849 SET_DECL_RTL (obj, x);
2850 }
2851
2852 return NULL_TREE;
2853 }
2854
2855 /* Determines cost of the computation of EXPR. */
2856
2857 static unsigned
2858 computation_cost (tree expr, bool speed)
2859 {
2860 rtx seq, rslt;
2861 tree type = TREE_TYPE (expr);
2862 unsigned cost;
2863 /* Avoid using hard regs in ways which may be unsupported. */
2864 int regno = LAST_VIRTUAL_REGISTER + 1;
2865 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2866 enum node_frequency real_frequency = node->frequency;
2867
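  /* Pretend the function has normal frequency and set the RTL profile
     according to SPEED, so that the expansion below is costed in the
     requested mode; both settings are restored afterwards.  */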
2868 node->frequency = NODE_FREQUENCY_NORMAL;
2869 crtl->maybe_hot_insn_p = speed;
2870 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2871 start_sequence ();
2872 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2873 seq = get_insns ();
2874 end_sequence ();
2875 default_rtl_profile ();
2876 node->frequency = real_frequency;
2877
2878 cost = seq_cost (seq, speed);
2879 if (MEM_P (rslt))
2880 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2881 TYPE_ADDR_SPACE (type), speed);
2882 else if (!REG_P (rslt))
2883 cost += set_src_cost (rslt, speed);
2884
2885 return cost;
2886 }
2887
2888 /* Returns the variable containing the value of candidate CAND at statement STMT.  */
2889
2890 static tree
2891 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2892 {
2893 if (stmt_after_increment (loop, cand, stmt))
2894 return cand->var_after;
2895 else
2896 return cand->var_before;
2897 }
2898
2899 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2900 same precision that is at least as wide as the precision of TYPE, stores
2901 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2902 type of A and B. */
2903
2904 static tree
2905 determine_common_wider_type (tree *a, tree *b)
2906 {
2907 tree wider_type = NULL;
2908 tree suba, subb;
2909 tree atype = TREE_TYPE (*a);
2910
2911 if (CONVERT_EXPR_P (*a))
2912 {
2913 suba = TREE_OPERAND (*a, 0);
2914 wider_type = TREE_TYPE (suba);
2915 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2916 return atype;
2917 }
2918 else
2919 return atype;
2920
2921 if (CONVERT_EXPR_P (*b))
2922 {
2923 subb = TREE_OPERAND (*b, 0);
2924 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2925 return atype;
2926 }
2927 else
2928 return atype;
2929
2930 *a = suba;
2931 *b = subb;
2932 return wider_type;
2933 }
2934
2935 /* Determines the expression by which USE is expressed from induction variable
2936    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2937    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2938
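/* For instance (hypothetical values): if USE is the iv {base 4, step 4}
   and CAND is the iv {base 0, step 2}, the ratio is 2 and the use is
   expressed as 4 + 2 * var, where var holds CAND's value at AT.  */
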
2939 static bool
2940 get_computation_aff (struct loop *loop,
2941 struct iv_use *use, struct iv_cand *cand, gimple at,
2942 struct affine_tree_combination *aff)
2943 {
2944 tree ubase = use->iv->base;
2945 tree ustep = use->iv->step;
2946 tree cbase = cand->iv->base;
2947 tree cstep = cand->iv->step, cstep_common;
2948 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2949 tree common_type, var;
2950 tree uutype;
2951 aff_tree cbase_aff, var_aff;
2952 double_int rat;
2953
2954 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2955 {
2956       /* We do not have a precision to express the values of USE.  */
2957 return false;
2958 }
2959
2960 var = var_at_stmt (loop, cand, at);
2961 uutype = unsigned_type_for (utype);
2962
2963   /* If the conversion is not a noop, perform it.  */
2964 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2965 {
2966 cstep = fold_convert (uutype, cstep);
2967 cbase = fold_convert (uutype, cbase);
2968 var = fold_convert (uutype, var);
2969 }
2970
2971 if (!constant_multiple_of (ustep, cstep, &rat))
2972 return false;
2973
2974   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2975      type, we achieve better folding by computing their difference in this
2976      wider type, and casting the result to UUTYPE.  We do not need to worry
2977      about overflows, as all the arithmetic will in the end be performed in
2978      UUTYPE anyway.  */
2979 common_type = determine_common_wider_type (&ubase, &cbase);
2980
2981 /* use = ubase - ratio * cbase + ratio * var. */
2982 tree_to_aff_combination (ubase, common_type, aff);
2983 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2984 tree_to_aff_combination (var, uutype, &var_aff);
2985
2986 /* We need to shift the value if we are after the increment. */
2987 if (stmt_after_increment (loop, cand, at))
2988 {
2989 aff_tree cstep_aff;
2990
2991 if (common_type != uutype)
2992 cstep_common = fold_convert (common_type, cstep);
2993 else
2994 cstep_common = cstep;
2995
2996 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2997 aff_combination_add (&cbase_aff, &cstep_aff);
2998 }
2999
3000 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3001 aff_combination_add (aff, &cbase_aff);
3002 if (common_type != uutype)
3003 aff_combination_convert (aff, uutype);
3004
3005 aff_combination_scale (&var_aff, rat);
3006 aff_combination_add (aff, &var_aff);
3007
3008 return true;
3009 }
3010
3011 /* Determines the expression by which USE is expressed from induction variable
3012    CAND at statement AT in LOOP.  The computation is unshared.  */
3013
3014 static tree
3015 get_computation_at (struct loop *loop,
3016 struct iv_use *use, struct iv_cand *cand, gimple at)
3017 {
3018 aff_tree aff;
3019 tree type = TREE_TYPE (use->iv->base);
3020
3021 if (!get_computation_aff (loop, use, cand, at, &aff))
3022 return NULL_TREE;
3023 unshare_aff_combination (&aff);
3024 return fold_convert (type, aff_combination_to_tree (&aff));
3025 }
3026
3027 /* Determines the expression by which USE is expressed from induction variable
3028    CAND in LOOP.  The computation is unshared.  */
3029
3030 static tree
3031 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3032 {
3033 return get_computation_at (loop, use, cand, use->stmt);
3034 }
3035
3036 /* Adjust the cost COST for being in loop setup rather than loop body.
3037 If we're optimizing for space, the loop setup overhead is constant;
3038 if we're optimizing for speed, amortize it over the per-iteration cost. */
3039 static unsigned
3040 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3041 {
3042 if (cost == INFTY)
3043 return cost;
3044 else if (optimize_loop_for_speed_p (data->current_loop))
3045 return cost / avg_loop_niter (data->current_loop);
3046 else
3047 return cost;
3048 }
3049
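/* For example (hypothetical numbers): with an average of 10 iterations,
   a setup cost of 20 is accounted as 2 when optimizing for speed.  */
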
3050 DEF_VEC_P (sbitmap);
3051 DEF_VEC_ALLOC_P (sbitmap, heap);
3052
3053 /* Returns true if multiplying by RATIO is allowed in an address.  Tests the
3054    validity for a memory reference accessing memory of mode MODE in
3055    address space AS.  */
3056
3057 bool
3058 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3059 addr_space_t as)
3060 {
3061 #define MAX_RATIO 128
3062 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3063 static VEC (sbitmap, heap) *valid_mult_list;
3064 sbitmap valid_mult;
3065
3066 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3067 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3068
3069 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3070 if (!valid_mult)
3071 {
3072 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3073 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3074 rtx addr;
3075 HOST_WIDE_INT i;
3076
3077 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3078 sbitmap_zero (valid_mult);
3079 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3080 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3081 {
3082 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3083 if (memory_address_addr_space_p (mode, addr, as))
3084 SET_BIT (valid_mult, i + MAX_RATIO);
3085 }
3086
3087 if (dump_file && (dump_flags & TDF_DETAILS))
3088 {
3089 fprintf (dump_file, " allowed multipliers:");
3090 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3091 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3092 fprintf (dump_file, " %d", (int) i);
3093 fprintf (dump_file, "\n");
3094 fprintf (dump_file, "\n");
3095 }
3096
3097 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3098 }
3099
3100 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3101 return false;
3102
3103 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3104 }
3105
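/* For example, on a target with base + index * scale addressing, the
   probing above would typically leave only ratios such as 1, 2, 4 and 8
   marked as valid multipliers.  */
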
3106 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3107 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3108 variable is omitted. Compute the cost for a memory reference that accesses
3109 a memory location of mode MEM_MODE in address space AS.
3110
3111 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3112 size of MEM_MODE / RATIO) is available. To make this determination, we
3113 look at the size of the increment to be made, which is given in CSTEP.
3114 CSTEP may be zero if the step is unknown.
3115 STMT_AFTER_INC is true iff the statement we're looking at is after the
3116 increment of the original biv.
3117
3118 TODO -- there must be some better way. This all is quite crude. */
3119
3120 typedef struct
3121 {
3122 HOST_WIDE_INT min_offset, max_offset;
3123 unsigned costs[2][2][2][2];
3124 } *address_cost_data;
3125
3126 DEF_VEC_P (address_cost_data);
3127 DEF_VEC_ALLOC_P (address_cost_data, heap);
3128
3129 static comp_cost
3130 get_address_cost (bool symbol_present, bool var_present,
3131 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3132 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3133 addr_space_t as, bool speed,
3134 bool stmt_after_inc, bool *may_autoinc)
3135 {
3136 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3137 static VEC(address_cost_data, heap) *address_cost_data_list;
3138 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3139 address_cost_data data;
3140 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3141 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3142 unsigned cost, acost, complexity;
3143 bool offset_p, ratio_p, autoinc;
3144 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3145 unsigned HOST_WIDE_INT mask;
3146 unsigned bits;
3147
3148 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3149 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3150 data_index + 1);
3151
3152 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3153 if (!data)
3154 {
3155 HOST_WIDE_INT i;
3156 HOST_WIDE_INT rat, off = 0;
3157 int old_cse_not_expected, width;
3158 unsigned sym_p, var_p, off_p, rat_p, add_c;
3159 rtx seq, addr, base;
3160 rtx reg0, reg1;
3161
3162 data = (address_cost_data) xcalloc (1, sizeof (*data));
3163
3164 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3165
3166 width = GET_MODE_BITSIZE (address_mode) - 1;
3167 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3168 width = HOST_BITS_PER_WIDE_INT - 1;
3169 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3170
3171 for (i = width; i >= 0; i--)
3172 {
3173 off = -((HOST_WIDE_INT) 1 << i);
3174 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3175 if (memory_address_addr_space_p (mem_mode, addr, as))
3176 break;
3177 }
3178       data->min_offset = (i == -1 ? 0 : off);
3179
3180 for (i = width; i >= 0; i--)
3181 {
3182 off = ((HOST_WIDE_INT) 1 << i) - 1;
3183 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3184 if (memory_address_addr_space_p (mem_mode, addr, as))
3185 break;
3186 }
3187 if (i == -1)
3188 off = 0;
3189 data->max_offset = off;
3190
3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 {
3193 fprintf (dump_file, "get_address_cost:\n");
3194 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3195 GET_MODE_NAME (mem_mode),
3196 data->min_offset);
3197 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3198 GET_MODE_NAME (mem_mode),
3199 data->max_offset);
3200 }
3201
3202 rat = 1;
3203 for (i = 2; i <= MAX_RATIO; i++)
3204 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3205 {
3206 rat = i;
3207 break;
3208 }
3209
3210 /* Compute the cost of various addressing modes. */
3211 acost = 0;
3212 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3213 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3214
3215 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3216 || USE_STORE_PRE_DECREMENT (mem_mode))
3217 {
3218 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3219 has_predec[mem_mode]
3220 = memory_address_addr_space_p (mem_mode, addr, as);
3221 }
3222 if (USE_LOAD_POST_DECREMENT (mem_mode)
3223 || USE_STORE_POST_DECREMENT (mem_mode))
3224 {
3225 addr = gen_rtx_POST_DEC (address_mode, reg0);
3226 has_postdec[mem_mode]
3227 = memory_address_addr_space_p (mem_mode, addr, as);
3228 }
3229 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3230 	  || USE_STORE_PRE_INCREMENT (mem_mode))
3231 {
3232 addr = gen_rtx_PRE_INC (address_mode, reg0);
3233 has_preinc[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3235 }
3236 if (USE_LOAD_POST_INCREMENT (mem_mode)
3237 || USE_STORE_POST_INCREMENT (mem_mode))
3238 {
3239 addr = gen_rtx_POST_INC (address_mode, reg0);
3240 has_postinc[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
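      /* Each index I below encodes one combination of the four address
	 parts: bit 0 = symbol, bit 1 = var, bit 2 = offset, bit 3 = ratio.  */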
3243 for (i = 0; i < 16; i++)
3244 {
3245 sym_p = i & 1;
3246 var_p = (i >> 1) & 1;
3247 off_p = (i >> 2) & 1;
3248 rat_p = (i >> 3) & 1;
3249
3250 addr = reg0;
3251 if (rat_p)
3252 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3253 gen_int_mode (rat, address_mode));
3254
3255 if (var_p)
3256 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3257
3258 if (sym_p)
3259 {
3260 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3261 /* ??? We can run into trouble with some backends by presenting
3262 it with symbols which haven't been properly passed through
3263 targetm.encode_section_info. By setting the local bit, we
3264 enhance the probability of things working. */
3265 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3266
3267 if (off_p)
3268 base = gen_rtx_fmt_e (CONST, address_mode,
3269 gen_rtx_fmt_ee
3270 (PLUS, address_mode, base,
3271 gen_int_mode (off, address_mode)));
3272 }
3273 else if (off_p)
3274 base = gen_int_mode (off, address_mode);
3275 else
3276 base = NULL_RTX;
3277
3278 if (base)
3279 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3280
3281 start_sequence ();
3282 	  /* To avoid splitting addressing modes, pretend that no CSE will
3283 	     follow.  */
3284 old_cse_not_expected = cse_not_expected;
3285 cse_not_expected = true;
3286 addr = memory_address_addr_space (mem_mode, addr, as);
3287 cse_not_expected = old_cse_not_expected;
3288 seq = get_insns ();
3289 end_sequence ();
3290
3291 acost = seq_cost (seq, speed);
3292 acost += address_cost (addr, mem_mode, as, speed);
3293
3294 if (!acost)
3295 acost = 1;
3296 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3297 }
3298
3299       /* On some targets, it is quite expensive to load a symbol to a register,
3300 	 which makes addresses that contain symbols look much more expensive.
3301 	 However, the symbol will have to be loaded in any case before the
3302 	 loop (and quite likely we have it in a register already), so it does
3303 	 not make much sense to penalize them too heavily.  So make some final
3304 	 tweaks for the SYMBOL_PRESENT modes:
3305
3306 	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3307 	 var is cheaper, use this mode with a small penalty.
3308 	 If VAR_PRESENT is true, try whether the mode with
3309 	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3310 	 and if this is the case, use it.  */
3311 add_c = add_cost[speed][address_mode];
3312 for (i = 0; i < 8; i++)
3313 {
3314 var_p = i & 1;
3315 off_p = (i >> 1) & 1;
3316 rat_p = (i >> 2) & 1;
3317
3318 acost = data->costs[0][1][off_p][rat_p] + 1;
3319 if (var_p)
3320 acost += add_c;
3321
3322 if (acost < data->costs[1][var_p][off_p][rat_p])
3323 data->costs[1][var_p][off_p][rat_p] = acost;
3324 }
3325
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3327 {
3328 fprintf (dump_file, "Address costs:\n");
3329
3330 for (i = 0; i < 16; i++)
3331 {
3332 sym_p = i & 1;
3333 var_p = (i >> 1) & 1;
3334 off_p = (i >> 2) & 1;
3335 rat_p = (i >> 3) & 1;
3336
3337 fprintf (dump_file, " ");
3338 if (sym_p)
3339 fprintf (dump_file, "sym + ");
3340 if (var_p)
3341 fprintf (dump_file, "var + ");
3342 if (off_p)
3343 fprintf (dump_file, "cst + ");
3344 if (rat_p)
3345 fprintf (dump_file, "rat * ");
3346
3347 acost = data->costs[sym_p][var_p][off_p][rat_p];
3348 fprintf (dump_file, "index costs %d\n", acost);
3349 }
3350 if (has_predec[mem_mode] || has_postdec[mem_mode]
3351 || has_preinc[mem_mode] || has_postinc[mem_mode])
3352 fprintf (dump_file, " May include autoinc/dec\n");
3353 fprintf (dump_file, "\n");
3354 }
3355
3356 VEC_replace (address_cost_data, address_cost_data_list,
3357 data_index, data);
3358 }
3359
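  /* Sign-extend OFFSET from the width of the address mode, so that large
     unsigned values are interpreted as negative offsets.  */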
3360 bits = GET_MODE_BITSIZE (address_mode);
3361 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3362 offset &= mask;
3363 if ((offset >> (bits - 1) & 1))
3364 offset |= ~mask;
3365 s_offset = offset;
3366
3367 autoinc = false;
3368 msize = GET_MODE_SIZE (mem_mode);
3369 autoinc_offset = offset;
3370 if (stmt_after_inc)
3371 autoinc_offset += ratio * cstep;
3372 if (symbol_present || var_present || ratio != 1)
3373 autoinc = false;
3374 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3375 && msize == cstep)
3376 || (has_postdec[mem_mode] && autoinc_offset == 0
3377 && msize == -cstep)
3378 || (has_preinc[mem_mode] && autoinc_offset == msize
3379 && msize == cstep)
3380 || (has_predec[mem_mode] && autoinc_offset == -msize
3381 && msize == -cstep))
3382 autoinc = true;
3383
3384 cost = 0;
3385 offset_p = (s_offset != 0
3386 && data->min_offset <= s_offset
3387 && s_offset <= data->max_offset);
3388 ratio_p = (ratio != 1
3389 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3390
3391 if (ratio != 1 && !ratio_p)
3392 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3393
3394 if (s_offset && !offset_p && !symbol_present)
3395 cost += add_cost[speed][address_mode];
3396
3397 if (may_autoinc)
3398 *may_autoinc = autoinc;
3399 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3400 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3401 return new_cost (cost + acost, complexity);
3402 }
3403
3404 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
3405    the EXPR operand holding the multiplication.  COST0 and COST1 are the costs for
3406 calculating the operands of EXPR. Returns true if successful, and returns
3407 the cost in COST. */
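
/* For instance, x + y * 8 can be implemented as the shift-add
   x + (y << 3); the multiplication by 8 is costed with m = 3.  */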
3408
3409 static bool
3410 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3411 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3412 {
3413 comp_cost res;
3414 tree op1 = TREE_OPERAND (expr, 1);
3415 tree cst = TREE_OPERAND (mult, 1);
3416 tree multop = TREE_OPERAND (mult, 0);
3417 int m = exact_log2 (int_cst_value (cst));
3418 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3419 int sa_cost;
3420
3421 if (!(m >= 0 && m < maxm))
3422 return false;
3423
3424 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3425 ? shiftadd_cost[speed][mode][m]
3426 : (mult == op1
3427 ? shiftsub1_cost[speed][mode][m]
3428 : shiftsub0_cost[speed][mode][m]));
3429 res = new_cost (sa_cost, 0);
3430 res = add_costs (res, mult == op1 ? cost0 : cost1);
3431
3432 STRIP_NOPS (multop);
3433 if (!is_gimple_val (multop))
3434 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3435
3436 *cost = res;
3437 return true;
3438 }
3439
3440 /* Estimates cost of forcing expression EXPR into a variable. */
3441
3442 static comp_cost
3443 force_expr_to_var_cost (tree expr, bool speed)
3444 {
3445 static bool costs_initialized = false;
3446 static unsigned integer_cost [2];
3447 static unsigned symbol_cost [2];
3448 static unsigned address_cost [2];
3449 tree op0, op1;
3450 comp_cost cost0, cost1, cost;
3451 enum machine_mode mode;
3452
3453 if (!costs_initialized)
3454 {
3455 tree type = build_pointer_type (integer_type_node);
3456 tree var, addr;
3457 rtx x;
3458 int i;
3459
3460 var = create_tmp_var_raw (integer_type_node, "test_var");
3461 TREE_STATIC (var) = 1;
3462 x = produce_memory_decl_rtl (var, NULL);
3463 SET_DECL_RTL (var, x);
3464
3465 addr = build1 (ADDR_EXPR, type, var);
3466
3468 for (i = 0; i < 2; i++)
3469 {
3470 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3471 2000), i);
3472
3473 symbol_cost[i] = computation_cost (addr, i) + 1;
3474
3475 address_cost[i]
3476 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3477 if (dump_file && (dump_flags & TDF_DETAILS))
3478 {
3479 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3480 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3481 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3482 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3483 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3484 fprintf (dump_file, "\n");
3485 }
3486 }
3487
3488 costs_initialized = true;
3489 }
3490
3491 STRIP_NOPS (expr);
3492
3493 if (SSA_VAR_P (expr))
3494 return no_cost;
3495
3496 if (is_gimple_min_invariant (expr))
3497 {
3498 if (TREE_CODE (expr) == INTEGER_CST)
3499 return new_cost (integer_cost [speed], 0);
3500
3501 if (TREE_CODE (expr) == ADDR_EXPR)
3502 {
3503 tree obj = TREE_OPERAND (expr, 0);
3504
3505 if (TREE_CODE (obj) == VAR_DECL
3506 || TREE_CODE (obj) == PARM_DECL
3507 || TREE_CODE (obj) == RESULT_DECL)
3508 return new_cost (symbol_cost [speed], 0);
3509 }
3510
3511 return new_cost (address_cost [speed], 0);
3512 }
3513
3514 switch (TREE_CODE (expr))
3515 {
3516 case POINTER_PLUS_EXPR:
3517 case PLUS_EXPR:
3518 case MINUS_EXPR:
3519 case MULT_EXPR:
3520 op0 = TREE_OPERAND (expr, 0);
3521 op1 = TREE_OPERAND (expr, 1);
3522 STRIP_NOPS (op0);
3523 STRIP_NOPS (op1);
3524
3525 if (is_gimple_val (op0))
3526 cost0 = no_cost;
3527 else
3528 cost0 = force_expr_to_var_cost (op0, speed);
3529
3530 if (is_gimple_val (op1))
3531 cost1 = no_cost;
3532 else
3533 cost1 = force_expr_to_var_cost (op1, speed);
3534
3535 break;
3536
3537 case NEGATE_EXPR:
3538 op0 = TREE_OPERAND (expr, 0);
3539 STRIP_NOPS (op0);
3540 op1 = NULL_TREE;
3541
3542 if (is_gimple_val (op0))
3543 cost0 = no_cost;
3544 else
3545 cost0 = force_expr_to_var_cost (op0, speed);
3546
3547 cost1 = no_cost;
3548 break;
3549
3550 default:
3551 /* Just an arbitrary value, FIXME. */
3552 return new_cost (target_spill_cost[speed], 0);
3553 }
3554
3555 mode = TYPE_MODE (TREE_TYPE (expr));
3556 switch (TREE_CODE (expr))
3557 {
3558 case POINTER_PLUS_EXPR:
3559 case PLUS_EXPR:
3560 case MINUS_EXPR:
3561 case NEGATE_EXPR:
3562 cost = new_cost (add_cost[speed][mode], 0);
3563 if (TREE_CODE (expr) != NEGATE_EXPR)
3564 {
3565 tree mult = NULL_TREE;
3566 comp_cost sa_cost;
3567 if (TREE_CODE (op1) == MULT_EXPR)
3568 mult = op1;
3569 else if (TREE_CODE (op0) == MULT_EXPR)
3570 mult = op0;
3571
3572 if (mult != NULL_TREE
3573 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3574 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3575 &sa_cost))
3576 return sa_cost;
3577 }
3578 break;
3579
3580 case MULT_EXPR:
3581 if (cst_and_fits_in_hwi (op0))
3582 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3583 mode, speed), 0);
3584 else if (cst_and_fits_in_hwi (op1))
3585 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3586 mode, speed), 0);
3587 else
3588 return new_cost (target_spill_cost [speed], 0);
3589 break;
3590
3591 default:
3592 gcc_unreachable ();
3593 }
3594
3595 cost = add_costs (cost, cost0);
3596 cost = add_costs (cost, cost1);
3597
3598 /* Bound the cost by target_spill_cost. The parts of complicated
3599 computations often are either loop invariant or at least can
3600 be shared between several iv uses, so letting this grow without
3601 limits would not give reasonable results. */
3602 if (cost.cost > (int) target_spill_cost [speed])
3603 cost.cost = target_spill_cost [speed];
3604
3605 return cost;
3606 }
3607
3608 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3609 invariants the computation depends on. */
3610
3611 static comp_cost
3612 force_var_cost (struct ivopts_data *data,
3613 tree expr, bitmap *depends_on)
3614 {
3615 if (depends_on)
3616 {
3617 fd_ivopts_data = data;
3618 walk_tree (&expr, find_depends, depends_on, NULL);
3619 }
3620
3621 return force_expr_to_var_cost (expr, data->speed);
3622 }
3623
3624 /* Estimates the cost of expressing address ADDR as var + symbol + offset.  The
3625    value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
3626    set to false if the corresponding part is missing.  DEPENDS_ON is a set of
3627    the invariants the computation depends on.  */
3628
3629 static comp_cost
3630 split_address_cost (struct ivopts_data *data,
3631 tree addr, bool *symbol_present, bool *var_present,
3632 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3633 {
3634 tree core;
3635 HOST_WIDE_INT bitsize;
3636 HOST_WIDE_INT bitpos;
3637 tree toffset;
3638 enum machine_mode mode;
3639 int unsignedp, volatilep;
3640
3641 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3642 &unsignedp, &volatilep, false);
3643
3644 if (toffset != 0
3645 || bitpos % BITS_PER_UNIT != 0
3646 || TREE_CODE (core) != VAR_DECL)
3647 {
3648 *symbol_present = false;
3649 *var_present = true;
3650 fd_ivopts_data = data;
3651 walk_tree (&addr, find_depends, depends_on, NULL);
3652 return new_cost (target_spill_cost[data->speed], 0);
3653 }
3654
3655 *offset += bitpos / BITS_PER_UNIT;
3656 if (TREE_STATIC (core)
3657 || DECL_EXTERNAL (core))
3658 {
3659 *symbol_present = true;
3660 *var_present = false;
3661 return no_cost;
3662 }
3663
3664 *symbol_present = false;
3665 *var_present = true;
3666 return no_cost;
3667 }
3668
3669 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3670    var + symbol + offset.  The value of the offset is added to OFFSET;
3671    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3672    part is missing.  DEPENDS_ON is a set of the invariants the computation
3673    depends on.  */
3674
3675 static comp_cost
3676 ptr_difference_cost (struct ivopts_data *data,
3677 tree e1, tree e2, bool *symbol_present, bool *var_present,
3678 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3679 {
3680 HOST_WIDE_INT diff = 0;
3681 aff_tree aff_e1, aff_e2;
3682 tree type;
3683
3684 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3685
3686 if (ptr_difference_const (e1, e2, &diff))
3687 {
3688 *offset += diff;
3689 *symbol_present = false;
3690 *var_present = false;
3691 return no_cost;
3692 }
3693
3694 if (integer_zerop (e2))
3695 return split_address_cost (data, TREE_OPERAND (e1, 0),
3696 symbol_present, var_present, offset, depends_on);
3697
3698 *symbol_present = false;
3699 *var_present = true;
3700
3701 type = signed_type_for (TREE_TYPE (e1));
3702 tree_to_aff_combination (e1, type, &aff_e1);
3703 tree_to_aff_combination (e2, type, &aff_e2);
3704 aff_combination_scale (&aff_e2, double_int_minus_one);
3705 aff_combination_add (&aff_e1, &aff_e2);
3706
3707 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3708 }
3709
3710 /* Estimates the cost of expressing the difference E1 - E2 as
3711    var + symbol + offset.  The value of the offset is added to OFFSET;
3712    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3713    part is missing.  DEPENDS_ON is a set of the invariants the computation
3714    depends on.  */
3715
3716 static comp_cost
3717 difference_cost (struct ivopts_data *data,
3718 tree e1, tree e2, bool *symbol_present, bool *var_present,
3719 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3720 {
3721 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3722 unsigned HOST_WIDE_INT off1, off2;
3723 aff_tree aff_e1, aff_e2;
3724 tree type;
3725
3726 e1 = strip_offset (e1, &off1);
3727 e2 = strip_offset (e2, &off2);
3728 *offset += off1 - off2;
3729
3730 STRIP_NOPS (e1);
3731 STRIP_NOPS (e2);
3732
3733 if (TREE_CODE (e1) == ADDR_EXPR)
3734 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3735 offset, depends_on);
3736 *symbol_present = false;
3737
3738 if (operand_equal_p (e1, e2, 0))
3739 {
3740 *var_present = false;
3741 return no_cost;
3742 }
3743
3744 *var_present = true;
3745
3746 if (integer_zerop (e2))
3747 return force_var_cost (data, e1, depends_on);
3748
3749 if (integer_zerop (e1))
3750 {
3751 comp_cost cost = force_var_cost (data, e2, depends_on);
3752 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3753 return cost;
3754 }
3755
3756 type = signed_type_for (TREE_TYPE (e1));
3757 tree_to_aff_combination (e1, type, &aff_e1);
3758 tree_to_aff_combination (e2, type, &aff_e2);
3759 aff_combination_scale (&aff_e2, double_int_minus_one);
3760 aff_combination_add (&aff_e1, &aff_e2);
3761
3762 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3763 }
3764
3765 /* Returns true if AFF1 and AFF2 are identical. */
3766
3767 static bool
3768 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3769 {
3770 unsigned i;
3771
3772 if (aff1->n != aff2->n)
3773 return false;
3774
3775 for (i = 0; i < aff1->n; i++)
3776 {
3777 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3778 return false;
3779
3780 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3781 return false;
3782 }
3783 return true;
3784 }
3785
3786 /* Stores EXPR in DATA->inv_expr_tab and returns its inv_expr_id.  */
3787
3788 static int
3789 get_expr_id (struct ivopts_data *data, tree expr)
3790 {
3791 struct iv_inv_expr_ent ent;
3792 struct iv_inv_expr_ent **slot;
3793
3794 ent.expr = expr;
3795 ent.hash = iterative_hash_expr (expr, 0);
3796 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3797 &ent, INSERT);
3798 if (*slot)
3799 return (*slot)->id;
3800
3801 *slot = XNEW (struct iv_inv_expr_ent);
3802 (*slot)->expr = expr;
3803 (*slot)->hash = ent.hash;
3804 (*slot)->id = data->inv_expr_id++;
3805 return (*slot)->id;
3806 }
3807
3808 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3809 requires a new compiler generated temporary. Returns -1 otherwise.
3810 ADDRESS_P is a flag indicating if the expression is for address
3811 computation. */
3812
3813 static int
3814 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3815 tree cbase, HOST_WIDE_INT ratio,
3816 bool address_p)
3817 {
3818 aff_tree ubase_aff, cbase_aff;
3819 tree expr, ub, cb;
3820
3821 STRIP_NOPS (ubase);
3822 STRIP_NOPS (cbase);
3823 ub = ubase;
3824 cb = cbase;
3825
3826 if ((TREE_CODE (ubase) == INTEGER_CST)
3827 && (TREE_CODE (cbase) == INTEGER_CST))
3828 return -1;
3829
3830 /* Strips the constant part. */
3831 if (TREE_CODE (ubase) == PLUS_EXPR
3832 || TREE_CODE (ubase) == MINUS_EXPR
3833 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3834 {
3835 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3836 ubase = TREE_OPERAND (ubase, 0);
3837 }
3838
3839 /* Strips the constant part. */
3840 if (TREE_CODE (cbase) == PLUS_EXPR
3841 || TREE_CODE (cbase) == MINUS_EXPR
3842 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3843 {
3844 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3845 cbase = TREE_OPERAND (cbase, 0);
3846 }
3847
3848 if (address_p)
3849 {
3850 if (((TREE_CODE (ubase) == SSA_NAME)
3851 || (TREE_CODE (ubase) == ADDR_EXPR
3852 && is_gimple_min_invariant (ubase)))
3853 && (TREE_CODE (cbase) == INTEGER_CST))
3854 return -1;
3855
3856 if (((TREE_CODE (cbase) == SSA_NAME)
3857 || (TREE_CODE (cbase) == ADDR_EXPR
3858 && is_gimple_min_invariant (cbase)))
3859 && (TREE_CODE (ubase) == INTEGER_CST))
3860 return -1;
3861 }
3862
3863 if (ratio == 1)
3864 {
3865       if (operand_equal_p (ubase, cbase, 0))
3866 return -1;
3867
3868 if (TREE_CODE (ubase) == ADDR_EXPR
3869 && TREE_CODE (cbase) == ADDR_EXPR)
3870 {
3871 tree usym, csym;
3872
3873 usym = TREE_OPERAND (ubase, 0);
3874 csym = TREE_OPERAND (cbase, 0);
3875 if (TREE_CODE (usym) == ARRAY_REF)
3876 {
3877 tree ind = TREE_OPERAND (usym, 1);
3878 if (TREE_CODE (ind) == INTEGER_CST
3879 && host_integerp (ind, 0)
3880 && TREE_INT_CST_LOW (ind) == 0)
3881 usym = TREE_OPERAND (usym, 0);
3882 }
3883 if (TREE_CODE (csym) == ARRAY_REF)
3884 {
3885 tree ind = TREE_OPERAND (csym, 1);
3886 if (TREE_CODE (ind) == INTEGER_CST
3887 && host_integerp (ind, 0)
3888 && TREE_INT_CST_LOW (ind) == 0)
3889 csym = TREE_OPERAND (csym, 0);
3890 }
3891 if (operand_equal_p (usym, csym, 0))
3892 return -1;
3893 }
3894       /* Now do a more complex comparison.  */
3895 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3896 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3897 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3898 return -1;
3899 }
3900
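  /* Compute UBASE - RATIO * CBASE in affine form and use the resulting
     expression as the key for the invariant-expression table.  */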
3901 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3902 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3903
3904 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3905 aff_combination_add (&ubase_aff, &cbase_aff);
3906 expr = aff_combination_to_tree (&ubase_aff);
3907 return get_expr_id (data, expr);
3908 }
3909
3910
3912 /* Determines the cost of the computation by which USE is expressed
3913    from induction variable CAND.  If ADDRESS_P is true, we just need
3914    to create an address from it, otherwise we want to get it into a
3915    register.  A set of invariants we depend on is stored in
3916    DEPENDS_ON.  AT is the statement at which the value is computed.
3917    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3918    addressing is likely.  */
3919
3920 static comp_cost
3921 get_computation_cost_at (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand,
3923 bool address_p, bitmap *depends_on, gimple at,
3924 bool *can_autoinc,
3925 int *inv_expr_id)
3926 {
3927 tree ubase = use->iv->base, ustep = use->iv->step;
3928 tree cbase, cstep;
3929 tree utype = TREE_TYPE (ubase), ctype;
3930 unsigned HOST_WIDE_INT cstepi, offset = 0;
3931 HOST_WIDE_INT ratio, aratio;
3932 bool var_present, symbol_present, stmt_is_after_inc;
3933 comp_cost cost;
3934 double_int rat;
3935 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3936
3937 *depends_on = NULL;
3938
3939 /* Only consider real candidates. */
3940 if (!cand->iv)
3941 return infinite_cost;
3942
3943 cbase = cand->iv->base;
3944 cstep = cand->iv->step;
3945 ctype = TREE_TYPE (cbase);
3946
3947 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3948 {
3949 /* We do not have a precision to express the values of use. */
3950 return infinite_cost;
3951 }
3952
3953 if (address_p
3954 || (use->iv->base_object
3955 && cand->iv->base_object
3956 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3957 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3958 {
3959 /* Do not try to express address of an object with computation based
3960 on address of a different object. This may cause problems in rtl
3961 level alias analysis (that does not expect this to be happening,
3962 as this is illegal in C), and would be unlikely to be useful
3963 anyway. */
3964 if (use->iv->base_object
3965 && cand->iv->base_object
3966 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3967 return infinite_cost;
3968 }
3969
3970 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3971 {
3972 /* TODO -- add direct handling of this case. */
3973 goto fallback;
3974 }
3975
3976   /* CSTEPI is removed from the offset in case the statement is after the
3977      increment.  If the step is not constant, we use zero instead.
3978      This is a bit imprecise (there is the extra addition), but
3979      redundancy elimination is likely to transform the code so that
3980      it uses the value of the variable before the increment anyway,
3981      so this is not all that unrealistic.  */
3982 if (cst_and_fits_in_hwi (cstep))
3983 cstepi = int_cst_value (cstep);
3984 else
3985 cstepi = 0;
3986
3987 if (!constant_multiple_of (ustep, cstep, &rat))
3988 return infinite_cost;
3989
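  /* At this point ustep == rat * cstep holds; e.g. for ustep 8 and
     cstep 2, rat is 4.  RAT must fit in a signed HWI to be usable
     as RATIO below.  */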
3990 if (double_int_fits_in_shwi_p (rat))
3991 ratio = double_int_to_shwi (rat);
3992 else
3993 return infinite_cost;
3994
3995 STRIP_NOPS (cbase);
3996 ctype = TREE_TYPE (cbase);
3997
3998 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3999
4000 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4001 or ratio == 1, it is better to handle this like
4002
4003 ubase - ratio * cbase + ratio * var
4004
4005      (this also holds in the case ratio == -1; handling it directly is a TODO).  */
4006
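  /* For instance, with cbase == 8 and ratio == 2 the use becomes
     ubase + 2 * (var - 8) = (ubase - 16) + 2 * var, so below the
     constant part goes into OFFSET as -ratio * cbase.  */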
4007 if (cst_and_fits_in_hwi (cbase))
4008 {
4009 offset = - ratio * int_cst_value (cbase);
4010 cost = difference_cost (data,
4011 ubase, build_int_cst (utype, 0),
4012 &symbol_present, &var_present, &offset,
4013 depends_on);
4014 cost.cost /= avg_loop_niter (data->current_loop);
4015 }
4016 else if (ratio == 1)
4017 {
4018 tree real_cbase = cbase;
4019
4020 /* Check to see if any adjustment is needed. */
4021 if (cstepi == 0 && stmt_is_after_inc)
4022 {
4023 aff_tree real_cbase_aff;
4024 aff_tree cstep_aff;
4025
4026 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4027 &real_cbase_aff);
4028 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4029
4030 aff_combination_add (&real_cbase_aff, &cstep_aff);
4031 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4032 }
4033
4034 cost = difference_cost (data,
4035 ubase, real_cbase,
4036 &symbol_present, &var_present, &offset,
4037 depends_on);
4038 cost.cost /= avg_loop_niter (data->current_loop);
4039 }
4040 else if (address_p
4041 && !POINTER_TYPE_P (ctype)
4042 && multiplier_allowed_in_address_p
4043 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4044 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4045 {
4046 cbase
4047 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4048 cost = difference_cost (data,
4049 ubase, cbase,
4050 &symbol_present, &var_present, &offset,
4051 depends_on);
4052 cost.cost /= avg_loop_niter (data->current_loop);
4053 }
4054 else
4055 {
4056 cost = force_var_cost (data, cbase, depends_on);
4057 cost = add_costs (cost,
4058 difference_cost (data,
4059 ubase, build_int_cst (utype, 0),
4060 &symbol_present, &var_present,
4061 &offset, depends_on));
4062 cost.cost /= avg_loop_niter (data->current_loop);
4063 cost.cost += add_cost[data->speed][TYPE_MODE (ctype)];
4064 }
4065
4066 if (inv_expr_id)
4067 {
4068 *inv_expr_id =
4069 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4070       /* Clear depends_on -- the loop invariant expression subsumes
4071 	 the dependences.  */
4071 if (*inv_expr_id != -1 && depends_on && *depends_on)
4072 bitmap_clear (*depends_on);
4073 }
4074
4075 /* If we are after the increment, the value of the candidate is higher by
4076 one iteration. */
4077 if (stmt_is_after_inc)
4078 offset -= ratio * cstepi;
4079
4080   /* Now the computation has the shape symbol + var1 + const + ratio * var2.
4081 (symbol/var1/const parts may be omitted). If we are looking for an
4082 address, find the cost of addressing this. */
4083 if (address_p)
4084 return add_costs (cost,
4085 get_address_cost (symbol_present, var_present,
4086 offset, ratio, cstepi,
4087 TYPE_MODE (TREE_TYPE (utype)),
4088 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4089 speed, stmt_is_after_inc,
4090 can_autoinc));
4091
4092 /* Otherwise estimate the costs for computing the expression. */
4093 if (!symbol_present && !var_present && !offset)
4094 {
4095 if (ratio != 1)
4096 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4097 return cost;
4098 }
4099
4100   /* Symbol + offset should be compile-time computable, so consider that they
4101 are added once to the variable, if present. */
4102 if (var_present && (symbol_present || offset))
4103 cost.cost += adjust_setup_cost (data,
4104 add_cost[speed][TYPE_MODE (ctype)]);
4105
4106 /* Having offset does not affect runtime cost in case it is added to
4107 symbol, but it increases complexity. */
4108 if (offset)
4109 cost.complexity++;
4110
4111 cost.cost += add_cost[speed][TYPE_MODE (ctype)];
4112
4113 aratio = ratio > 0 ? ratio : -ratio;
4114 if (aratio != 1)
4115 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4116 return cost;
4117
4118 fallback:
4119 if (can_autoinc)
4120 *can_autoinc = false;
4121
4122 {
4123 /* Just get the expression, expand it and measure the cost. */
4124 tree comp = get_computation_at (data->current_loop, use, cand, at);
4125
4126 if (!comp)
4127 return infinite_cost;
4128
4129 if (address_p)
4130 comp = build_simple_mem_ref (comp);
4131
4132 return new_cost (computation_cost (comp, speed), 0);
4133 }
4134 }
4135
4136 /* Determines the cost of the computation by which USE is expressed
4137    from induction variable CAND.  If ADDRESS_P is true, we just need
4138    to create an address from it, otherwise we want to get it into a
4139    register.  A set of invariants we depend on is stored in
4140    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4141    autoinc addressing is likely.  */
4142
4143 static comp_cost
4144 get_computation_cost (struct ivopts_data *data,
4145 struct iv_use *use, struct iv_cand *cand,
4146 bool address_p, bitmap *depends_on,
4147 bool *can_autoinc, int *inv_expr_id)
4148 {
4149 return get_computation_cost_at (data,
4150 use, cand, address_p, depends_on, use->stmt,
4151 can_autoinc, inv_expr_id);
4152 }
4153
4154 /* Determines cost of basing replacement of USE on CAND in a generic
4155 expression. */
4156
4157 static bool
4158 determine_use_iv_cost_generic (struct ivopts_data *data,
4159 struct iv_use *use, struct iv_cand *cand)
4160 {
4161 bitmap depends_on;
4162 comp_cost cost;
4163 int inv_expr_id = -1;
4164
4165 /* The simple case first -- if we need to express value of the preserved
4166 original biv, the cost is 0. This also prevents us from counting the
4167 cost of increment twice -- once at this use and once in the cost of
4168 the candidate. */
4169 if (cand->pos == IP_ORIGINAL
4170 && cand->incremented_at == use->stmt)
4171 {
4172 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4173 ERROR_MARK, -1);
4174 return true;
4175 }
4176
4177 cost = get_computation_cost (data, use, cand, false, &depends_on,
4178 NULL, &inv_expr_id);
4179
4180 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4181 inv_expr_id);
4182
4183 return !infinite_cost_p (cost);
4184 }
4185
4186 /* Determines cost of basing replacement of USE on CAND in an address. */
4187
4188 static bool
4189 determine_use_iv_cost_address (struct ivopts_data *data,
4190 struct iv_use *use, struct iv_cand *cand)
4191 {
4192 bitmap depends_on;
4193 bool can_autoinc;
4194 int inv_expr_id = -1;
4195 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4196 &can_autoinc, &inv_expr_id);
4197
4198 if (cand->ainc_use == use)
4199 {
4200 if (can_autoinc)
4201 cost.cost -= cand->cost_step;
4202 /* If we generated the candidate solely for exploiting autoincrement
4203 opportunities, and it turns out it can't be used, set the cost to
4204 infinity to make sure we ignore it. */
4205 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4206 cost = infinite_cost;
4207 }
4208 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4209 inv_expr_id);
4210
4211 return !infinite_cost_p (cost);
4212 }
4213
4214 /* Computes value of candidate CAND at position AT in iteration NITER, and
4215 stores it to VAL. */
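/* E.g. for base 16 and step 4, VAL is 16 + 4 * NITER, or
   16 + 4 * (NITER + 1) when AT is after the increment of CAND.  */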
4216
4217 static void
4218 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4219 aff_tree *val)
4220 {
4221 aff_tree step, delta, nit;
4222 struct iv *iv = cand->iv;
4223 tree type = TREE_TYPE (iv->base);
4224 tree steptype = type;
4225 if (POINTER_TYPE_P (type))
4226 steptype = sizetype;
4227
4228 tree_to_aff_combination (iv->step, steptype, &step);
4229 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4230 aff_combination_convert (&nit, steptype);
4231 aff_combination_mult (&nit, &step, &delta);
4232 if (stmt_after_increment (loop, cand, at))
4233 aff_combination_add (&delta, &step);
4234
4235 tree_to_aff_combination (iv->base, type, val);
4236 aff_combination_add (val, &delta);
4237 }
4238
4239 /* Returns the period of induction variable IV.  */
4240
4241 static tree
4242 iv_period (struct iv *iv)
4243 {
4244 tree step = iv->step, period, type;
4245 tree pow2div;
4246
4247 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4248
4249 type = unsigned_type_for (TREE_TYPE (step));
4250   /* The period of the iv is lcm (step, type_range)/step - 1,
4251      i.e., N*type_range/step - 1, where lcm (step, type_range)
4252      == N*type_range.  Since the type range is a power of two,
4253      N == step >> num_of_ending_zeros_binary (step), so the
4254      final result is
4255 
4256      (type_range >> num_of_ending_zeros_binary (step)) - 1.
4257   */
4258 pow2div = num_ending_zeros (step);
4259
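  /* E.g. for a 32-bit unsigned type and step 12 (binary 1100),
     pow2div is 2 and the mask built below has 32 - 2 == 30 low bits
     set, i.e. the period is 0x3fffffff.  */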
4260 period = build_low_bits_mask (type,
4261 (TYPE_PRECISION (type)
4262 - tree_low_cst (pow2div, 1)));
4263
4264 return period;
4265 }
4266
4267 /* Returns the comparison operator used when eliminating the iv USE. */
4268
4269 static enum tree_code
4270 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4271 {
4272 struct loop *loop = data->current_loop;
4273 basic_block ex_bb;
4274 edge exit;
4275
4276 ex_bb = gimple_bb (use->stmt);
4277 exit = EDGE_SUCC (ex_bb, 0);
4278 if (flow_bb_inside_loop_p (loop, exit->dest))
4279 exit = EDGE_SUCC (ex_bb, 1);
4280
4281 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4282 }
4283
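/* Strips useless type conversions from EXP as long as they do not change
   whether the type may wrap.  */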
4284 static tree
4285 strip_wrap_conserving_type_conversions (tree exp)
4286 {
4287 while (tree_ssa_useless_type_conversion (exp)
4288 && (nowrap_type_p (TREE_TYPE (exp))
4289 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4290 exp = TREE_OPERAND (exp, 0);
4291 return exp;
4292 }
4293
4294 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic; we
4295    only check for an exact structural match.  */
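/* E.g. E == a_1 with a_1 = b_2 + 1 matches WHAT == b_2 + 1; the SSA
   definition of a_1 is followed and its operands compared recursively.  */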
4296
4297 static bool
4298 expr_equal_p (tree e, tree what)
4299 {
4300 gimple stmt;
4301 enum tree_code code;
4302
4303 e = strip_wrap_conserving_type_conversions (e);
4304 what = strip_wrap_conserving_type_conversions (what);
4305
4306 code = TREE_CODE (what);
4307 if (TREE_TYPE (e) != TREE_TYPE (what))
4308 return false;
4309
4310 if (operand_equal_p (e, what, 0))
4311 return true;
4312
4313 if (TREE_CODE (e) != SSA_NAME)
4314 return false;
4315
4316 stmt = SSA_NAME_DEF_STMT (e);
4317 if (gimple_code (stmt) != GIMPLE_ASSIGN
4318 || gimple_assign_rhs_code (stmt) != code)
4319 return false;
4320
4321 switch (get_gimple_rhs_class (code))
4322 {
4323 case GIMPLE_BINARY_RHS:
4324 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4325 return false;
4326 /* Fallthru. */
4327
4328 case GIMPLE_UNARY_RHS:
4329 case GIMPLE_SINGLE_RHS:
4330 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4331 default:
4332 return false;
4333 }
4334 }
4335
4336 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4337 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4338    calculation is performed in a non-wrapping type.
4339
4340 TODO: More generally, we could test for the situation that
4341 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4342 This would require knowing the sign of OFFSET.
4343
4344 Also, we only look for the first addition in the computation of BASE.
4345 More complex analysis would be better, but introducing it just for
4346    this optimization seems like overkill.  */
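/* E.g. if BASE is an SSA name defined as base_1 = start_2 + off_3 in a
   non-wrapping type and OFFSET is off_3, then BASE - OFFSET == start_2
   cannot overflow.  */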
4347
4348 static bool
4349 difference_cannot_overflow_p (tree base, tree offset)
4350 {
4351 enum tree_code code;
4352 tree e1, e2;
4353
4354 if (!nowrap_type_p (TREE_TYPE (base)))
4355 return false;
4356
4357 base = expand_simple_operations (base);
4358
4359 if (TREE_CODE (base) == SSA_NAME)
4360 {
4361 gimple stmt = SSA_NAME_DEF_STMT (base);
4362
4363 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4364 return false;
4365
4366 code = gimple_assign_rhs_code (stmt);
4367 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4368 return false;
4369
4370 e1 = gimple_assign_rhs1 (stmt);
4371 e2 = gimple_assign_rhs2 (stmt);
4372 }
4373 else
4374 {
4375 code = TREE_CODE (base);
4376 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4377 return false;
4378 e1 = TREE_OPERAND (base, 0);
4379 e2 = TREE_OPERAND (base, 1);
4380 }
4381
4382 /* TODO: deeper inspection may be necessary to prove the equality. */
4383 switch (code)
4384 {
4385 case PLUS_EXPR:
4386 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4387 case POINTER_PLUS_EXPR:
4388 return expr_equal_p (e2, offset);
4389
4390 default:
4391 return false;
4392 }
4393 }
4394
4395 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4396 comparison with CAND. NITER describes the number of iterations of
4397 the loops. If successful, the comparison in COMP_P is altered accordingly.
4398
4399 We aim to handle the following situation:
4400
4401 sometype *base, *p;
4402 int a, b, i;
4403
4404 i = a;
4405 p = p_0 = base + a;
4406
4407 do
4408 {
4409 bla (*p);
4410 p++;
4411 i++;
4412 }
4413 while (i < b);
4414
4415 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4416 We aim to optimize this to
4417
4418 p = p_0 = base + a;
4419 do
4420 {
4421 bla (*p);
4422 p++;
4423 }
4424 while (p < p_0 - a + b);
4425
4426    This preserves correctness, since the pointer arithmetic does not
4427    overflow.  More precisely:
4428
4429 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4430 overflow in computing it or the values of p.
4431 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4432 overflow. To prove this, we use the fact that p_0 = base + a. */
4433
4434 static bool
4435 iv_elimination_compare_lt (struct ivopts_data *data,
4436 struct iv_cand *cand, enum tree_code *comp_p,
4437 struct tree_niter_desc *niter)
4438 {
4439 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4440 struct affine_tree_combination nit, tmpa, tmpb;
4441 enum tree_code comp;
4442 HOST_WIDE_INT step;
4443
4444 /* We need to know that the candidate induction variable does not overflow.
4445 While more complex analysis may be used to prove this, for now just
4446 check that the variable appears in the original program and that it
4447 is computed in a type that guarantees no overflows. */
4448 cand_type = TREE_TYPE (cand->iv->base);
4449 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4450 return false;
4451
4452 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4453 the calculation of the BOUND could overflow, making the comparison
4454 invalid. */
4455 if (!data->loop_single_exit_p)
4456 return false;
4457
4458   /* We need to be able to decide whether the candidate is increasing or decreasing
4459 in order to choose the right comparison operator. */
4460 if (!cst_and_fits_in_hwi (cand->iv->step))
4461 return false;
4462 step = int_cst_value (cand->iv->step);
4463
4464 /* Check that the number of iterations matches the expected pattern:
4465 a + 1 > b ? 0 : b - a - 1. */
4466 mbz = niter->may_be_zero;
4467 if (TREE_CODE (mbz) == GT_EXPR)
4468 {
4469 /* Handle a + 1 > b. */
4470 tree op0 = TREE_OPERAND (mbz, 0);
4471 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4472 {
4473 a = TREE_OPERAND (op0, 0);
4474 b = TREE_OPERAND (mbz, 1);
4475 }
4476 else
4477 return false;
4478 }
4479 else if (TREE_CODE (mbz) == LT_EXPR)
4480 {
4481 tree op1 = TREE_OPERAND (mbz, 1);
4482
4483 /* Handle b < a + 1. */
4484 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4485 {
4486 a = TREE_OPERAND (op1, 0);
4487 b = TREE_OPERAND (mbz, 0);
4488 }
4489 else
4490 return false;
4491 }
4492 else
4493 return false;
4494
4495 /* Expected number of iterations is B - A - 1. Check that it matches
4496 the actual number, i.e., that B - A - NITER = 1. */
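  /* E.g. for a == 2 and b == 7 the loop runs 4 times, and indeed
     7 - 2 - 4 == 1; the affine combinations below verify
     B - A - NITER == 1 symbolically.  */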
4497 tree_to_aff_combination (niter->niter, nit_type, &nit);
4498 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4499 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4500 aff_combination_scale (&nit, double_int_minus_one);
4501 aff_combination_scale (&tmpa, double_int_minus_one);
4502 aff_combination_add (&tmpb, &tmpa);
4503 aff_combination_add (&tmpb, &nit);
4504 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4505 return false;
4506
4507 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4508 overflow. */
4509 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4510 cand->iv->step,
4511 fold_convert (TREE_TYPE (cand->iv->step), a));
4512 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4513 return false;
4514
4515 /* Determine the new comparison operator. */
4516 comp = step < 0 ? GT_EXPR : LT_EXPR;
4517 if (*comp_p == NE_EXPR)
4518 *comp_p = comp;
4519 else if (*comp_p == EQ_EXPR)
4520 *comp_p = invert_tree_comparison (comp, false);
4521 else
4522 gcc_unreachable ();
4523
4524 return true;
4525 }
4526
4527 /* Check whether it is possible to express the condition in USE by comparison
4528 of candidate CAND. If so, store the value compared with to BOUND, and the
4529 comparison operator to COMP. */
4530
4531 static bool
4532 may_eliminate_iv (struct ivopts_data *data,
4533 struct iv_use *use, struct iv_cand *cand, tree *bound,
4534 enum tree_code *comp)
4535 {
4536 basic_block ex_bb;
4537 edge exit;
4538 tree period;
4539 struct loop *loop = data->current_loop;
4540 aff_tree bnd;
4541 struct tree_niter_desc *desc = NULL;
4542
4543 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4544 return false;
4545
4546   /* For now this works only for exits that dominate the loop latch.
4547 TODO: extend to other conditions inside loop body. */
4548 ex_bb = gimple_bb (use->stmt);
4549 if (use->stmt != last_stmt (ex_bb)
4550 || gimple_code (use->stmt) != GIMPLE_COND
4551 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4552 return false;
4553
4554 exit = EDGE_SUCC (ex_bb, 0);
4555 if (flow_bb_inside_loop_p (loop, exit->dest))
4556 exit = EDGE_SUCC (ex_bb, 1);
4557 if (flow_bb_inside_loop_p (loop, exit->dest))
4558 return false;
4559
4560 desc = niter_for_exit (data, exit);
4561 if (!desc)
4562 return false;
4563
4564 /* Determine whether we can use the variable to test the exit condition.
4565 This is the case iff the period of the induction variable is greater
4566 than the number of iterations for which the exit condition is true. */
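  /* E.g. an 8-bit unsigned iv with step 1 has period 255; if the exit
     condition holds for at most 200 iterations, all candidate values in
     that range are distinct, so the test can be replaced by a comparison
     against the candidate's value in the last iteration.  */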
4567 period = iv_period (cand->iv);
4568
4569 /* If the number of iterations is constant, compare against it directly. */
4570 if (TREE_CODE (desc->niter) == INTEGER_CST)
4571 {
4572 /* See cand_value_at. */
4573 if (stmt_after_increment (loop, cand, use->stmt))
4574 {
4575 if (!tree_int_cst_lt (desc->niter, period))
4576 return false;
4577 }
4578 else
4579 {
4580 if (tree_int_cst_lt (period, desc->niter))
4581 return false;
4582 }
4583 }
4584
4585 /* If not, and if this is the only possible exit of the loop, see whether
4586 we can get a conservative estimate on the number of iterations of the
4587 entire loop and compare against that instead. */
4588 else
4589 {
4590 double_int period_value, max_niter;
4591
4592 max_niter = desc->max;
4593 if (stmt_after_increment (loop, cand, use->stmt))
4594 max_niter = double_int_add (max_niter, double_int_one);
4595 period_value = tree_to_double_int (period);
4596 if (double_int_ucmp (max_niter, period_value) > 0)
4597 {
4598 /* See if we can take advantage of inferred loop bound information. */
4599 if (data->loop_single_exit_p)
4600 {
4601 if (!max_loop_iterations (loop, &max_niter))
4602 return false;
4603 /* The loop bound is already adjusted by adding 1. */
4604 if (double_int_ucmp (max_niter, period_value) > 0)
4605 return false;
4606 }
4607 else
4608 return false;
4609 }
4610 }
4611
4612 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4613
4614 *bound = aff_combination_to_tree (&bnd);
4615 *comp = iv_elimination_compare (data, use);
4616
4617 /* It is unlikely that computing the number of iterations using division
4618 would be more profitable than keeping the original induction variable. */
4619 if (expression_expensive_p (*bound))
4620 return false;
4621
4622   /* Sometimes it is possible to handle the situation that the number of
4623      iterations may be zero unless additional assumptions hold, by using
4624      < instead of != in the exit condition.
4625
4626 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4627 base the exit condition on it. However, that is often too
4628 expensive. */
4629 if (!integer_zerop (desc->may_be_zero))
4630 return iv_elimination_compare_lt (data, cand, comp, desc);
4631
4632 return true;
4633 }
4634
4635 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4636    be copied, if it is used in the loop body and DATA->body_includes_call.  */
4637
4638 static int
4639 parm_decl_cost (struct ivopts_data *data, tree bound)
4640 {
4641 tree sbound = bound;
4642 STRIP_NOPS (sbound);
4643
4644 if (TREE_CODE (sbound) == SSA_NAME
4645 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4646 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4647 && data->body_includes_call)
4648 return COSTS_N_INSNS (1);
4649
4650 return 0;
4651 }
4652
4653 /* Determines cost of basing replacement of USE on CAND in a condition. */
4654
4655 static bool
4656 determine_use_iv_cost_condition (struct ivopts_data *data,
4657 struct iv_use *use, struct iv_cand *cand)
4658 {
4659 tree bound = NULL_TREE;
4660 struct iv *cmp_iv;
4661 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4662 comp_cost elim_cost, express_cost, cost, bound_cost;
4663 bool ok;
4664 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4665 tree *control_var, *bound_cst;
4666 enum tree_code comp = ERROR_MARK;
4667
4668 /* Only consider real candidates. */
4669 if (!cand->iv)
4670 {
4671 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4672 ERROR_MARK, -1);
4673 return false;
4674 }
4675
4676 /* Try iv elimination. */
4677 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4678 {
4679 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4680 if (elim_cost.cost == 0)
4681 elim_cost.cost = parm_decl_cost (data, bound);
4682 else if (TREE_CODE (bound) == INTEGER_CST)
4683 elim_cost.cost = 0;
4684 /* If we replace a loop condition 'i < n' with 'p < base + n',
4685 depends_on_elim will have 'base' and 'n' set, which implies
4686 that both 'base' and 'n' will be live during the loop. More likely,
4687 'base + n' will be loop invariant, resulting in only one live value
4688 during the loop. So in that case we clear depends_on_elim and set
4689 elim_inv_expr_id instead. */
4690 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4691 {
4692 elim_inv_expr_id = get_expr_id (data, bound);
4693 bitmap_clear (depends_on_elim);
4694 }
4695 /* The bound is a loop invariant, so it will be only computed
4696 once. */
4697 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4698 }
4699 else
4700 elim_cost = infinite_cost;
4701
4702 /* Try expressing the original giv. If it is compared with an invariant,
4703 note that we cannot get rid of it. */
4704 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4705 NULL, &cmp_iv);
4706 gcc_assert (ok);
4707
4708 /* When the condition is a comparison of the candidate IV against
4709 zero, prefer this IV.
4710
4711 TODO: The constant that we're subtracting from the cost should
4712 be target-dependent. This information should be added to the
4713 target costs for each backend. */
4714 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4715 && integer_zerop (*bound_cst)
4716 && (operand_equal_p (*control_var, cand->var_after, 0)
4717 || operand_equal_p (*control_var, cand->var_before, 0)))
4718 elim_cost.cost -= 1;
4719
4720 express_cost = get_computation_cost (data, use, cand, false,
4721 &depends_on_express, NULL,
4722 &express_inv_expr_id);
4723 fd_ivopts_data = data;
4724 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4725
4726 /* Count the cost of the original bound as well. */
4727 bound_cost = force_var_cost (data, *bound_cst, NULL);
4728 if (bound_cost.cost == 0)
4729 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4730 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4731 bound_cost.cost = 0;
4732 express_cost.cost += bound_cost.cost;
4733
4734 /* Choose the better approach, preferring the eliminated IV. */
4735 if (compare_costs (elim_cost, express_cost) <= 0)
4736 {
4737 cost = elim_cost;
4738 depends_on = depends_on_elim;
4739 depends_on_elim = NULL;
4740 inv_expr_id = elim_inv_expr_id;
4741 }
4742 else
4743 {
4744 cost = express_cost;
4745 depends_on = depends_on_express;
4746 depends_on_express = NULL;
4747 bound = NULL_TREE;
4748 comp = ERROR_MARK;
4749 inv_expr_id = express_inv_expr_id;
4750 }
4751
4752 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4753
4754 if (depends_on_elim)
4755 BITMAP_FREE (depends_on_elim);
4756 if (depends_on_express)
4757 BITMAP_FREE (depends_on_express);
4758
4759 return !infinite_cost_p (cost);
4760 }
4761
4762 /* Determines cost of basing replacement of USE on CAND. Returns false
4763 if USE cannot be based on CAND. */
4764
4765 static bool
4766 determine_use_iv_cost (struct ivopts_data *data,
4767 struct iv_use *use, struct iv_cand *cand)
4768 {
4769 switch (use->type)
4770 {
4771 case USE_NONLINEAR_EXPR:
4772 return determine_use_iv_cost_generic (data, use, cand);
4773
4774 case USE_ADDRESS:
4775 return determine_use_iv_cost_address (data, use, cand);
4776
4777 case USE_COMPARE:
4778 return determine_use_iv_cost_condition (data, use, cand);
4779
4780 default:
4781 gcc_unreachable ();
4782 }
4783 }
4784
4785 /* Return true if get_computation_cost indicates that autoincrement is
4786 a possibility for the pair of USE and CAND, false otherwise. */
4787
4788 static bool
4789 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4790 struct iv_cand *cand)
4791 {
4792 bitmap depends_on;
4793 bool can_autoinc;
4794 comp_cost cost;
4795
4796 if (use->type != USE_ADDRESS)
4797 return false;
4798
4799 cost = get_computation_cost (data, use, cand, true, &depends_on,
4800 &can_autoinc, NULL);
4801
4802 BITMAP_FREE (depends_on);
4803
4804 return !infinite_cost_p (cost) && can_autoinc;
4805 }
4806
4807 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4808 use that allows autoincrement, and set their AINC_USE if possible. */
4809
4810 static void
4811 set_autoinc_for_original_candidates (struct ivopts_data *data)
4812 {
4813 unsigned i, j;
4814
4815 for (i = 0; i < n_iv_cands (data); i++)
4816 {
4817 struct iv_cand *cand = iv_cand (data, i);
4818 struct iv_use *closest = NULL;
4819 if (cand->pos != IP_ORIGINAL)
4820 continue;
4821 for (j = 0; j < n_iv_uses (data); j++)
4822 {
4823 struct iv_use *use = iv_use (data, j);
4824 unsigned uid = gimple_uid (use->stmt);
4825 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4826 || uid > gimple_uid (cand->incremented_at))
4827 continue;
4828 if (closest == NULL || uid > gimple_uid (closest->stmt))
4829 closest = use;
4830 }
4831 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4832 continue;
4833 cand->ainc_use = closest;
4834 }
4835 }
4836
4837 /* Finds the candidates for the induction variables. */
4838
4839 static void
4840 find_iv_candidates (struct ivopts_data *data)
4841 {
4842 /* Add commonly used ivs. */
4843 add_standard_iv_candidates (data);
4844
4845 /* Add old induction variables. */
4846 add_old_ivs_candidates (data);
4847
4848 /* Add induction variables derived from uses. */
4849 add_derived_ivs_candidates (data);
4850
4851 set_autoinc_for_original_candidates (data);
4852
4853 /* Record the important candidates. */
4854 record_important_candidates (data);
4855 }
4856
4857 /* Determines costs of basing the use of the iv on an iv candidate. */
4858
4859 static void
4860 determine_use_iv_costs (struct ivopts_data *data)
4861 {
4862 unsigned i, j;
4863 struct iv_use *use;
4864 struct iv_cand *cand;
4865 bitmap to_clear = BITMAP_ALLOC (NULL);
4866
4867 alloc_use_cost_map (data);
4868
4869 for (i = 0; i < n_iv_uses (data); i++)
4870 {
4871 use = iv_use (data, i);
4872
4873 if (data->consider_all_candidates)
4874 {
4875 for (j = 0; j < n_iv_cands (data); j++)
4876 {
4877 cand = iv_cand (data, j);
4878 determine_use_iv_cost (data, use, cand);
4879 }
4880 }
4881 else
4882 {
4883 bitmap_iterator bi;
4884
4885 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4886 {
4887 cand = iv_cand (data, j);
4888 if (!determine_use_iv_cost (data, use, cand))
4889 bitmap_set_bit (to_clear, j);
4890 }
4891
4892 	  /* Remove the candidates for which the cost is infinite from
4893 the list of related candidates. */
4894 bitmap_and_compl_into (use->related_cands, to_clear);
4895 bitmap_clear (to_clear);
4896 }
4897 }
4898
4899 BITMAP_FREE (to_clear);
4900
4901 if (dump_file && (dump_flags & TDF_DETAILS))
4902 {
4903 fprintf (dump_file, "Use-candidate costs:\n");
4904
4905 for (i = 0; i < n_iv_uses (data); i++)
4906 {
4907 use = iv_use (data, i);
4908
4909 fprintf (dump_file, "Use %d:\n", i);
4910 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4911 for (j = 0; j < use->n_map_members; j++)
4912 {
4913 if (!use->cost_map[j].cand
4914 || infinite_cost_p (use->cost_map[j].cost))
4915 continue;
4916
4917 fprintf (dump_file, " %d\t%d\t%d\t",
4918 use->cost_map[j].cand->id,
4919 use->cost_map[j].cost.cost,
4920 use->cost_map[j].cost.complexity);
4921 if (use->cost_map[j].depends_on)
4922 bitmap_print (dump_file,
4923 use->cost_map[j].depends_on, "","");
4924 if (use->cost_map[j].inv_expr_id != -1)
4925 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4926 fprintf (dump_file, "\n");
4927 }
4928
4929 fprintf (dump_file, "\n");
4930 }
4931 fprintf (dump_file, "\n");
4932 }
4933 }
4934
4935 /* Determines cost of the candidate CAND. */
4936
4937 static void
4938 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4939 {
4940 comp_cost cost_base;
4941 unsigned cost, cost_step;
4942 tree base;
4943
4944 if (!cand->iv)
4945 {
4946 cand->cost = 0;
4947 return;
4948 }
4949
4950 /* There are two costs associated with the candidate -- its increment
4951 and its initialization. The second is almost negligible for any loop
4952      that rolls enough, so we take it into account only a little.  */
4953
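  /* E.g. if forcing the base into a register costs COSTS_N_INSNS (1)
     and a step addition costs 1, the candidate costs
     1 + adjust_setup_cost (data, COSTS_N_INSNS (1)); adjust_setup_cost
     scales the one-time setup cost down when optimizing for speed,
     similarly to the divisions by avg_loop_niter in
     get_computation_cost_at.  */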
4954 base = cand->iv->base;
4955 cost_base = force_var_cost (data, base, NULL);
4956 /* It will be exceptional that the iv register happens to be initialized with
4957 the proper value at no cost. In general, there will at least be a regcopy
4958 or a const set. */
4959 if (cost_base.cost == 0)
4960 cost_base.cost = COSTS_N_INSNS (1);
4961 cost_step = add_cost[data->speed][TYPE_MODE (TREE_TYPE (base))];
4962
4963 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4964
4965   /* Prefer the original ivs unless we may gain something by replacing them.
4966 The reason is to make debugging simpler; so this is not relevant for
4967 artificial ivs created by other optimization passes. */
4968 if (cand->pos != IP_ORIGINAL
4969 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4970 cost++;
4971
4972 /* Prefer not to insert statements into latch unless there are some
4973 already (so that we do not create unnecessary jumps). */
4974 if (cand->pos == IP_END
4975 && empty_block_p (ip_end_pos (data->current_loop)))
4976 cost++;
4977
4978 cand->cost = cost;
4979 cand->cost_step = cost_step;
4980 }
4981
4982 /* Determines costs of computation of the candidates. */
4983
4984 static void
4985 determine_iv_costs (struct ivopts_data *data)
4986 {
4987 unsigned i;
4988
4989 if (dump_file && (dump_flags & TDF_DETAILS))
4990 {
4991 fprintf (dump_file, "Candidate costs:\n");
4992 fprintf (dump_file, " cand\tcost\n");
4993 }
4994
4995 for (i = 0; i < n_iv_cands (data); i++)
4996 {
4997 struct iv_cand *cand = iv_cand (data, i);
4998
4999 determine_iv_cost (data, cand);
5000
5001 if (dump_file && (dump_flags & TDF_DETAILS))
5002 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5003 }
5004
5005 if (dump_file && (dump_flags & TDF_DETAILS))
5006 fprintf (dump_file, "\n");
5007 }
5008
5009 /* Calculates cost for having SIZE induction variables. */
5010
5011 static unsigned
5012 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5013 {
5014 /* We add size to the cost, so that we prefer eliminating ivs
5015 if possible. */
5016 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5017 data->body_includes_call);
5018 }
5019
5020 /* For each size of the induction variable set determine the penalty. */
5021
5022 static void
5023 determine_set_costs (struct ivopts_data *data)
5024 {
5025 unsigned j, n;
5026 gimple phi;
5027 gimple_stmt_iterator psi;
5028 tree op;
5029 struct loop *loop = data->current_loop;
5030 bitmap_iterator bi;
5031
5032 if (dump_file && (dump_flags & TDF_DETAILS))
5033 {
5034 fprintf (dump_file, "Global costs:\n");
5035 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5036 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5037 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5038 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5039 }
5040
5041 n = 0;
5042 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5043 {
5044 phi = gsi_stmt (psi);
5045 op = PHI_RESULT (phi);
5046
5047 if (!is_gimple_reg (op))
5048 continue;
5049
5050 if (get_iv (data, op))
5051 continue;
5052
5053 n++;
5054 }
5055
5056 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5057 {
5058 struct version_info *info = ver_info (data, j);
5059
5060 if (info->inv_id && info->has_nonlin_use)
5061 n++;
5062 }
5063
5064 data->regs_used = n;
5065 if (dump_file && (dump_flags & TDF_DETAILS))
5066 fprintf (dump_file, " regs_used %d\n", n);
5067
5068 if (dump_file && (dump_flags & TDF_DETAILS))
5069 {
5070 fprintf (dump_file, " cost for size:\n");
5071 fprintf (dump_file, " ivs\tcost\n");
5072 for (j = 0; j <= 2 * target_avail_regs; j++)
5073 fprintf (dump_file, " %d\t%d\n", j,
5074 ivopts_global_cost_for_size (data, j));
5075 fprintf (dump_file, "\n");
5076 }
5077 }
5078
5079 /* Returns true if A is a cheaper cost pair than B. */
5080
5081 static bool
5082 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5083 {
5084 int cmp;
5085
5086 if (!a)
5087 return false;
5088
5089 if (!b)
5090 return true;
5091
5092 cmp = compare_costs (a->cost, b->cost);
5093 if (cmp < 0)
5094 return true;
5095
5096 if (cmp > 0)
5097 return false;
5098
5099 /* In case the costs are the same, prefer the cheaper candidate. */
5100 if (a->cand->cost < b->cand->cost)
5101 return true;
5102
5103 return false;
5104 }
5105
5106
5107 /* Returns the cost pair of the candidate by which USE is expressed in IVS.  */
5108
5109 static struct cost_pair *
5110 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5111 {
5112 return ivs->cand_for_use[use->id];
5113 }
5114
5115 /* Computes the cost field of IVS structure. */
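/* The total is cand_use_cost + cand_cost plus the register pressure
   penalty for keeping n_regs + num_used_inv_expr values live; e.g. a
   set whose uses cost 12, whose candidates cost 8 and which keeps 3
   values live costs 20 + ivopts_global_cost_for_size (data, 3).  */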
5116
5117 static void
5118 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5119 {
5120 comp_cost cost = ivs->cand_use_cost;
5121
5122 cost.cost += ivs->cand_cost;
5123
5124 cost.cost += ivopts_global_cost_for_size (data,
5125 ivs->n_regs + ivs->num_used_inv_expr);
5126
5127 ivs->cost = cost;
5128 }
5129
5130 /* Remove the invariants in set INVS from set IVS.  */
5131
5132 static void
5133 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5134 {
5135 bitmap_iterator bi;
5136 unsigned iid;
5137
5138 if (!invs)
5139 return;
5140
5141 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5142 {
5143 ivs->n_invariant_uses[iid]--;
5144 if (ivs->n_invariant_uses[iid] == 0)
5145 ivs->n_regs--;
5146 }
5147 }
5148
5149 /* Set USE not to be expressed by any candidate in IVS. */
5150
5151 static void
5152 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5153 struct iv_use *use)
5154 {
5155 unsigned uid = use->id, cid;
5156 struct cost_pair *cp;
5157
5158 cp = ivs->cand_for_use[uid];
5159 if (!cp)
5160 return;
5161 cid = cp->cand->id;
5162
5163 ivs->bad_uses++;
5164 ivs->cand_for_use[uid] = NULL;
5165 ivs->n_cand_uses[cid]--;
5166
5167 if (ivs->n_cand_uses[cid] == 0)
5168 {
5169 bitmap_clear_bit (ivs->cands, cid);
5170 /* Do not count the pseudocandidates. */
5171 if (cp->cand->iv)
5172 ivs->n_regs--;
5173 ivs->n_cands--;
5174 ivs->cand_cost -= cp->cand->cost;
5175
5176 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5177 }
5178
5179 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5180
5181 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5182
5183 if (cp->inv_expr_id != -1)
5184 {
5185 ivs->used_inv_expr[cp->inv_expr_id]--;
5186 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5187 ivs->num_used_inv_expr--;
5188 }
5189 iv_ca_recount_cost (data, ivs);
5190 }
5191
5192 /* Add invariants in set INVS to set IVS. */
5193
5194 static void
5195 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5196 {
5197 bitmap_iterator bi;
5198 unsigned iid;
5199
5200 if (!invs)
5201 return;
5202
5203 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5204 {
5205 ivs->n_invariant_uses[iid]++;
5206 if (ivs->n_invariant_uses[iid] == 1)
5207 ivs->n_regs++;
5208 }
5209 }
5210
5211 /* Set cost pair for USE in set IVS to CP. */
5212
5213 static void
5214 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5215 struct iv_use *use, struct cost_pair *cp)
5216 {
5217 unsigned uid = use->id, cid;
5218
5219 if (ivs->cand_for_use[uid] == cp)
5220 return;
5221
5222 if (ivs->cand_for_use[uid])
5223 iv_ca_set_no_cp (data, ivs, use);
5224
5225 if (cp)
5226 {
5227 cid = cp->cand->id;
5228
5229 ivs->bad_uses--;
5230 ivs->cand_for_use[uid] = cp;
5231 ivs->n_cand_uses[cid]++;
5232 if (ivs->n_cand_uses[cid] == 1)
5233 {
5234 bitmap_set_bit (ivs->cands, cid);
5235 /* Do not count the pseudocandidates. */
5236 if (cp->cand->iv)
5237 ivs->n_regs++;
5238 ivs->n_cands++;
5239 ivs->cand_cost += cp->cand->cost;
5240
5241 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5242 }
5243
5244 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5245 iv_ca_set_add_invariants (ivs, cp->depends_on);
5246
5247 if (cp->inv_expr_id != -1)
5248 {
5249 ivs->used_inv_expr[cp->inv_expr_id]++;
5250 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5251 ivs->num_used_inv_expr++;
5252 }
5253 iv_ca_recount_cost (data, ivs);
5254 }
5255 }
5256
5257 /* Extend set IVS by expressing USE by some of the candidates in it
5258 if possible. All important candidates will be considered
5259 if IMPORTANT_CANDIDATES is true. */
5260
5261 static void
5262 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5263 struct iv_use *use, bool important_candidates)
5264 {
5265 struct cost_pair *best_cp = NULL, *cp;
5266 bitmap_iterator bi;
5267 bitmap cands;
5268 unsigned i;
5269
5270 gcc_assert (ivs->upto >= use->id);
5271
5272 if (ivs->upto == use->id)
5273 {
5274 ivs->upto++;
5275 ivs->bad_uses++;
5276 }
5277
5278 cands = (important_candidates ? data->important_candidates : ivs->cands);
5279 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5280 {
5281 struct iv_cand *cand = iv_cand (data, i);
5282
5283 cp = get_use_iv_cost (data, use, cand);
5284
5285 if (cheaper_cost_pair (cp, best_cp))
5286 best_cp = cp;
5287 }
5288
5289 iv_ca_set_cp (data, ivs, use, best_cp);
5290 }
5291
5292 /* Get cost for assignment IVS. */
5293
5294 static comp_cost
5295 iv_ca_cost (struct iv_ca *ivs)
5296 {
5297 /* This was a conditional expression but it triggered a bug in
5298 Sun C 5.5. */
5299 if (ivs->bad_uses)
5300 return infinite_cost;
5301 else
5302 return ivs->cost;
5303 }
5304
5305 /* Returns true if all dependences of CP are among invariants in IVS. */
5306
5307 static bool
5308 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5309 {
5310 unsigned i;
5311 bitmap_iterator bi;
5312
5313 if (!cp->depends_on)
5314 return true;
5315
5316 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5317 {
5318 if (ivs->n_invariant_uses[i] == 0)
5319 return false;
5320 }
5321
5322 return true;
5323 }
5324
5325 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5326 it before NEXT_CHANGE. */
5327
5328 static struct iv_ca_delta *
5329 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5330 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5331 {
5332 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5333
5334 change->use = use;
5335 change->old_cp = old_cp;
5336 change->new_cp = new_cp;
5337 change->next_change = next_change;
5338
5339 return change;
5340 }
5341
5342 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5343 are rewritten. */
5344
5345 static struct iv_ca_delta *
5346 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5347 {
5348 struct iv_ca_delta *last;
5349
5350 if (!l2)
5351 return l1;
5352
5353 if (!l1)
5354 return l2;
5355
5356 for (last = l1; last->next_change; last = last->next_change)
5357 continue;
5358 last->next_change = l2;
5359
5360 return l1;
5361 }
5362
5363 /* Reverse the list of changes DELTA, forming the inverse to it. */
5364
5365 static struct iv_ca_delta *
5366 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5367 {
5368 struct iv_ca_delta *act, *next, *prev = NULL;
5369 struct cost_pair *tmp;
5370
5371 for (act = delta; act; act = next)
5372 {
5373 next = act->next_change;
5374 act->next_change = prev;
5375 prev = act;
5376
5377 tmp = act->old_cp;
5378 act->old_cp = act->new_cp;
5379 act->new_cp = tmp;
5380 }
5381
5382 return prev;
5383 }
5384
5385 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5386 reverted instead. */
5387
5388 static void
5389 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5390 struct iv_ca_delta *delta, bool forward)
5391 {
5392 struct cost_pair *from, *to;
5393 struct iv_ca_delta *act;
5394
5395 if (!forward)
5396 delta = iv_ca_delta_reverse (delta);
5397
5398 for (act = delta; act; act = act->next_change)
5399 {
5400 from = act->old_cp;
5401 to = act->new_cp;
5402 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5403 iv_ca_set_cp (data, ivs, act->use, to);
5404 }
5405
5406 if (!forward)
5407 iv_ca_delta_reverse (delta);
5408 }
5409
5410 /* Returns true if CAND is used in IVS. */
5411
5412 static bool
5413 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5414 {
5415 return ivs->n_cand_uses[cand->id] > 0;
5416 }
5417
5418 /* Returns number of induction variable candidates in the set IVS. */
5419
5420 static unsigned
5421 iv_ca_n_cands (struct iv_ca *ivs)
5422 {
5423 return ivs->n_cands;
5424 }
5425
5426 /* Free the list of changes DELTA. */
5427
5428 static void
5429 iv_ca_delta_free (struct iv_ca_delta **delta)
5430 {
5431 struct iv_ca_delta *act, *next;
5432
5433 for (act = *delta; act; act = next)
5434 {
5435 next = act->next_change;
5436 free (act);
5437 }
5438
5439 *delta = NULL;
5440 }
5441
5442 /* Allocates a new iv candidate assignment.  */
5443
5444 static struct iv_ca *
5445 iv_ca_new (struct ivopts_data *data)
5446 {
5447 struct iv_ca *nw = XNEW (struct iv_ca);
5448
5449 nw->upto = 0;
5450 nw->bad_uses = 0;
5451 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5452 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5453 nw->cands = BITMAP_ALLOC (NULL);
5454 nw->n_cands = 0;
5455 nw->n_regs = 0;
5456 nw->cand_use_cost = no_cost;
5457 nw->cand_cost = 0;
5458 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5459 nw->cost = no_cost;
5460 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5461 nw->num_used_inv_expr = 0;
5462
5463 return nw;
5464 }
5465
5466 /* Free memory occupied by the set IVS. */
5467
5468 static void
5469 iv_ca_free (struct iv_ca **ivs)
5470 {
5471 free ((*ivs)->cand_for_use);
5472 free ((*ivs)->n_cand_uses);
5473 BITMAP_FREE ((*ivs)->cands);
5474 free ((*ivs)->n_invariant_uses);
5475 free ((*ivs)->used_inv_expr);
5476 free (*ivs);
5477 *ivs = NULL;
5478 }
5479
5480 /* Dumps IVS to FILE. */
5481
5482 static void
5483 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5484 {
5485 const char *pref = " invariants ";
5486 unsigned i;
5487 comp_cost cost = iv_ca_cost (ivs);
5488
5489 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5490 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5491 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5492 bitmap_print (file, ivs->cands, " candidates: ","\n");
5493
5494 for (i = 0; i < ivs->upto; i++)
5495 {
5496 struct iv_use *use = iv_use (data, i);
5497 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5498 if (cp)
5499 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5500 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5501 else
5502 fprintf (file, " use:%d --> ??\n", use->id);
5503 }
5504
5505 for (i = 1; i <= data->max_inv_id; i++)
5506 if (ivs->n_invariant_uses[i])
5507 {
5508 fprintf (file, "%s%d", pref, i);
5509 pref = ", ";
5510 }
5511 fprintf (file, "\n\n");
5512 }
5513
5514 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
5515    of the new set, and store the differences in DELTA.  The number of induction
5516    variables in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it
5517    is true, the function tries to find a solution with a minimal number of
5518    iv candidates.  */
5518
5519 static comp_cost
5520 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5521 struct iv_cand *cand, struct iv_ca_delta **delta,
5522 unsigned *n_ivs, bool min_ncand)
5523 {
5524 unsigned i;
5525 comp_cost cost;
5526 struct iv_use *use;
5527 struct cost_pair *old_cp, *new_cp;
5528
5529 *delta = NULL;
5530 for (i = 0; i < ivs->upto; i++)
5531 {
5532 use = iv_use (data, i);
5533 old_cp = iv_ca_cand_for_use (ivs, use);
5534
5535 if (old_cp
5536 && old_cp->cand == cand)
5537 continue;
5538
5539 new_cp = get_use_iv_cost (data, use, cand);
5540 if (!new_cp)
5541 continue;
5542
5543 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5544 continue;
5545
5546 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5547 continue;
5548
5549 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5550 }
5551
5552 iv_ca_delta_commit (data, ivs, *delta, true);
5553 cost = iv_ca_cost (ivs);
5554 if (n_ivs)
5555 *n_ivs = iv_ca_n_cands (ivs);
5556 iv_ca_delta_commit (data, ivs, *delta, false);
5557
5558 return cost;
5559 }
5560
5561 /* Try narrowing set IVS by removing CAND. Return the cost of
5562 the new set and store the differences in DELTA. */
5563
5564 static comp_cost
5565 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5566 struct iv_cand *cand, struct iv_ca_delta **delta)
5567 {
5568 unsigned i, ci;
5569 struct iv_use *use;
5570 struct cost_pair *old_cp, *new_cp, *cp;
5571 bitmap_iterator bi;
5572 struct iv_cand *cnd;
5573 comp_cost cost;
5574
5575 *delta = NULL;
5576 for (i = 0; i < n_iv_uses (data); i++)
5577 {
5578 use = iv_use (data, i);
5579
5580 old_cp = iv_ca_cand_for_use (ivs, use);
5581 if (old_cp->cand != cand)
5582 continue;
5583
5584 new_cp = NULL;
5585
5586 if (data->consider_all_candidates)
5587 {
5588 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5589 {
5590 if (ci == cand->id)
5591 continue;
5592
5593 cnd = iv_cand (data, ci);
5594
5595 cp = get_use_iv_cost (data, use, cnd);
5596 if (!cp)
5597 continue;
5598
5599 if (!iv_ca_has_deps (ivs, cp))
5600 continue;
5601
5602 if (!cheaper_cost_pair (cp, new_cp))
5603 continue;
5604
5605 new_cp = cp;
5606 }
5607 }
5608 else
5609 {
5610 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5611 {
5612 if (ci == cand->id)
5613 continue;
5614
5615 cnd = iv_cand (data, ci);
5616
5617 cp = get_use_iv_cost (data, use, cnd);
5618 if (!cp)
5619 continue;
5620 if (!iv_ca_has_deps (ivs, cp))
5621 continue;
5622
5623 if (!cheaper_cost_pair (cp, new_cp))
5624 continue;
5625
5626 new_cp = cp;
5627 }
5628 }
5629
5630 if (!new_cp)
5631 {
5632 iv_ca_delta_free (delta);
5633 return infinite_cost;
5634 }
5635
5636 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5637 }
5638
5639 iv_ca_delta_commit (data, ivs, *delta, true);
5640 cost = iv_ca_cost (ivs);
5641 iv_ca_delta_commit (data, ivs, *delta, false);
5642
5643 return cost;
5644 }
5645
5646 /* Try optimizing the set of candidates IVS by removing candidates other
5647    than EXCEPT_CAND from it.  Return the cost of the new set, and store
5648    the differences in DELTA.  */
5649
5650 static comp_cost
5651 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5652 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5653 {
5654 bitmap_iterator bi;
5655 struct iv_ca_delta *act_delta, *best_delta;
5656 unsigned i;
5657 comp_cost best_cost, acost;
5658 struct iv_cand *cand;
5659
5660 best_delta = NULL;
5661 best_cost = iv_ca_cost (ivs);
5662
5663 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5664 {
5665 cand = iv_cand (data, i);
5666
5667 if (cand == except_cand)
5668 continue;
5669
5670 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5671
5672 if (compare_costs (acost, best_cost) < 0)
5673 {
5674 best_cost = acost;
5675 iv_ca_delta_free (&best_delta);
5676 best_delta = act_delta;
5677 }
5678 else
5679 iv_ca_delta_free (&act_delta);
5680 }
5681
5682 if (!best_delta)
5683 {
5684 *delta = NULL;
5685 return best_cost;
5686 }
5687
5688 /* Recurse to possibly remove other unnecessary ivs. */
5689 iv_ca_delta_commit (data, ivs, best_delta, true);
5690 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5691 iv_ca_delta_commit (data, ivs, best_delta, false);
5692 *delta = iv_ca_delta_join (best_delta, *delta);
5693 return best_cost;
5694 }
5695
5696 /* Tries to extend the set IVS in the best possible way in order
5697    to express USE.  If ORIGINALP is true, prefer candidates from
5698 the original set of IVs, otherwise favor important candidates not
5699 based on any memory object. */
5700
5701 static bool
5702 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5703 struct iv_use *use, bool originalp)
5704 {
5705 comp_cost best_cost, act_cost;
5706 unsigned i;
5707 bitmap_iterator bi;
5708 struct iv_cand *cand;
5709 struct iv_ca_delta *best_delta = NULL, *act_delta;
5710 struct cost_pair *cp;
5711
5712 iv_ca_add_use (data, ivs, use, false);
5713 best_cost = iv_ca_cost (ivs);
5714
5715 cp = iv_ca_cand_for_use (ivs, use);
5716 if (!cp)
5717 {
5718 ivs->upto--;
5719 ivs->bad_uses--;
5720 iv_ca_add_use (data, ivs, use, true);
5721 best_cost = iv_ca_cost (ivs);
5722 cp = iv_ca_cand_for_use (ivs, use);
5723 }
5724 if (cp)
5725 {
5726 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5727 iv_ca_set_no_cp (data, ivs, use);
5728 }
5729
5730 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5731 first try important candidates not based on any memory object. Only if
5732 this fails, try the specific ones. Rationale -- in loops with many
5733 variables the best choice often is to use just one generic biv. If we
5734 added here many ivs specific to the uses, the optimization algorithm later
5735 would be likely to get stuck in a local minimum, thus causing us to create
5736 too many ivs. The approach from few ivs to more seems more likely to be
5737 successful -- starting from few ivs, replacing an expensive use by a
5738 specific iv should always be a win. */
5739 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5740 {
5741 cand = iv_cand (data, i);
5742
5743       if (originalp && cand->pos != IP_ORIGINAL)
5744 continue;
5745
5746 if (!originalp && cand->iv->base_object != NULL_TREE)
5747 continue;
5748
5749 if (iv_ca_cand_used_p (ivs, cand))
5750 continue;
5751
5752 cp = get_use_iv_cost (data, use, cand);
5753 if (!cp)
5754 continue;
5755
5756 iv_ca_set_cp (data, ivs, use, cp);
5757 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5758 true);
5759 iv_ca_set_no_cp (data, ivs, use);
5760 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5761
5762 if (compare_costs (act_cost, best_cost) < 0)
5763 {
5764 best_cost = act_cost;
5765
5766 iv_ca_delta_free (&best_delta);
5767 best_delta = act_delta;
5768 }
5769 else
5770 iv_ca_delta_free (&act_delta);
5771 }
5772
5773 if (infinite_cost_p (best_cost))
5774 {
5775 for (i = 0; i < use->n_map_members; i++)
5776 {
5777 cp = use->cost_map + i;
5778 cand = cp->cand;
5779 if (!cand)
5780 continue;
5781
5782 /* Already tried this. */
5783 if (cand->important)
5784 {
5785 if (originalp && cand->pos == IP_ORIGINAL)
5786 continue;
5787 if (!originalp && cand->iv->base_object == NULL_TREE)
5788 continue;
5789 }
5790
5791 if (iv_ca_cand_used_p (ivs, cand))
5792 continue;
5793
5794 act_delta = NULL;
5795 iv_ca_set_cp (data, ivs, use, cp);
5796 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5797 iv_ca_set_no_cp (data, ivs, use);
5798 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5799 cp, act_delta);
5800
5801 if (compare_costs (act_cost, best_cost) < 0)
5802 {
5803 best_cost = act_cost;
5804
5805 if (best_delta)
5806 iv_ca_delta_free (&best_delta);
5807 best_delta = act_delta;
5808 }
5809 else
5810 iv_ca_delta_free (&act_delta);
5811 }
5812 }
5813
5814 iv_ca_delta_commit (data, ivs, best_delta, true);
5815 iv_ca_delta_free (&best_delta);
5816
5817 return !infinite_cost_p (best_cost);
5818 }
5819
5820 /* Finds an initial assignment of candidates to uses. */
5821
5822 static struct iv_ca *
5823 get_initial_solution (struct ivopts_data *data, bool originalp)
5824 {
5825 struct iv_ca *ivs = iv_ca_new (data);
5826 unsigned i;
5827
5828 for (i = 0; i < n_iv_uses (data); i++)
5829 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5830 {
5831 iv_ca_free (&ivs);
5832 return NULL;
5833 }
5834
5835 return ivs;
5836 }
5837
5838 /* Tries to improve the set of induction variables IVS.  */
5839
5840 static bool
5841 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5842 {
5843 unsigned i, n_ivs;
5844 comp_cost acost, best_cost = iv_ca_cost (ivs);
5845 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5846 struct iv_cand *cand;
5847
5848 /* Try extending the set of induction variables by one. */
5849 for (i = 0; i < n_iv_cands (data); i++)
5850 {
5851 cand = iv_cand (data, i);
5852
5853 if (iv_ca_cand_used_p (ivs, cand))
5854 continue;
5855
5856 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5857 if (!act_delta)
5858 continue;
5859
5860 /* If we successfully added the candidate and the set is small enough,
5861 try optimizing it by removing other candidates. */
5862 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5863 {
5864 iv_ca_delta_commit (data, ivs, act_delta, true);
5865 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5866 iv_ca_delta_commit (data, ivs, act_delta, false);
5867 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5868 }
5869
5870 if (compare_costs (acost, best_cost) < 0)
5871 {
5872 best_cost = acost;
5873 iv_ca_delta_free (&best_delta);
5874 best_delta = act_delta;
5875 }
5876 else
5877 iv_ca_delta_free (&act_delta);
5878 }
5879
5880 if (!best_delta)
5881 {
5882 /* Try removing the candidates from the set instead. */
5883 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5884
5885 /* Nothing more we can do. */
5886 if (!best_delta)
5887 return false;
5888 }
5889
5890 iv_ca_delta_commit (data, ivs, best_delta, true);
5891 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5892 iv_ca_delta_free (&best_delta);
5893 return true;
5894 }
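
/* An illustration with hypothetical costs: if the current set is { i }
with cost 14, extending it by candidate ivtmp may give { i, ivtmp } with
cost 12, and the subsequent pruning may find that i has become redundant,
yielding { ivtmp } with cost 9. The joined delta (add ivtmp, remove i)
beats the previous best cost, so it is committed and try_improve_iv_set
returns true; the caller keeps iterating until no improving delta is
found. */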
5895
5896 /* Attempts to find the optimal set of induction variables. We use a simple
5897 greedy heuristic: we try to replace at most one candidate in the selected
5898 solution and remove the unused ivs while this improves the cost. */
5899
5900 static struct iv_ca *
5901 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5902 {
5903 struct iv_ca *set;
5904
5905 /* Get the initial solution. */
5906 set = get_initial_solution (data, originalp);
5907 if (!set)
5908 {
5909 if (dump_file && (dump_flags & TDF_DETAILS))
5910 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5911 return NULL;
5912 }
5913
5914 if (dump_file && (dump_flags & TDF_DETAILS))
5915 {
5916 fprintf (dump_file, "Initial set of candidates:\n");
5917 iv_ca_dump (data, dump_file, set);
5918 }
5919
5920 while (try_improve_iv_set (data, set))
5921 {
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 {
5924 fprintf (dump_file, "Improved to:\n");
5925 iv_ca_dump (data, dump_file, set);
5926 }
5927 }
5928
5929 return set;
5930 }
5931
5932 static struct iv_ca *
5933 find_optimal_iv_set (struct ivopts_data *data)
5934 {
5935 unsigned i;
5936 struct iv_ca *set, *origset;
5937 struct iv_use *use;
5938 comp_cost cost, origcost;
5939
5940 /* Determine the cost using a strategy that starts with the original IVs,
5941 then try again using a strategy that prefers candidates not based
5942 on any IVs. */
5943 origset = find_optimal_iv_set_1 (data, true);
5944 set = find_optimal_iv_set_1 (data, false);
5945
5946 if (!origset && !set)
5947 return NULL;
5948
5949 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5950 cost = set ? iv_ca_cost (set) : infinite_cost;
5951
5952 if (dump_file && (dump_flags & TDF_DETAILS))
5953 {
5954 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5955 origcost.cost, origcost.complexity);
5956 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5957 cost.cost, cost.complexity);
5958 }
5959
5960 /* Choose the one with the best cost. */
5961 if (compare_costs (origcost, cost) <= 0)
5962 {
5963 if (set)
5964 iv_ca_free (&set);
5965 set = origset;
5966 }
5967 else if (origset)
5968 iv_ca_free (&origset);
5969
5970 for (i = 0; i < n_iv_uses (data); i++)
5971 {
5972 use = iv_use (data, i);
5973 use->selected = iv_ca_cand_for_use (set, use)->cand;
5974 }
5975
5976 return set;
5977 }
5978
5979 /* Creates a new induction variable corresponding to CAND. */
5980
5981 static void
5982 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5983 {
5984 gimple_stmt_iterator incr_pos;
5985 tree base;
5986 bool after = false;
5987
5988 if (!cand->iv)
5989 return;
5990
5991 switch (cand->pos)
5992 {
5993 case IP_NORMAL:
5994 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5995 break;
5996
5997 case IP_END:
5998 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5999 after = true;
6000 break;
6001
6002 case IP_AFTER_USE:
6003 after = true;
6004 /* fall through */
6005 case IP_BEFORE_USE:
6006 incr_pos = gsi_for_stmt (cand->incremented_at);
6007 break;
6008
6009 case IP_ORIGINAL:
6010 /* Mark that the iv is preserved. */
6011 name_info (data, cand->var_before)->preserve_biv = true;
6012 name_info (data, cand->var_after)->preserve_biv = true;
6013
6014 /* Rewrite the increment so that it uses var_before directly. */
6015 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6016 return;
6017 }
6018
6019 gimple_add_tmp_var (cand->var_before);
6020 add_referenced_var (cand->var_before);
6021
6022 base = unshare_expr (cand->iv->base);
6023
6024 create_iv (base, unshare_expr (cand->iv->step),
6025 cand->var_before, data->current_loop,
6026 &incr_pos, after, &cand->var_before, &cand->var_after);
6027 }
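
/* For illustration, a candidate with base 0 and step 1 placed at
IP_NORMAL has its increment emitted just before the exit test
(hypothetical GIMPLE):

ivtmp.5_8 = ivtmp.5_7 + 1;
if (ivtmp.5_8 < n_2)
goto <loop>;
else
goto <exit>;

With IP_END the increment would instead be emitted after the exit test,
at the end of the latch block. */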
6028
6029 /* Creates new induction variables described in SET. */
6030
6031 static void
6032 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6033 {
6034 unsigned i;
6035 struct iv_cand *cand;
6036 bitmap_iterator bi;
6037
6038 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6039 {
6040 cand = iv_cand (data, i);
6041 create_new_iv (data, cand);
6042 }
6043
6044 if (dump_file && (dump_flags & TDF_DETAILS))
6045 {
6046 fprintf (dump_file, "\nSelected IV set:\n");
6047 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6048 {
6049 cand = iv_cand (data, i);
6050 dump_cand (dump_file, cand);
6051 }
6052 fprintf (dump_file, "\n");
6053 }
6054 }
6055
6056 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6057 using candidate CAND. */
6058
6059 static void
6060 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6061 struct iv_use *use, struct iv_cand *cand)
6062 {
6063 tree comp;
6064 tree op, tgt;
6065 gimple ass;
6066 gimple_stmt_iterator bsi;
6067
6068 /* An important special case -- if we are asked to express the value of
6069 the original iv by itself, just exit; there is no need to
6070 introduce a new computation (that might also need casting the
6071 variable to unsigned and back). */
6072 if (cand->pos == IP_ORIGINAL
6073 && cand->incremented_at == use->stmt)
6074 {
6075 tree step, ctype, utype;
6076 enum tree_code incr_code = PLUS_EXPR, old_code;
6077
6078 gcc_assert (is_gimple_assign (use->stmt));
6079 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6080
6081 step = cand->iv->step;
6082 ctype = TREE_TYPE (step);
6083 utype = TREE_TYPE (cand->var_after);
6084 if (TREE_CODE (step) == NEGATE_EXPR)
6085 {
6086 incr_code = MINUS_EXPR;
6087 step = TREE_OPERAND (step, 0);
6088 }
6089
6090 /* Check whether we may leave the computation unchanged.
6091 This is the case only if it does not rely on other
6092 computations in the loop -- otherwise, the computation
6093 we rely upon may be removed in remove_unused_ivs,
6094 thus leading to an ICE. */
6095 old_code = gimple_assign_rhs_code (use->stmt);
6096 if (old_code == PLUS_EXPR
6097 || old_code == MINUS_EXPR
6098 || old_code == POINTER_PLUS_EXPR)
6099 {
6100 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6101 op = gimple_assign_rhs2 (use->stmt);
6102 else if (old_code != MINUS_EXPR
6103 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6104 op = gimple_assign_rhs1 (use->stmt);
6105 else
6106 op = NULL_TREE;
6107 }
6108 else
6109 op = NULL_TREE;
6110
6111 if (op
6112 && (TREE_CODE (op) == INTEGER_CST
6113 || operand_equal_p (op, step, 0)))
6114 return;
6115
6116 /* Otherwise, add the necessary computations to express
6117 the iv. */
6118 op = fold_convert (ctype, cand->var_before);
6119 comp = fold_convert (utype,
6120 build2 (incr_code, ctype, op,
6121 unshare_expr (step)));
6122 }
6123 else
6124 {
6125 comp = get_computation (data->current_loop, use, cand);
6126 gcc_assert (comp != NULL_TREE);
6127 }
6128
6129 switch (gimple_code (use->stmt))
6130 {
6131 case GIMPLE_PHI:
6132 tgt = PHI_RESULT (use->stmt);
6133
6134 /* If we should keep the biv, do not replace it. */
6135 if (name_info (data, tgt)->preserve_biv)
6136 return;
6137
6138 bsi = gsi_after_labels (gimple_bb (use->stmt));
6139 break;
6140
6141 case GIMPLE_ASSIGN:
6142 tgt = gimple_assign_lhs (use->stmt);
6143 bsi = gsi_for_stmt (use->stmt);
6144 break;
6145
6146 default:
6147 gcc_unreachable ();
6148 }
6149
6150 if (!valid_gimple_rhs_p (comp)
6151 || (gimple_code (use->stmt) != GIMPLE_PHI
6152 /* We can't allow re-allocating the stmt as it might still be
6153 pointed to. */
6154 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6155 >= gimple_num_ops (gsi_stmt (bsi)))))
6156 {
6157 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6158 true, GSI_SAME_STMT);
6159 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6160 {
6161 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6162 /* As this isn't a plain copy, we have to reset alignment
6163 information. */
6164 if (SSA_NAME_PTR_INFO (comp))
6165 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6166 }
6167 }
6168
6169 if (gimple_code (use->stmt) == GIMPLE_PHI)
6170 {
6171 ass = gimple_build_assign (tgt, comp);
6172 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6173
6174 bsi = gsi_for_stmt (use->stmt);
6175 remove_phi_node (&bsi, false);
6176 }
6177 else
6178 {
6179 gimple_assign_set_rhs_from_tree (&bsi, comp);
6180 use->stmt = gsi_stmt (bsi);
6181 }
6182 }
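
/* For instance (hypothetical SSA names), when CAND is the original iv
and USE is its own increment

i_7 = i_4 + 1;

the special case above leaves the statement untouched. For any other
candidate the right-hand side is replaced by an expression of the
candidate, e.g.

i_7 = (int) ivtmp_3 + 1;

which is gimplified before the use when it is not a valid GIMPLE rhs. */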
6183
6184 /* Performs a peephole optimization to reorder the iv update statement with
6185 a mem ref to enable instruction combining in later phases. The mem ref uses
6186 the iv value before the update, so the reordering transformation requires
6187 adjustment of the offset. CAND is the selected IV_CAND.
6188
6189 Example:
6190
6191 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6192 iv2 = iv1 + 1;
6193
6194 if (t < val) (1)
6195 goto L;
6196 goto Head;
6197
6198
6199 directly propagating t over to (1) would introduce an overlapping live
6200 range and thus increase register pressure. This peephole transforms it into:
6201
6202
6203 iv2 = iv1 + 1;
6204 t = MEM_REF (base, iv2, 8, 8);
6205 if (t < val)
6206 goto L;
6207 goto Head;
6208 */
6209
6210 static void
6211 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6212 {
6213 tree var_after;
6214 gimple iv_update, stmt;
6215 basic_block bb;
6216 gimple_stmt_iterator gsi, gsi_iv;
6217
6218 if (cand->pos != IP_NORMAL)
6219 return;
6220
6221 var_after = cand->var_after;
6222 iv_update = SSA_NAME_DEF_STMT (var_after);
6223
6224 bb = gimple_bb (iv_update);
6225 gsi = gsi_last_nondebug_bb (bb);
6226 stmt = gsi_stmt (gsi);
6227
6228 /* Only handle conditional statements for now. */
6229 if (gimple_code (stmt) != GIMPLE_COND)
6230 return;
6231
6232 gsi_prev_nondebug (&gsi);
6233 stmt = gsi_stmt (gsi);
6234 if (stmt != iv_update)
6235 return;
6236
6237 gsi_prev_nondebug (&gsi);
6238 if (gsi_end_p (gsi))
6239 return;
6240
6241 stmt = gsi_stmt (gsi);
6242 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6243 return;
6244
6245 if (stmt != use->stmt)
6246 return;
6247
6248 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6249 return;
6250
6251 if (dump_file && (dump_flags & TDF_DETAILS))
6252 {
6253 fprintf (dump_file, "Reordering\n");
6254 print_gimple_stmt (dump_file, iv_update, 0, 0);
6255 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6256 fprintf (dump_file, "\n");
6257 }
6258
6259 gsi = gsi_for_stmt (use->stmt);
6260 gsi_iv = gsi_for_stmt (iv_update);
6261 gsi_move_before (&gsi_iv, &gsi);
6262
6263 cand->pos = IP_BEFORE_USE;
6264 cand->incremented_at = use->stmt;
6265 }
6266
6267 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6268
6269 static void
6270 rewrite_use_address (struct ivopts_data *data,
6271 struct iv_use *use, struct iv_cand *cand)
6272 {
6273 aff_tree aff;
6274 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6275 tree base_hint = NULL_TREE;
6276 tree ref, iv;
6277 bool ok;
6278
6279 adjust_iv_update_pos (cand, use);
6280 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6281 gcc_assert (ok);
6282 unshare_aff_combination (&aff);
6283
6284 /* To avoid undefined overflow problems, all IV candidates use unsigned
6285 integer types. The drawback is that this makes it impossible for
6286 create_mem_ref to distinguish an IV that is based on a memory object
6287 from one that represents simply an offset.
6288
6289 To work around this problem, we pass a hint to create_mem_ref that
6290 indicates which variable (if any) in aff is an IV based on a memory
6291 object. Note that we only consider the candidate. If this is not
6292 based on an object, the base of the reference is in some subexpression
6293 of the use -- but these will use pointer types, so they are recognized
6294 by the create_mem_ref heuristics anyway. */
6295 if (cand->iv->base_object)
6296 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6297
6298 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6299 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6300 reference_alias_ptr_type (*use->op_p),
6301 iv, base_hint, data->speed);
6302 copy_ref_info (ref, *use->op_p);
6303 *use->op_p = ref;
6304 }
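
/* A hypothetical example: an address use *p_5 where

p_5 = base_2 + (sizetype) i_4 * 4;

rewritten with a candidate based on base_2 may become

MEM[base_2 + ivtmp.7_10 * 4];

with base_2 passed to create_mem_ref as BASE_HINT so that the memory
object is kept in the base of the reference. */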
6305
6306 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6307 candidate CAND. */
6308
6309 static void
6310 rewrite_use_compare (struct ivopts_data *data,
6311 struct iv_use *use, struct iv_cand *cand)
6312 {
6313 tree comp, *var_p, op, bound;
6314 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6315 enum tree_code compare;
6316 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6317 bool ok;
6318
6319 bound = cp->value;
6320 if (bound)
6321 {
6322 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6323 tree var_type = TREE_TYPE (var);
6324 gimple_seq stmts;
6325
6326 if (dump_file && (dump_flags & TDF_DETAILS))
6327 {
6328 fprintf (dump_file, "Replacing exit test: ");
6329 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6330 }
6331 compare = cp->comp;
6332 bound = unshare_expr (fold_convert (var_type, bound));
6333 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6334 if (stmts)
6335 gsi_insert_seq_on_edge_immediate (
6336 loop_preheader_edge (data->current_loop),
6337 stmts);
6338
6339 gimple_cond_set_lhs (use->stmt, var);
6340 gimple_cond_set_code (use->stmt, compare);
6341 gimple_cond_set_rhs (use->stmt, op);
6342 return;
6343 }
6344
6345 /* The induction variable elimination failed; just express the original
6346 giv. */
6347 comp = get_computation (data->current_loop, use, cand);
6348 gcc_assert (comp != NULL_TREE);
6349
6350 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6351 gcc_assert (ok);
6352
6353 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6354 true, GSI_SAME_STMT);
6355 }
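
/* For example (hypothetical SSA names), a successful elimination turns
the exit test

if (i_4 < n_6)

into a comparison of the candidate against a bound computed in the loop
preheader:

bound_9 = ...; <-- preheader
...
if (ivtmp.5_8 != bound_9) <-- replaced exit test */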
6356
6357 /* Rewrites USE using candidate CAND. */
6358
6359 static void
6360 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6361 {
6362 switch (use->type)
6363 {
6364 case USE_NONLINEAR_EXPR:
6365 rewrite_use_nonlinear_expr (data, use, cand);
6366 break;
6367
6368 case USE_ADDRESS:
6369 rewrite_use_address (data, use, cand);
6370 break;
6371
6372 case USE_COMPARE:
6373 rewrite_use_compare (data, use, cand);
6374 break;
6375
6376 default:
6377 gcc_unreachable ();
6378 }
6379
6380 update_stmt (use->stmt);
6381 }
6382
6383 /* Rewrite the uses using the selected induction variables. */
6384
6385 static void
6386 rewrite_uses (struct ivopts_data *data)
6387 {
6388 unsigned i;
6389 struct iv_cand *cand;
6390 struct iv_use *use;
6391
6392 for (i = 0; i < n_iv_uses (data); i++)
6393 {
6394 use = iv_use (data, i);
6395 cand = use->selected;
6396 gcc_assert (cand);
6397
6398 rewrite_use (data, use, cand);
6399 }
6400 }
6401
6402 /* Removes the ivs that are not used after rewriting. */
6403
6404 static void
6405 remove_unused_ivs (struct ivopts_data *data)
6406 {
6407 unsigned j;
6408 bitmap_iterator bi;
6409 bitmap toremove = BITMAP_ALLOC (NULL);
6410
6411 /* Figure out an order in which to release SSA DEFs so that we don't
6412 release something that we'd have to propagate into a debug stmt
6413 afterwards. */
6414 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6415 {
6416 struct version_info *info;
6417
6418 info = ver_info (data, j);
6419 if (info->iv
6420 && !integer_zerop (info->iv->step)
6421 && !info->inv_id
6422 && !info->iv->have_use_for
6423 && !info->preserve_biv)
6424 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6425 }
6426
6427 release_defs_bitset (toremove);
6428
6429 BITMAP_FREE (toremove);
6430 }
6431
6432 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6433 for pointer_map_traverse. */
6434
6435 static bool
6436 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6437 void *data ATTRIBUTE_UNUSED)
6438 {
6439 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6440
6441 free (niter);
6442 return true;
6443 }
6444
6445 /* Frees data allocated by the optimization of a single loop. */
6446
6447 static void
6448 free_loop_data (struct ivopts_data *data)
6449 {
6450 unsigned i, j;
6451 bitmap_iterator bi;
6452 tree obj;
6453
6454 if (data->niters)
6455 {
6456 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6457 pointer_map_destroy (data->niters);
6458 data->niters = NULL;
6459 }
6460
6461 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6462 {
6463 struct version_info *info;
6464
6465 info = ver_info (data, i);
6466 free (info->iv);
6467 info->iv = NULL;
6468 info->has_nonlin_use = false;
6469 info->preserve_biv = false;
6470 info->inv_id = 0;
6471 }
6472 bitmap_clear (data->relevant);
6473 bitmap_clear (data->important_candidates);
6474
6475 for (i = 0; i < n_iv_uses (data); i++)
6476 {
6477 struct iv_use *use = iv_use (data, i);
6478
6479 free (use->iv);
6480 BITMAP_FREE (use->related_cands);
6481 for (j = 0; j < use->n_map_members; j++)
6482 if (use->cost_map[j].depends_on)
6483 BITMAP_FREE (use->cost_map[j].depends_on);
6484 free (use->cost_map);
6485 free (use);
6486 }
6487 VEC_truncate (iv_use_p, data->iv_uses, 0);
6488
6489 for (i = 0; i < n_iv_cands (data); i++)
6490 {
6491 struct iv_cand *cand = iv_cand (data, i);
6492
6493 free (cand->iv);
6494 if (cand->depends_on)
6495 BITMAP_FREE (cand->depends_on);
6496 free (cand);
6497 }
6498 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6499
6500 if (data->version_info_size < num_ssa_names)
6501 {
6502 data->version_info_size = 2 * num_ssa_names;
6503 free (data->version_info);
6504 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6505 }
6506
6507 data->max_inv_id = 0;
6508
6509 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6510 SET_DECL_RTL (obj, NULL_RTX);
6511
6512 VEC_truncate (tree, decl_rtl_to_reset, 0);
6513
6514 htab_empty (data->inv_expr_tab);
6515 data->inv_expr_id = 0;
6516 }
6517
6518 /* Finalizes the data structures used by the induction variable
6519 optimization pass. */
6520
6521 static void
6522 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6523 {
6524 free_loop_data (data);
6525 free (data->version_info);
6526 BITMAP_FREE (data->relevant);
6527 BITMAP_FREE (data->important_candidates);
6528
6529 VEC_free (tree, heap, decl_rtl_to_reset);
6530 VEC_free (iv_use_p, heap, data->iv_uses);
6531 VEC_free (iv_cand_p, heap, data->iv_candidates);
6532 htab_delete (data->inv_expr_tab);
6533 }
6534
6535 /* Returns true if the loop body BODY contains calls other than inexpensive builtins. */
6536
6537 static bool
6538 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6539 {
6540 gimple_stmt_iterator gsi;
6541 unsigned i;
6542
6543 for (i = 0; i < num_nodes; i++)
6544 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6545 {
6546 gimple stmt = gsi_stmt (gsi);
6547 if (is_gimple_call (stmt)
6548 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6549 return true;
6550 }
6551 return false;
6552 }
6553
6554 /* Optimizes the LOOP. Returns true if anything changed. */
6555
6556 static bool
6557 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6558 {
6559 bool changed = false;
6560 struct iv_ca *iv_ca;
6561 edge exit = single_dom_exit (loop);
6562 basic_block *body;
6563
6564 gcc_assert (!data->niters);
6565 data->current_loop = loop;
6566 data->speed = optimize_loop_for_speed_p (loop);
6567
6568 if (dump_file && (dump_flags & TDF_DETAILS))
6569 {
6570 fprintf (dump_file, "Processing loop %d\n", loop->num);
6571
6572 if (exit)
6573 {
6574 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6575 exit->src->index, exit->dest->index);
6576 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6577 fprintf (dump_file, "\n");
6578 }
6579
6580 fprintf (dump_file, "\n");
6581 }
6582
6583 body = get_loop_body (loop);
6584 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6585 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6586 free (body);
6587
6588 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6589
6590 /* For each ssa name, determine whether it behaves as an induction
6591 variable in some loop. */
6592 if (!find_induction_variables (data))
6593 goto finish;
6594
6595 /* Finds interesting uses (item 1). */
6596 find_interesting_uses (data);
6597 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6598 goto finish;
6599
6600 /* Finds candidates for the induction variables (item 2). */
6601 find_iv_candidates (data);
6602
6603 /* Calculates the costs (item 3, part 1). */
6604 determine_iv_costs (data);
6605 determine_use_iv_costs (data);
6606 determine_set_costs (data);
6607
6608 /* Find the optimal set of induction variables (item 3, part 2). */
6609 iv_ca = find_optimal_iv_set (data);
6610 if (!iv_ca)
6611 goto finish;
6612 changed = true;
6613
6614 /* Create the new induction variables (item 4, part 1). */
6615 create_new_ivs (data, iv_ca);
6616 iv_ca_free (&iv_ca);
6617
6618 /* Rewrite the uses (item 4, part 2). */
6619 rewrite_uses (data);
6620
6621 /* Remove the ivs that are unused after rewriting. */
6622 remove_unused_ivs (data);
6623
6624 /* We have changed the structure of induction variables; it might happen
6625 that definitions in the scev database refer to some of them that were
6626 eliminated. */
6627 scev_reset ();
6628
6629 finish:
6630 free_loop_data (data);
6631
6632 return changed;
6633 }
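
/* Taken together, the pass typically performs the strength reduction
sketched below (source-level view, not actual pass output):

for (i = 0; i < n; i++) for (p = a; p != a + n; p++)
a[i] = 0; ==> *p = 0;

The index iv is replaced by a pointer iv and the exit test is rewritten
against a bound computed before the loop. */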
6634
6635 /* Main entry point. Optimizes induction variables in loops. */
6636
6637 void
6638 tree_ssa_iv_optimize (void)
6639 {
6640 struct loop *loop;
6641 struct ivopts_data data;
6642 loop_iterator li;
6643
6644 tree_ssa_iv_optimize_init (&data);
6645
6646 /* Optimize the loops starting with the innermost ones. */
6647 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6648 {
6649 if (dump_file && (dump_flags & TDF_DETAILS))
6650 flow_loop_dump (loop, dump_file, NULL, 1);
6651
6652 tree_ssa_iv_optimize_loop (&data, loop);
6653 }
6654
6655 tree_ssa_iv_optimize_finalize (&data);
6656 }