gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
43
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
47
48 It consists of five steps:
49
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
52
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
57
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
60
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
64
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
67
68 5. Repropagate negates, as nothing else will clean it up ATM.
69
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
72
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
75
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
80
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
85
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
88
89 a (1), b (1), c (1), d (2), e (2)
90
91
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
94
95 You want to first merge all leaves of the same rank, as much as
96 possible.
97
98 So first build a binary op of
99
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
104
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
109
110 Then build a binary op of d + e
111 mergetmp2 = d + e
112
113 and put mergetmp2 on the merge worklist.
114
115 so merge worklist = {mergetmp, c, mergetmp2}
116
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
119
120 So we have
121
122 build binary op
123 mergetmp3 = mergetmp + c
124
125 worklist = {mergetmp2, mergetmp3}
126
127 mergetmp4 = mergetmp2 + mergetmp3
128
129 worklist = {mergetmp4}
130
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
133
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
136
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
139
140 So why don't we do this?
141
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or fewer ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
147
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
150
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
154
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
158
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
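/* Illustrative sketch (assumed operand names, not from a testcase):
   given the input chain

     t1 = a + c;
     t2 = t1 + b;
     t3 = t2 + a;

   linearization collects the operand list {a, c, b, a}; the optimization
   and sorting steps then place operands of equal rank (and equal value)
   next to each other before the chain is rewritten, so that a later
   redundancy eliminator can see the two uses of 'a' close together.  */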
164
165
166 /* Statistics */
167 static struct
168 {
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
176
177 /* Operator, rank pair. */
178 typedef struct operand_entry
179 {
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
185
186 static alloc_pool operand_entry_pool;
187
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
191
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
196
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
199
200 /* Forward decls. */
201 static long get_rank (tree);
202
203
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
208
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
218 {
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
225
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
229
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
234
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
239
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
245
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 {
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 {
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
256 }
257 }
258
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
261 }
262
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
268 {
269 gimple phi_stmt;
270 long block_rank;
271
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
275
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
277
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
280
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
288
289 return false;
290 }
291
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
298 {
299 long op_rank;
300
301 if (loop_carried_phi (op))
302 return rank;
303
304 op_rank = get_rank (op);
305
306 return MAX (rank, op_rank);
307 }
308
309 /* Look up the operand rank structure for expression E. */
310
311 static inline long
312 find_operand_rank (tree e)
313 {
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
316 }
317
318 /* Insert {E,RANK} into the operand rank hashtable. */
319
320 static inline void
321 insert_operand_rank (tree e, long rank)
322 {
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
328 }
329
330 /* Given an expression E, return the rank of the expression. */
331
332 static long
333 get_rank (tree e)
334 {
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
338
339 /* SSA_NAME's have the rank of the expression they are the result
340 of.
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is one more than the maximum
346 rank of any of its operands.
347 I make no claims that this is optimal, however, it gives good
348 results. */
349
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
353
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
358
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
362
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
367
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
370
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
375
376 if (TREE_CODE (e) == SSA_NAME)
377 {
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
382
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
385
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
389
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
393
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
398
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
404 {
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
410 {
411 for (i = 0; i < n; i++)
412 {
413 op = TREE_OPERAND (rhs, i);
414
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
417 }
418 }
419 }
420 else
421 {
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
424 {
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
428 }
429 }
430
431 if (dump_file && (dump_flags & TDF_DETAILS))
432 {
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
436 }
437
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
441 }
442
443 /* Globals, etc, are rank 0 */
444 return 0;
445 }
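/* Illustrative note (assumed names): for a chain such as

     a_1 = x_0 + y_0;
     b_2 = a_1 + z_0;

   a_1 receives rank MAX (rank (x_0), rank (y_0)) + 1 and b_2 at least one
   more than a_1, so names defined later in a dependence chain get strictly
   larger ranks.  This is the property sort_by_operand_rank relies on.  */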
446
447
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
453
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
458 {
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
465 }
466
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
471 {
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied/whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
479 {
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
486 }
487
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
493 {
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
498 }
499
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
504 }
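/* For illustration, operands carrying (assumed) ranks {3, 0, 7, 3} sort to
   the order {7, 3, 3, 0}: decreasing rank, with rank-0 constants grouped
   at the end.  Remaining ties are broken by constant type,
   SSA_NAME_VERSION or the unique id, so the result does not depend on
   host pointer values.  */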
505
506 /* Add an operand entry to *OPS for the tree operand OP. */
507
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 {
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
518 }
519
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
522
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
526 {
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
534
535 reassociate_stats.pows_encountered++;
536 }
537
538 /* Return true if STMT is a reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
540
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 {
544 basic_block bb = gimple_bb (stmt);
545
546 if (gimple_bb (stmt) == NULL)
547 return false;
548
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
551
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
556
557 return false;
558 }
559
560
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of that operation. Otherwise, return NULL_TREE. */
563
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
566 {
567 gimple stmt = SSA_NAME_DEF_STMT (name);
568
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
571
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
575 }
576
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
580
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
588 {
589
590 /* If we have two of the same op, and the opcode is & |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
594
595 if (last && last->op == curr->op)
596 {
597 switch (opcode)
598 {
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
604 {
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
611 }
612
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
615
616 return true;
617
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
620 {
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
626 }
627
628 reassociate_stats.ops_eliminated += 2;
629
630 if (ops->length () == 2)
631 {
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
635 }
636 else
637 {
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
640 }
641
642 return true;
643
644 default:
645 break;
646 }
647 }
648 return false;
649 }
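/* In other words, the identities applied above are

     x & x -> x          x | x -> x
     MIN (x, x) -> x     MAX (x, x) -> x
     x ^ x -> 0

   where the XOR case removes both entries from the operand list.  */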
650
651 static vec<tree> plus_negates;
652
653 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
658
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
664 {
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
669
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
672
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
677
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
681
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
686 {
687 if (oe->op == negateop)
688 {
689
690 if (dump_file && (dump_flags & TDF_DETAILS))
691 {
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
697 }
698
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
703
704 return true;
705 }
706 else if (oe->op == notop)
707 {
708 tree op_type = TREE_TYPE (oe->op);
709
710 if (dump_file && (dump_flags & TDF_DETAILS))
711 {
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
717 }
718
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
723
724 return true;
725 }
726 }
727
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
732
733 return false;
734 }
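/* Illustrative identities handled above for a PLUS_EXPR chain:

     x + -x -> 0
     x + ~x -> -1

   with the replacement constant appended to the operand list.  */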
735
736 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0, and return true. Otherwise, return
740 false. */
741
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
747 {
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
751
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
755
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
759
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
763
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
768 {
769 if (oe->op == notop)
770 {
771 if (dump_file && (dump_flags & TDF_DETAILS))
772 {
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
784 }
785
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
791
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
796 }
797 }
798
799 return false;
800 }
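/* Illustrative identities handled above:

     x & ~x -> 0
     x | ~x -> -1 (all bits set)

   Either result absorbs the whole chain, so all other operands are
   dropped.  */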
801
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
808
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
812 {
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
815
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 {
819 switch (opcode)
820 {
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
823 {
824 if (ops->length () != 1)
825 {
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
828
829 reassociate_stats.ops_eliminated += ops->length () - 1;
830
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
834 }
835 }
836 else if (integer_all_onesp (oelast->op))
837 {
838 if (ops->length () != 1)
839 {
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
844 }
845 }
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
849 {
850 if (ops->length () != 1)
851 {
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
854
855 reassociate_stats.ops_eliminated += ops->length () - 1;
856
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
860 }
861 }
862 else if (integer_zerop (oelast->op))
863 {
864 if (ops->length () != 1)
865 {
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
870 }
871 }
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
879 {
880 if (ops->length () != 1)
881 {
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
884
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
889 }
890 }
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
895 {
896 if (ops->length () != 1)
897 {
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
903 }
904 }
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
914 {
915 if (ops->length () != 1)
916 {
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
922 }
923 }
924 break;
925 default:
926 break;
927 }
928 }
929 }
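/* Summary of the constant identities above (floating-point cases only when
   the semantics allow it):

     x & 0  -> 0       x & -1 -> x
     x | -1 -> -1      x | 0  -> x
     x * 0  -> 0       x * 1  -> x
     x + 0  -> x       x ^ 0  -> x

   The absorbing cases (& 0, | -1, * 0) drop all other operands; the
   identity cases merely pop the constant.  */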
930
931
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
934
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
942
943
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
946
947
948 /* Oecount hashtable helpers. */
949
950 struct oecount_hasher : typed_noop_remove <void>
951 {
952 /* Note that this hash table stores integers, not pointers.
953 So, observe the casting in the member functions. */
954 typedef void value_type;
955 typedef void compare_type;
956 static inline hashval_t hash (const value_type *);
957 static inline bool equal (const value_type *, const compare_type *);
958 };
959
960 /* Hash function for oecount. */
961
962 inline hashval_t
963 oecount_hasher::hash (const value_type *p)
964 {
965 const oecount *c = &cvec[(size_t)p - 42];
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
967 }
968
969 /* Comparison function for oecount. */
970
971 inline bool
972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
973 {
974 const oecount *c1 = &cvec[(size_t)p1 - 42];
975 const oecount *c2 = &cvec[(size_t)p2 - 42];
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
978 }
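/* Note on the encoding used above: the table stores small integers cast to
   void *, not pointers to oecount objects.  A stored value V denotes
   cvec[V - 42]; the bias of 42 (matching the "+ 41" at the insertion point
   below) appears to exist only to keep valid entries distinct from the
   empty slot marker 0.  */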
979
980 /* Comparison function for qsort sorting oecount elements by count. */
981
982 static int
983 oecount_cmp (const void *p1, const void *p2)
984 {
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
992 }
993
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
996
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
999 {
1000 tree fndecl;
1001
1002 if (!is_gimple_call (stmt))
1003 return false;
1004
1005 fndecl = gimple_call_fndecl (stmt);
1006
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1010
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1012 {
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1016
1017 default:
1018 return false;
1019 }
1020 }
1021
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the new exponent. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1025
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1028 {
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1032
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1034 {
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1042
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1048
1049 default:
1050 gcc_unreachable ();
1051 }
1052 }
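/* For example, a call __builtin_pow (x, 3.0) is rewritten in place to
   __builtin_pow (x, 2.0) and 2 is returned; __builtin_powi (x, 5) becomes
   __builtin_powi (x, 4) with 4 returned.  */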
1053
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1057
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1060 {
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1065
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1070
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 unlink_stmt_vdef (stmt);
1080 gsi_remove (&gsi, true);
1081 release_defs (stmt);
1082 }
1083
1084 /* Walks the linear chain with result *DEF searching for an operation
1085 with operand OP and code OPCODE removing that from the chain. *DEF
1086 is updated if there is only one operand but no operation left. */
1087
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1090 {
1091 gimple stmt = SSA_NAME_DEF_STMT (*def);
1092
1093 do
1094 {
1095 tree name;
1096
1097 if (opcode == MULT_EXPR
1098 && stmt_is_power_of_op (stmt, op))
1099 {
1100 if (decrement_power (stmt) == 1)
1101 propagate_op_to_single_use (op, stmt, def);
1102 return;
1103 }
1104
1105 name = gimple_assign_rhs1 (stmt);
1106
1107 /* If this is the operation we look for and one of the operands
1108 is ours simply propagate the other operand into the stmts
1109 single use. */
1110 if (gimple_assign_rhs_code (stmt) == opcode
1111 && (name == op
1112 || gimple_assign_rhs2 (stmt) == op))
1113 {
1114 if (name == op)
1115 name = gimple_assign_rhs2 (stmt);
1116 propagate_op_to_single_use (name, stmt, def);
1117 return;
1118 }
1119
1120 /* We might have a multiply of two __builtin_pow* calls, and
1121 the operand might be hiding in the rightmost one. */
1122 if (opcode == MULT_EXPR
1123 && gimple_assign_rhs_code (stmt) == opcode
1124 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125 && has_single_use (gimple_assign_rhs2 (stmt)))
1126 {
1127 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128 if (stmt_is_power_of_op (stmt2, op))
1129 {
1130 if (decrement_power (stmt2) == 1)
1131 propagate_op_to_single_use (op, stmt2, def);
1132 return;
1133 }
1134 }
1135
1136 /* Continue walking the chain. */
1137 gcc_assert (name != op
1138 && TREE_CODE (name) == SSA_NAME);
1139 stmt = SSA_NAME_DEF_STMT (name);
1140 }
1141 while (1);
1142 }
1143
1144 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1145 of type TYPE for the result. Places the statement after the definition
1146 of either OP1 or OP2. Returns the new statement. */
1147
1148 static gimple
1149 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1150 {
1151 gimple op1def = NULL, op2def = NULL;
1152 gimple_stmt_iterator gsi;
1153 tree op;
1154 gimple sum;
1155
1156 /* Create the addition statement. */
1157 op = make_ssa_name (type, NULL);
1158 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1159
1160 /* Find an insertion place and insert. */
1161 if (TREE_CODE (op1) == SSA_NAME)
1162 op1def = SSA_NAME_DEF_STMT (op1);
1163 if (TREE_CODE (op2) == SSA_NAME)
1164 op2def = SSA_NAME_DEF_STMT (op2);
1165 if ((!op1def || gimple_nop_p (op1def))
1166 && (!op2def || gimple_nop_p (op2def)))
1167 {
1168 gimple first_stmt;
1169 unsigned uid;
1170 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1171 first_stmt = gsi_stmt (gsi);
1172 uid = first_stmt ? gimple_uid (first_stmt) : 1;
1173 gimple_set_uid (sum, uid);
1174 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1175 }
1176 else if ((!op1def || gimple_nop_p (op1def))
1177 || (op2def && !gimple_nop_p (op2def)
1178 && stmt_dominates_stmt_p (op1def, op2def)))
1179 {
1180 if (gimple_code (op2def) == GIMPLE_PHI)
1181 {
1182 gsi = gsi_after_labels (gimple_bb (op2def));
1183 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1184 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1185 }
1186 else
1187 {
1188 if (!stmt_ends_bb_p (op2def))
1189 {
1190 gsi = gsi_for_stmt (op2def);
1191 gimple_set_uid (sum, gimple_uid (op2def));
1192 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1193 }
1194 else
1195 {
1196 edge e;
1197 edge_iterator ei;
1198
1199 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1200 if (e->flags & EDGE_FALLTHRU)
1201 gsi_insert_on_edge_immediate (e, sum);
1202 }
1203 }
1204 }
1205 else
1206 {
1207 if (gimple_code (op1def) == GIMPLE_PHI)
1208 {
1209 gsi = gsi_after_labels (gimple_bb (op1def));
1210 gimple_set_uid (sum, gimple_uid (op1def));
1211 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1212 }
1213 else
1214 {
1215 if (!stmt_ends_bb_p (op1def))
1216 {
1217 gsi = gsi_for_stmt (op1def);
1218 gimple_set_uid (sum, gimple_uid (op1def));
1219 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1220 }
1221 else
1222 {
1223 edge e;
1224 edge_iterator ei;
1225
1226 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1227 if (e->flags & EDGE_FALLTHRU)
1228 gsi_insert_on_edge_immediate (e, sum);
1229 }
1230 }
1231 }
1232 update_stmt (sum);
1233
1234 return sum;
1235 }
1236
1237 /* Perform un-distribution of divisions and multiplications.
1238 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1239 to (A + B) / X for real X.
1240
1241 The algorithm is organized as follows.
1242
1243 - First we walk the addition chain *OPS looking for summands that
1244 are defined by a multiplication or a real division. This results
1245 in the candidates bitmap with relevant indices into *OPS.
1246
1247 - Second we build the chains of multiplications or divisions for
1248 these candidates, counting the number of occurrences of (operand, code)
1249 pairs in all of the candidates chains.
1250
1251 - Third we sort the (operand, code) pairs by number of occurrence and
1252 process them starting with the pair with the most uses.
1253
1254 * For each such pair we walk the candidates again to build a
1255 second candidate bitmap noting all multiplication/division chains
1256 that have at least one occurrence of (operand, code).
1257
1258 * We build an alternate addition chain only covering these
1259 candidates with one (operand, code) operation removed from their
1260 multiplication/division chain.
1261
1262 * The first candidate gets replaced by the alternate addition chain
1263 multiplied/divided by the operand.
1264
1265 * All candidate chains get disabled for further processing and
1266 processing of (operand, code) pairs continues.
1267
1268 The alternate addition chains built are re-processed by the main
1269 reassociation algorithm which allows optimizing a * x * y + b * y * x
1270 to (a + b ) * x * y in one invocation of the reassociation pass. */
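/* A small worked example (assumed operands): for the addition chain
   a*x + b*x + c, the candidates are the two multiplications; the counting
   table records (x, *) twice and (a, *), (b, *) once each, so (x, *) is
   processed first.  Stripping x from both chains leaves a and b, the new
   sum a + b is built and multiplied by x, and the operand list becomes
   { (a + b)*x, 0, c }.  */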
1271
1272 static bool
1273 undistribute_ops_list (enum tree_code opcode,
1274 vec<operand_entry_t> *ops, struct loop *loop)
1275 {
1276 unsigned int length = ops->length ();
1277 operand_entry_t oe1;
1278 unsigned i, j;
1279 sbitmap candidates, candidates2;
1280 unsigned nr_candidates, nr_candidates2;
1281 sbitmap_iterator sbi0;
1282 vec<operand_entry_t> *subops;
1283 hash_table <oecount_hasher> ctable;
1284 bool changed = false;
1285 int next_oecount_id = 0;
1286
1287 if (length <= 1
1288 || opcode != PLUS_EXPR)
1289 return false;
1290
1291 /* Build a list of candidates to process. */
1292 candidates = sbitmap_alloc (length);
1293 bitmap_clear (candidates);
1294 nr_candidates = 0;
1295 FOR_EACH_VEC_ELT (*ops, i, oe1)
1296 {
1297 enum tree_code dcode;
1298 gimple oe1def;
1299
1300 if (TREE_CODE (oe1->op) != SSA_NAME)
1301 continue;
1302 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1303 if (!is_gimple_assign (oe1def))
1304 continue;
1305 dcode = gimple_assign_rhs_code (oe1def);
1306 if ((dcode != MULT_EXPR
1307 && dcode != RDIV_EXPR)
1308 || !is_reassociable_op (oe1def, dcode, loop))
1309 continue;
1310
1311 bitmap_set_bit (candidates, i);
1312 nr_candidates++;
1313 }
1314
1315 if (nr_candidates < 2)
1316 {
1317 sbitmap_free (candidates);
1318 return false;
1319 }
1320
1321 if (dump_file && (dump_flags & TDF_DETAILS))
1322 {
1323 fprintf (dump_file, "searching for un-distribute opportunities ");
1324 print_generic_expr (dump_file,
1325 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1326 fprintf (dump_file, " %d\n", nr_candidates);
1327 }
1328
1329 /* Build linearized sub-operand lists and the counting table. */
1330 cvec.create (0);
1331 ctable.create (15);
1332 /* ??? Macro arguments cannot have multi-argument template types in
1333 them. This typedef is needed to work around that limitation. */
1334 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1335 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1336 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1337 {
1338 gimple oedef;
1339 enum tree_code oecode;
1340 unsigned j;
1341
1342 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1343 oecode = gimple_assign_rhs_code (oedef);
1344 linearize_expr_tree (&subops[i], oedef,
1345 associative_tree_code (oecode), false);
1346
1347 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1348 {
1349 oecount c;
1350 void **slot;
1351 size_t idx;
1352 c.oecode = oecode;
1353 c.cnt = 1;
1354 c.id = next_oecount_id++;
1355 c.op = oe1->op;
1356 cvec.safe_push (c);
1357 idx = cvec.length () + 41;
1358 slot = ctable.find_slot ((void *)idx, INSERT);
1359 if (!*slot)
1360 {
1361 *slot = (void *)idx;
1362 }
1363 else
1364 {
1365 cvec.pop ();
1366 cvec[(size_t)*slot - 42].cnt++;
1367 }
1368 }
1369 }
1370 ctable.dispose ();
1371
1372 /* Sort the counting table. */
1373 cvec.qsort (oecount_cmp);
1374
1375 if (dump_file && (dump_flags & TDF_DETAILS))
1376 {
1377 oecount *c;
1378 fprintf (dump_file, "Candidates:\n");
1379 FOR_EACH_VEC_ELT (cvec, j, c)
1380 {
1381 fprintf (dump_file, " %u %s: ", c->cnt,
1382 c->oecode == MULT_EXPR
1383 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1384 print_generic_expr (dump_file, c->op, 0);
1385 fprintf (dump_file, "\n");
1386 }
1387 }
1388
1389 /* Process the (operand, code) pairs in order of most occurrence. */
1390 candidates2 = sbitmap_alloc (length);
1391 while (!cvec.is_empty ())
1392 {
1393 oecount *c = &cvec.last ();
1394 if (c->cnt < 2)
1395 break;
1396
1397 /* Now collect the operands in the outer chain that contain
1398 the common operand in their inner chain. */
1399 bitmap_clear (candidates2);
1400 nr_candidates2 = 0;
1401 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1402 {
1403 gimple oedef;
1404 enum tree_code oecode;
1405 unsigned j;
1406 tree op = (*ops)[i]->op;
1407
1408 /* If we undistributed in this chain already this may be
1409 a constant. */
1410 if (TREE_CODE (op) != SSA_NAME)
1411 continue;
1412
1413 oedef = SSA_NAME_DEF_STMT (op);
1414 oecode = gimple_assign_rhs_code (oedef);
1415 if (oecode != c->oecode)
1416 continue;
1417
1418 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1419 {
1420 if (oe1->op == c->op)
1421 {
1422 bitmap_set_bit (candidates2, i);
1423 ++nr_candidates2;
1424 break;
1425 }
1426 }
1427 }
1428
1429 if (nr_candidates2 >= 2)
1430 {
1431 operand_entry_t oe1, oe2;
1432 gimple prod;
1433 int first = bitmap_first_set_bit (candidates2);
1434
1435 /* Build the new addition chain. */
1436 oe1 = (*ops)[first];
1437 if (dump_file && (dump_flags & TDF_DETAILS))
1438 {
1439 fprintf (dump_file, "Building (");
1440 print_generic_expr (dump_file, oe1->op, 0);
1441 }
1442 zero_one_operation (&oe1->op, c->oecode, c->op);
1443 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1444 {
1445 gimple sum;
1446 oe2 = (*ops)[i];
1447 if (dump_file && (dump_flags & TDF_DETAILS))
1448 {
1449 fprintf (dump_file, " + ");
1450 print_generic_expr (dump_file, oe2->op, 0);
1451 }
1452 zero_one_operation (&oe2->op, c->oecode, c->op);
1453 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1454 oe1->op, oe2->op, opcode);
1455 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1456 oe2->rank = 0;
1457 oe1->op = gimple_get_lhs (sum);
1458 }
1459
1460 /* Apply the multiplication/division. */
1461 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1462 oe1->op, c->op, c->oecode);
1463 if (dump_file && (dump_flags & TDF_DETAILS))
1464 {
1465 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1466 print_generic_expr (dump_file, c->op, 0);
1467 fprintf (dump_file, "\n");
1468 }
1469
1470 /* Record it in the addition chain and disable further
1471 undistribution with this op. */
1472 oe1->op = gimple_assign_lhs (prod);
1473 oe1->rank = get_rank (oe1->op);
1474 subops[first].release ();
1475
1476 changed = true;
1477 }
1478
1479 cvec.pop ();
1480 }
1481
1482 for (i = 0; i < ops->length (); ++i)
1483 subops[i].release ();
1484 free (subops);
1485 cvec.release ();
1486 sbitmap_free (candidates);
1487 sbitmap_free (candidates2);
1488
1489 return changed;
1490 }
1491
1492 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1493 expression, examine the other OPS to see if any of them are comparisons
1494 of the same values, which we may be able to combine or eliminate.
1495 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1496
1497 static bool
1498 eliminate_redundant_comparison (enum tree_code opcode,
1499 vec<operand_entry_t> *ops,
1500 unsigned int currindex,
1501 operand_entry_t curr)
1502 {
1503 tree op1, op2;
1504 enum tree_code lcode, rcode;
1505 gimple def1, def2;
1506 int i;
1507 operand_entry_t oe;
1508
1509 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1510 return false;
1511
1512 /* Check that CURR is a comparison. */
1513 if (TREE_CODE (curr->op) != SSA_NAME)
1514 return false;
1515 def1 = SSA_NAME_DEF_STMT (curr->op);
1516 if (!is_gimple_assign (def1))
1517 return false;
1518 lcode = gimple_assign_rhs_code (def1);
1519 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1520 return false;
1521 op1 = gimple_assign_rhs1 (def1);
1522 op2 = gimple_assign_rhs2 (def1);
1523
1524 /* Now look for a similar comparison in the remaining OPS. */
1525 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1526 {
1527 tree t;
1528
1529 if (TREE_CODE (oe->op) != SSA_NAME)
1530 continue;
1531 def2 = SSA_NAME_DEF_STMT (oe->op);
1532 if (!is_gimple_assign (def2))
1533 continue;
1534 rcode = gimple_assign_rhs_code (def2);
1535 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1536 continue;
1537
1538 /* If we got here, we have a match. See if we can combine the
1539 two comparisons. */
1540 if (opcode == BIT_IOR_EXPR)
1541 t = maybe_fold_or_comparisons (lcode, op1, op2,
1542 rcode, gimple_assign_rhs1 (def2),
1543 gimple_assign_rhs2 (def2));
1544 else
1545 t = maybe_fold_and_comparisons (lcode, op1, op2,
1546 rcode, gimple_assign_rhs1 (def2),
1547 gimple_assign_rhs2 (def2));
1548 if (!t)
1549 continue;
1550
1551 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1552 always give us a boolean_type_node value back. If the original
1553 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1554 we need to convert. */
1555 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1556 t = fold_convert (TREE_TYPE (curr->op), t);
1557
1558 if (TREE_CODE (t) != INTEGER_CST
1559 && !operand_equal_p (t, curr->op, 0))
1560 {
1561 enum tree_code subcode;
1562 tree newop1, newop2;
1563 if (!COMPARISON_CLASS_P (t))
1564 continue;
1565 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1566 STRIP_USELESS_TYPE_CONVERSION (newop1);
1567 STRIP_USELESS_TYPE_CONVERSION (newop2);
1568 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1569 continue;
1570 }
1571
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 {
1574 fprintf (dump_file, "Equivalence: ");
1575 print_generic_expr (dump_file, curr->op, 0);
1576 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1577 print_generic_expr (dump_file, oe->op, 0);
1578 fprintf (dump_file, " -> ");
1579 print_generic_expr (dump_file, t, 0);
1580 fprintf (dump_file, "\n");
1581 }
1582
1583 /* Now we can delete oe, as it has been subsumed by the new combined
1584 expression t. */
1585 ops->ordered_remove (i);
1586 reassociate_stats.ops_eliminated ++;
1587
1588 /* If t is the same as curr->op, we're done. Otherwise we must
1589 replace curr->op with t. Special case is if we got a constant
1590 back, in which case we add it to the end instead of in place of
1591 the current entry. */
1592 if (TREE_CODE (t) == INTEGER_CST)
1593 {
1594 ops->ordered_remove (currindex);
1595 add_to_ops_vec (ops, t);
1596 }
1597 else if (!operand_equal_p (t, curr->op, 0))
1598 {
1599 gimple sum;
1600 enum tree_code subcode;
1601 tree newop1;
1602 tree newop2;
1603 gcc_assert (COMPARISON_CLASS_P (t));
1604 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1605 STRIP_USELESS_TYPE_CONVERSION (newop1);
1606 STRIP_USELESS_TYPE_CONVERSION (newop2);
1607 gcc_checking_assert (is_gimple_val (newop1)
1608 && is_gimple_val (newop2));
1609 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1610 curr->op = gimple_get_lhs (sum);
1611 }
1612 return true;
1613 }
1614
1615 return false;
1616 }
1617
1618 /* Perform various identities and other optimizations on the list of
1619 operand entries, stored in OPS. The tree code for the binary
1620 operation between all the operands is OPCODE. */
1621
1622 static void
1623 optimize_ops_list (enum tree_code opcode,
1624 vec<operand_entry_t> *ops)
1625 {
1626 unsigned int length = ops->length ();
1627 unsigned int i;
1628 operand_entry_t oe;
1629 operand_entry_t oelast = NULL;
1630 bool iterate = false;
1631
1632 if (length == 1)
1633 return;
1634
1635 oelast = ops->last ();
1636
1637 /* If the last two are constants, pop the constants off, merge them
1638 and try the next two. */
1639 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1640 {
1641 operand_entry_t oelm1 = (*ops)[length - 2];
1642
1643 if (oelm1->rank == 0
1644 && is_gimple_min_invariant (oelm1->op)
1645 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1646 TREE_TYPE (oelast->op)))
1647 {
1648 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1649 oelm1->op, oelast->op);
1650
1651 if (folded && is_gimple_min_invariant (folded))
1652 {
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1654 fprintf (dump_file, "Merging constants\n");
1655
1656 ops->pop ();
1657 ops->pop ();
1658
1659 add_to_ops_vec (ops, folded);
1660 reassociate_stats.constants_eliminated++;
1661
1662 optimize_ops_list (opcode, ops);
1663 return;
1664 }
1665 }
1666 }
1667
1668 eliminate_using_constants (opcode, ops);
1669 oelast = NULL;
1670
1671 for (i = 0; ops->iterate (i, &oe);)
1672 {
1673 bool done = false;
1674
1675 if (eliminate_not_pairs (opcode, ops, i, oe))
1676 return;
1677 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1678 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1679 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1680 {
1681 if (done)
1682 return;
1683 iterate = true;
1684 oelast = NULL;
1685 continue;
1686 }
1687 oelast = oe;
1688 i++;
1689 }
1690
1691 length = ops->length ();
1692 oelast = ops->last ();
1693
1694 if (iterate)
1695 optimize_ops_list (opcode, ops);
1696 }
1697
1698 /* The following functions are subroutines to optimize_range_tests and allow
1699 it to try to change a logical combination of comparisons into a range
1700 test.
1701
1702 For example, both
1703 X == 2 || X == 5 || X == 3 || X == 4
1704 and
1705 X >= 2 && X <= 5
1706 are converted to
1707 (unsigned) (X - 2) <= 3
1708
1709 For more information see comments above fold_test_range in fold-const.c,
1710 this implementation is for GIMPLE. */
1711
1712 struct range_entry
1713 {
1714 tree exp;
1715 tree low;
1716 tree high;
1717 bool in_p;
1718 bool strict_overflow_p;
1719 unsigned int idx, next;
1720 };
1721
1722 /* This is similar to make_range in fold-const.c, but on top of
1723 GIMPLE instead of trees. If EXP is non-NULL, it should be
1724 an SSA_NAME and STMT argument is ignored, otherwise STMT
1725 argument should be a GIMPLE_COND. */
1726
1727 static void
1728 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1729 {
1730 int in_p;
1731 tree low, high;
1732 bool is_bool, strict_overflow_p;
1733
1734 r->exp = NULL_TREE;
1735 r->in_p = false;
1736 r->strict_overflow_p = false;
1737 r->low = NULL_TREE;
1738 r->high = NULL_TREE;
1739 if (exp != NULL_TREE
1740 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1741 return;
1742
1743 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1744 and see if we can refine the range. Some of the cases below may not
1745 happen, but it doesn't seem worth worrying about this. We "continue"
1746 the outer loop when we've changed something; otherwise we "break"
1747 the switch, which will "break" the while. */
1748 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1749 high = low;
1750 in_p = 0;
1751 strict_overflow_p = false;
1752 is_bool = false;
1753 if (exp == NULL_TREE)
1754 is_bool = true;
1755 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1756 {
1757 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1758 is_bool = true;
1759 else
1760 return;
1761 }
1762 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1763 is_bool = true;
1764
1765 while (1)
1766 {
1767 enum tree_code code;
1768 tree arg0, arg1, exp_type;
1769 tree nexp;
1770 location_t loc;
1771
1772 if (exp != NULL_TREE)
1773 {
1774 if (TREE_CODE (exp) != SSA_NAME)
1775 break;
1776
1777 stmt = SSA_NAME_DEF_STMT (exp);
1778 if (!is_gimple_assign (stmt))
1779 break;
1780
1781 code = gimple_assign_rhs_code (stmt);
1782 arg0 = gimple_assign_rhs1 (stmt);
1783 arg1 = gimple_assign_rhs2 (stmt);
1784 exp_type = TREE_TYPE (exp);
1785 }
1786 else
1787 {
1788 code = gimple_cond_code (stmt);
1789 arg0 = gimple_cond_lhs (stmt);
1790 arg1 = gimple_cond_rhs (stmt);
1791 exp_type = boolean_type_node;
1792 }
1793
1794 if (TREE_CODE (arg0) != SSA_NAME)
1795 break;
1796 loc = gimple_location (stmt);
1797 switch (code)
1798 {
1799 case BIT_NOT_EXPR:
1800 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1801 {
1802 in_p = !in_p;
1803 exp = arg0;
1804 continue;
1805 }
1806 break;
1807 case SSA_NAME:
1808 exp = arg0;
1809 continue;
1810 CASE_CONVERT:
1811 if (is_bool)
1812 goto do_default;
1813 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1814 {
1815 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1816 is_bool = true;
1817 else
1818 return;
1819 }
1820 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1821 is_bool = true;
1822 goto do_default;
1823 case EQ_EXPR:
1824 case NE_EXPR:
1825 case LT_EXPR:
1826 case LE_EXPR:
1827 case GE_EXPR:
1828 case GT_EXPR:
1829 is_bool = true;
1830 /* FALLTHRU */
1831 default:
1832 if (!is_bool)
1833 return;
1834 do_default:
1835 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1836 &low, &high, &in_p,
1837 &strict_overflow_p);
1838 if (nexp != NULL_TREE)
1839 {
1840 exp = nexp;
1841 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1842 continue;
1843 }
1844 break;
1845 }
1846 break;
1847 }
1848 if (is_bool)
1849 {
1850 r->exp = exp;
1851 r->in_p = in_p;
1852 r->low = low;
1853 r->high = high;
1854 r->strict_overflow_p = strict_overflow_p;
1855 }
1856 }
1857
1858 /* Comparison function for qsort. Sort entries
1859 without SSA_NAME exp first, then with SSA_NAMEs sorted
1860 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1861 by increasing ->low and if ->low is the same, by increasing
1862 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1863 maximum. */
1864
1865 static int
1866 range_entry_cmp (const void *a, const void *b)
1867 {
1868 const struct range_entry *p = (const struct range_entry *) a;
1869 const struct range_entry *q = (const struct range_entry *) b;
1870
1871 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1872 {
1873 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1874 {
1875 /* Group range_entries for the same SSA_NAME together. */
1876 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1877 return -1;
1878 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1879 return 1;
1880 /* If ->low is different, NULL low goes first, then by
1881 ascending low. */
1882 if (p->low != NULL_TREE)
1883 {
1884 if (q->low != NULL_TREE)
1885 {
1886 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1887 p->low, q->low);
1888 if (tem && integer_onep (tem))
1889 return -1;
1890 tem = fold_binary (GT_EXPR, boolean_type_node,
1891 p->low, q->low);
1892 if (tem && integer_onep (tem))
1893 return 1;
1894 }
1895 else
1896 return 1;
1897 }
1898 else if (q->low != NULL_TREE)
1899 return -1;
1900 /* If ->high is different, NULL high goes last, before that by
1901 ascending high. */
1902 if (p->high != NULL_TREE)
1903 {
1904 if (q->high != NULL_TREE)
1905 {
1906 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1907 p->high, q->high);
1908 if (tem && integer_onep (tem))
1909 return -1;
1910 tem = fold_binary (GT_EXPR, boolean_type_node,
1911 p->high, q->high);
1912 if (tem && integer_onep (tem))
1913 return 1;
1914 }
1915 else
1916 return -1;
1917 }
1918 else if (p->high != NULL_TREE)
1919 return 1;
1920 /* If both ranges are the same, sort below by ascending idx. */
1921 }
1922 else
1923 return 1;
1924 }
1925 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1926 return -1;
1927
1928 if (p->idx < q->idx)
1929 return -1;
1930 else
1931 {
1932 gcc_checking_assert (p->idx > q->idx);
1933 return 1;
1934 }
1935 }
1936
1937 /* Helper routine of optimize_range_tests.
1938 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1939 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1940 OPCODE and OPS are arguments of optimize_range_tests. Return
1941 true if the range merge has been successful.
1942 If OPCODE is ERROR_MARK, this is called from within
1943 maybe_optimize_range_tests and is performing inter-bb range optimization.
1944 Changes should then be performed right away, and whether an op is
1945 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1946
1947 static bool
1948 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1949 unsigned int count, enum tree_code opcode,
1950 vec<operand_entry_t> *ops, tree exp, bool in_p,
1951 tree low, tree high, bool strict_overflow_p)
1952 {
1953 operand_entry_t oe = (*ops)[range->idx];
1954 tree op = oe->op;
1955 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1956 location_t loc = gimple_location (stmt);
1957 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1958 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1959 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1960 gimple_stmt_iterator gsi;
1961
1962 if (tem == NULL_TREE)
1963 return false;
1964
1965 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1966 warning_at (loc, OPT_Wstrict_overflow,
1967 "assuming signed overflow does not occur "
1968 "when simplifying range test");
1969
1970 if (dump_file && (dump_flags & TDF_DETAILS))
1971 {
1972 struct range_entry *r;
1973 fprintf (dump_file, "Optimizing range tests ");
1974 print_generic_expr (dump_file, range->exp, 0);
1975 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1976 print_generic_expr (dump_file, range->low, 0);
1977 fprintf (dump_file, ", ");
1978 print_generic_expr (dump_file, range->high, 0);
1979 fprintf (dump_file, "]");
1980 for (r = otherrange; r < otherrange + count; r++)
1981 {
1982 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1983 print_generic_expr (dump_file, r->low, 0);
1984 fprintf (dump_file, ", ");
1985 print_generic_expr (dump_file, r->high, 0);
1986 fprintf (dump_file, "]");
1987 }
1988 fprintf (dump_file, "\n into ");
1989 print_generic_expr (dump_file, tem, 0);
1990 fprintf (dump_file, "\n");
1991 }
1992
1993 if (opcode == BIT_IOR_EXPR
1994 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1995 tem = invert_truthvalue_loc (loc, tem);
1996
1997 tem = fold_convert_loc (loc, optype, tem);
1998 gsi = gsi_for_stmt (stmt);
1999 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2000 GSI_SAME_STMT);
2001
2002 /* If doing inter-bb range test optimization, update the
2003 stmts immediately. Start with changing the first range test
2004 immediate use to the new value (TEM), or, if the first range
2005 test is a GIMPLE_COND stmt, change that condition. */
2006 if (opcode == ERROR_MARK)
2007 {
2008 if (op)
2009 {
2010 imm_use_iterator iter;
2011 use_operand_p use_p;
2012 gimple use_stmt;
2013
2014 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2015 {
2016 if (is_gimple_debug (use_stmt))
2017 continue;
2018 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2019 SET_USE (use_p, tem);
2020 update_stmt (use_stmt);
2021 }
2022 }
2023 else
2024 {
2025 gimple_cond_set_code (stmt, NE_EXPR);
2026 gimple_cond_set_lhs (stmt, tem);
2027 gimple_cond_set_rhs (stmt, boolean_false_node);
2028 update_stmt (stmt);
2029 }
2030 }
2031 oe->op = tem;
2032 range->exp = exp;
2033 range->low = low;
2034 range->high = high;
2035 range->in_p = in_p;
2036 range->strict_overflow_p = false;
2037
2038 for (range = otherrange; range < otherrange + count; range++)
2039 {
2040 oe = (*ops)[range->idx];
2041 /* Now change all the other range test immediate uses, so that
2042 those tests will be optimized away. */
2043 if (opcode == ERROR_MARK)
2044 {
2045 if (oe->op)
2046 {
2047 imm_use_iterator iter;
2048 use_operand_p use_p;
2049 gimple use_stmt;
2050
2051 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2052 {
2053 if (is_gimple_debug (use_stmt))
2054 continue;
2055 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2056 adjust it into _7 = _9;. */
2057 if (is_gimple_assign (use_stmt)
2058 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2059 {
2060 tree expr = NULL_TREE;
2061 if (oe->op == gimple_assign_rhs1 (use_stmt))
2062 expr = gimple_assign_rhs2 (use_stmt);
2063 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2064 expr = gimple_assign_rhs1 (use_stmt);
2065 if (expr
2066 && expr != oe->op
2067 && TREE_CODE (expr) == SSA_NAME)
2068 {
2069 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2070 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2071 expr, NULL_TREE);
2072 update_stmt (use_stmt);
2073 continue;
2074 }
2075 }
2076 /* If imm use of _8 is a statement like _7 = (int) _8;,
2077 adjust it into _7 = 0; or _7 = 1;. */
2078 if (gimple_assign_cast_p (use_stmt)
2079 && oe->op == gimple_assign_rhs1 (use_stmt))
2080 {
2081 tree lhs = gimple_assign_lhs (use_stmt);
2082 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2083 {
2084 gimple_stmt_iterator gsi2
2085 = gsi_for_stmt (use_stmt);
2086 tree expr = build_int_cst (TREE_TYPE (lhs),
2087 oe->rank == BIT_IOR_EXPR
2088 ? 0 : 1);
2089 gimple_assign_set_rhs_with_ops (&gsi2,
2090 INTEGER_CST,
2091 expr, NULL_TREE);
2092 update_stmt (use_stmt);
2093 continue;
2094 }
2095 }
2096 /* Otherwise replace the use with 0 or 1. */
2097 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2098 SET_USE (use_p,
2099 build_int_cst (TREE_TYPE (oe->op),
2100 oe->rank == BIT_IOR_EXPR
2101 ? 0 : 1));
2102 update_stmt (use_stmt);
2103 }
2104 }
2105 else
2106 {
2107 /* If range test was a GIMPLE_COND, simply change it
2108 into an always false or always true condition. */
2109 stmt = last_stmt (BASIC_BLOCK (oe->id));
2110 if (oe->rank == BIT_IOR_EXPR)
2111 gimple_cond_make_false (stmt);
2112 else
2113 gimple_cond_make_true (stmt);
2114 update_stmt (stmt);
2115 }
2116 }
2117 oe->op = error_mark_node;
2118 range->exp = NULL_TREE;
2119 }
2120 return true;
2121 }
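
/* For intuition (an assumed example): if the merged range is 2 <= EXP <= 5
   with IN_P set, build_range_check typically produces something equivalent
   to (unsigned) (EXP - 2) <= 3, and that single test, forced into a new
   SSA name above, replaces all of the original range tests.  */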
2122
2123 /* Optimize range tests, similarly to how fold_range_test optimizes
2124 them on trees.  The tree code for the binary
2125 operation between all the operands is OPCODE.
2126 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2127 maybe_optimize_range_tests for inter-bb range optimization.
2128 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2129 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2130 the actual opcode. */
2131
2132 static void
2133 optimize_range_tests (enum tree_code opcode,
2134 vec<operand_entry_t> *ops)
2135 {
2136 unsigned int length = ops->length (), i, j, first;
2137 operand_entry_t oe;
2138 struct range_entry *ranges;
2139 bool any_changes = false;
2140
2141 if (length == 1)
2142 return;
2143
2144 ranges = XNEWVEC (struct range_entry, length);
2145 for (i = 0; i < length; i++)
2146 {
2147 oe = (*ops)[i];
2148 ranges[i].idx = i;
2149 init_range_entry (ranges + i, oe->op,
2150 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2151 /* For |, invert it now; we will invert it again before emitting
2152 the optimized expression. */
2153 if (opcode == BIT_IOR_EXPR
2154 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2155 ranges[i].in_p = !ranges[i].in_p;
2156 }
2157
2158 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2159 for (i = 0; i < length; i++)
2160 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2161 break;
2162
2163 /* Try to merge ranges. */
2164 for (first = i; i < length; i++)
2165 {
2166 tree low = ranges[i].low;
2167 tree high = ranges[i].high;
2168 int in_p = ranges[i].in_p;
2169 bool strict_overflow_p = ranges[i].strict_overflow_p;
2170 int update_fail_count = 0;
2171
2172 for (j = i + 1; j < length; j++)
2173 {
2174 if (ranges[i].exp != ranges[j].exp)
2175 break;
2176 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2177 ranges[j].in_p, ranges[j].low, ranges[j].high))
2178 break;
2179 strict_overflow_p |= ranges[j].strict_overflow_p;
2180 }
2181
2182 if (j == i + 1)
2183 continue;
2184
2185 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2186 ops, ranges[i].exp, in_p, low, high,
2187 strict_overflow_p))
2188 {
2189 i = j - 1;
2190 any_changes = true;
2191 }
2192 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2193 while update_range_test would fail. */
2194 else if (update_fail_count == 64)
2195 i = j - 1;
2196 else
2197 ++update_fail_count;
2198 }
2199
2200 /* Optimize X == CST1 || X == CST2
2201 if popcount (CST1 ^ CST2) == 1 into
2202 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2203 Similarly for ranges. E.g.
2204 X != 2 && X != 3 && X != 10 && X != 11
2205 will be transformed by the above loop into
2206 (X - 2U) <= 1U && (X - 10U) <= 1U
2207 and this loop can transform that into
2208 ((X & ~8) - 2U) <= 1U. */
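/* A plain-C sketch of why the single-bit-xor trick is valid (assumed
   values, not from a testcase): with CST1 == 2 and CST2 == 10 we have
   CST1 ^ CST2 == 8, a single bit, and

     int slow (unsigned int x) { return x == 2 || x == 10; }
     int fast (unsigned int x) { return (x & ~8u) == 2u; }

   agree for every X: clearing bit 3 maps exactly 2 and 10 onto 2 and maps
   no other value there, so the two equality tests collapse into one.  */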
2209 for (i = first; i < length; i++)
2210 {
2211 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2212
2213 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2214 continue;
2215 type = TREE_TYPE (ranges[i].exp);
2216 if (!INTEGRAL_TYPE_P (type))
2217 continue;
2218 lowi = ranges[i].low;
2219 if (lowi == NULL_TREE)
2220 lowi = TYPE_MIN_VALUE (type);
2221 highi = ranges[i].high;
2222 if (highi == NULL_TREE)
2223 continue;
2224 for (j = i + 1; j < length && j < i + 64; j++)
2225 {
2226 if (ranges[j].exp == NULL_TREE)
2227 continue;
2228 if (ranges[i].exp != ranges[j].exp)
2229 break;
2230 if (ranges[j].in_p)
2231 continue;
2232 lowj = ranges[j].low;
2233 if (lowj == NULL_TREE)
2234 continue;
2235 highj = ranges[j].high;
2236 if (highj == NULL_TREE)
2237 highj = TYPE_MAX_VALUE (type);
2238 tem = fold_binary (GT_EXPR, boolean_type_node,
2239 lowj, highi);
2240 if (tem == NULL_TREE || !integer_onep (tem))
2241 continue;
2242 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2243 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2244 continue;
2245 gcc_checking_assert (!integer_zerop (lowxor));
2246 tem = fold_binary (MINUS_EXPR, type, lowxor,
2247 build_int_cst (type, 1));
2248 if (tem == NULL_TREE)
2249 continue;
2250 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2251 if (tem == NULL_TREE || !integer_zerop (tem))
2252 continue;
2253 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2254 if (!tree_int_cst_equal (lowxor, highxor))
2255 continue;
2256 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2257 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2258 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2259 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2260 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2261 ranges[i].in_p, lowj, highj,
2262 ranges[i].strict_overflow_p
2263 || ranges[j].strict_overflow_p))
2264 {
2265 any_changes = true;
2266 break;
2267 }
2268 }
2269 }
2270
2271 if (any_changes && opcode != ERROR_MARK)
2272 {
2273 j = 0;
2274 FOR_EACH_VEC_ELT (*ops, i, oe)
2275 {
2276 if (oe->op == error_mark_node)
2277 continue;
2278 else if (i != j)
2279 (*ops)[j] = oe;
2280 j++;
2281 }
2282 ops->truncate (j);
2283 }
2284
2285 XDELETEVEC (ranges);
2286 }
2287
2288 /* Return true if STMT is a cast like:
2289 <bb N>:
2290 ...
2291 _123 = (int) _234;
2292
2293 <bb M>:
2294 # _345 = PHI <_123(N), 1(...), 1(...)>
2295 where _234 has bool type, _123 has a single use and
2296 bb N has a single successor M. This is commonly used in
2297 the last block of a range test. */
2298
2299 static bool
2300 final_range_test_p (gimple stmt)
2301 {
2302 basic_block bb, rhs_bb;
2303 edge e;
2304 tree lhs, rhs;
2305 use_operand_p use_p;
2306 gimple use_stmt;
2307
2308 if (!gimple_assign_cast_p (stmt))
2309 return false;
2310 bb = gimple_bb (stmt);
2311 if (!single_succ_p (bb))
2312 return false;
2313 e = single_succ_edge (bb);
2314 if (e->flags & EDGE_COMPLEX)
2315 return false;
2316
2317 lhs = gimple_assign_lhs (stmt);
2318 rhs = gimple_assign_rhs1 (stmt);
2319 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2320 || TREE_CODE (rhs) != SSA_NAME
2321 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2322 return false;
2323
2324 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2325 if (!single_imm_use (lhs, &use_p, &use_stmt))
2326 return false;
2327
2328 if (gimple_code (use_stmt) != GIMPLE_PHI
2329 || gimple_bb (use_stmt) != e->dest)
2330 return false;
2331
2332 /* And that the rhs is defined in the same loop. */
2333 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2334 if (rhs_bb == NULL
2335 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2336 return false;
2337
2338 return true;
2339 }
2340
2341 /* Return true if BB is a suitable basic block for inter-bb range test
2342 optimization.  If BACKWARD is true, BB should be the only predecessor
2343 of TEST_BB, and *OTHER_BB is either NULL and filled in by this routine,
2344 or compared against to find a common basic block to which all conditions
2345 branch if true resp. false.  If BACKWARD is false, TEST_BB should
2346 be the only predecessor of BB. */
2347
2348 static bool
2349 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2350 bool backward)
2351 {
2352 edge_iterator ei, ei2;
2353 edge e, e2;
2354 gimple stmt;
2355 gimple_stmt_iterator gsi;
2356 bool other_edge_seen = false;
2357 bool is_cond;
2358
2359 if (test_bb == bb)
2360 return false;
2361 /* Check last stmt first. */
2362 stmt = last_stmt (bb);
2363 if (stmt == NULL
2364 || (gimple_code (stmt) != GIMPLE_COND
2365 && (backward || !final_range_test_p (stmt)))
2366 || gimple_visited_p (stmt)
2367 || stmt_could_throw_p (stmt)
2368 || *other_bb == bb)
2369 return false;
2370 is_cond = gimple_code (stmt) == GIMPLE_COND;
2371 if (is_cond)
2372 {
2373 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2374 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2375 to *OTHER_BB (if not set yet, try to find it out). */
2376 if (EDGE_COUNT (bb->succs) != 2)
2377 return false;
2378 FOR_EACH_EDGE (e, ei, bb->succs)
2379 {
2380 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2381 return false;
2382 if (e->dest == test_bb)
2383 {
2384 if (backward)
2385 continue;
2386 else
2387 return false;
2388 }
2389 if (e->dest == bb)
2390 return false;
2391 if (*other_bb == NULL)
2392 {
2393 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2394 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2395 return false;
2396 else if (e->dest == e2->dest)
2397 *other_bb = e->dest;
2398 if (*other_bb == NULL)
2399 return false;
2400 }
2401 if (e->dest == *other_bb)
2402 other_edge_seen = true;
2403 else if (backward)
2404 return false;
2405 }
2406 if (*other_bb == NULL || !other_edge_seen)
2407 return false;
2408 }
2409 else if (single_succ (bb) != *other_bb)
2410 return false;
2411
2412 /* Now check all PHIs of *OTHER_BB. */
2413 e = find_edge (bb, *other_bb);
2414 e2 = find_edge (test_bb, *other_bb);
2415 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2416 {
2417 gimple phi = gsi_stmt (gsi);
2418 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2419 corresponding to BB and TEST_BB predecessor must be the same. */
2420 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2421 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2422 {
2423 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2424 one of the PHIs should have the lhs of the last stmt in
2425 that block as PHI arg and that PHI should have 0 or 1
2426 corresponding to it in all other range test basic blocks
2427 considered. */
2428 if (!is_cond)
2429 {
2430 if (gimple_phi_arg_def (phi, e->dest_idx)
2431 == gimple_assign_lhs (stmt)
2432 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2433 || integer_onep (gimple_phi_arg_def (phi,
2434 e2->dest_idx))))
2435 continue;
2436 }
2437 else
2438 {
2439 gimple test_last = last_stmt (test_bb);
2440 if (gimple_code (test_last) != GIMPLE_COND
2441 && gimple_phi_arg_def (phi, e2->dest_idx)
2442 == gimple_assign_lhs (test_last)
2443 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2444 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2445 continue;
2446 }
2447
2448 return false;
2449 }
2450 }
2451 return true;
2452 }
2453
2454 /* Return true if BB doesn't have side-effects that would disallow
2455 range test optimization, all SSA_NAMEs set in the bb are consumed
2456 in the bb and there are no PHIs. */
2457
2458 static bool
2459 no_side_effect_bb (basic_block bb)
2460 {
2461 gimple_stmt_iterator gsi;
2462 gimple last;
2463
2464 if (!gimple_seq_empty_p (phi_nodes (bb)))
2465 return false;
2466 last = last_stmt (bb);
2467 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2468 {
2469 gimple stmt = gsi_stmt (gsi);
2470 tree lhs;
2471 imm_use_iterator imm_iter;
2472 use_operand_p use_p;
2473
2474 if (is_gimple_debug (stmt))
2475 continue;
2476 if (gimple_has_side_effects (stmt))
2477 return false;
2478 if (stmt == last)
2479 return true;
2480 if (!is_gimple_assign (stmt))
2481 return false;
2482 lhs = gimple_assign_lhs (stmt);
2483 if (TREE_CODE (lhs) != SSA_NAME)
2484 return false;
2485 if (gimple_assign_rhs_could_trap_p (stmt))
2486 return false;
2487 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2488 {
2489 gimple use_stmt = USE_STMT (use_p);
2490 if (is_gimple_debug (use_stmt))
2491 continue;
2492 if (gimple_bb (use_stmt) != bb)
2493 return false;
2494 }
2495 }
2496 return false;
2497 }
2498
2499 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2500 return true and fill in *OPS recursively. */
2501
2502 static bool
2503 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2504 struct loop *loop)
2505 {
2506 gimple stmt = SSA_NAME_DEF_STMT (var);
2507 tree rhs[2];
2508 int i;
2509
2510 if (!is_reassociable_op (stmt, code, loop))
2511 return false;
2512
2513 rhs[0] = gimple_assign_rhs1 (stmt);
2514 rhs[1] = gimple_assign_rhs2 (stmt);
2515 gimple_set_visited (stmt, true);
2516 for (i = 0; i < 2; i++)
2517 if (TREE_CODE (rhs[i]) == SSA_NAME
2518 && !get_ops (rhs[i], code, ops, loop)
2519 && has_single_use (rhs[i]))
2520 {
2521 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2522
2523 oe->op = rhs[i];
2524 oe->rank = code;
2525 oe->id = 0;
2526 oe->count = 1;
2527 ops->safe_push (oe);
2528 }
2529 return true;
2530 }
2531
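/* A minimal source-level sketch of the shape handled below (an assumed
   example, not a real testcase):

     int
     f (unsigned int a, unsigned int b)
     {
       if (a == 1 || a == 3 || b > 10)
         return 1;
       return 0;
     }

   Each short-circuited condition typically ends up in its own basic block
   terminated by a GIMPLE_COND, all branching to a common destination.
   maybe_optimize_range_tests below collects those conditions into an
   operand vector and lets optimize_range_tests merge e.g. the
   a == 1 || a == 3 part into a single check such as (a & ~2U) == 1.  */
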
2532 /* Inter-bb range test optimization. */
2533
2534 static void
2535 maybe_optimize_range_tests (gimple stmt)
2536 {
2537 basic_block first_bb = gimple_bb (stmt);
2538 basic_block last_bb = first_bb;
2539 basic_block other_bb = NULL;
2540 basic_block bb;
2541 edge_iterator ei;
2542 edge e;
2543 vec<operand_entry_t> ops = vNULL;
2544
2545 /* Consider only basic blocks that end with GIMPLE_COND or
2546 a cast statement satisfying final_range_test_p. All
2547 but the last bb in the first_bb .. last_bb range
2548 should end with GIMPLE_COND. */
2549 if (gimple_code (stmt) == GIMPLE_COND)
2550 {
2551 if (EDGE_COUNT (first_bb->succs) != 2)
2552 return;
2553 }
2554 else if (final_range_test_p (stmt))
2555 other_bb = single_succ (first_bb);
2556 else
2557 return;
2558
2559 if (stmt_could_throw_p (stmt))
2560 return;
2561
2562 /* As relative ordering of post-dominator sons isn't fixed,
2563 maybe_optimize_range_tests can be called first on any
2564 bb in the range we want to optimize. So, start searching
2565 backwards, if first_bb can be set to a predecessor. */
2566 while (single_pred_p (first_bb))
2567 {
2568 basic_block pred_bb = single_pred (first_bb);
2569 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2570 break;
2571 if (!no_side_effect_bb (first_bb))
2572 break;
2573 first_bb = pred_bb;
2574 }
2575 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2576 Before starting forward search in last_bb successors, find
2577 out the other_bb. */
2578 if (first_bb == last_bb)
2579 {
2580 other_bb = NULL;
2581 /* As a non-GIMPLE_COND last stmt always terminates the range,
2582 if the backward search didn't discover anything, just give up. */
2583 if (gimple_code (stmt) != GIMPLE_COND)
2584 return;
2585 /* Look at both successors. Either it ends with a GIMPLE_COND
2586 and satisfies suitable_cond_bb, or ends with a cast and
2587 other_bb is that cast's successor. */
2588 FOR_EACH_EDGE (e, ei, first_bb->succs)
2589 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2590 || e->dest == first_bb)
2591 return;
2592 else if (single_pred_p (e->dest))
2593 {
2594 stmt = last_stmt (e->dest);
2595 if (stmt
2596 && gimple_code (stmt) == GIMPLE_COND
2597 && EDGE_COUNT (e->dest->succs) == 2)
2598 {
2599 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2600 break;
2601 else
2602 other_bb = NULL;
2603 }
2604 else if (stmt
2605 && final_range_test_p (stmt)
2606 && find_edge (first_bb, single_succ (e->dest)))
2607 {
2608 other_bb = single_succ (e->dest);
2609 if (other_bb == first_bb)
2610 other_bb = NULL;
2611 }
2612 }
2613 if (other_bb == NULL)
2614 return;
2615 }
2616 /* Now do the forward search, moving last_bb to successor bbs
2617 that aren't other_bb. */
2618 while (EDGE_COUNT (last_bb->succs) == 2)
2619 {
2620 FOR_EACH_EDGE (e, ei, last_bb->succs)
2621 if (e->dest != other_bb)
2622 break;
2623 if (e == NULL)
2624 break;
2625 if (!single_pred_p (e->dest))
2626 break;
2627 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2628 break;
2629 if (!no_side_effect_bb (e->dest))
2630 break;
2631 last_bb = e->dest;
2632 }
2633 if (first_bb == last_bb)
2634 return;
2635 /* Here basic blocks first_bb through last_bb's predecessor
2636 end with GIMPLE_COND, all of them have one of the edges to
2637 other_bb and another to another block in the range,
2638 all blocks except first_bb don't have side-effects and
2639 last_bb ends with either GIMPLE_COND, or cast satisfying
2640 final_range_test_p. */
2641 for (bb = last_bb; ; bb = single_pred (bb))
2642 {
2643 enum tree_code code;
2644 tree lhs, rhs;
2645
2646 e = find_edge (bb, other_bb);
2647 stmt = last_stmt (bb);
2648 gimple_set_visited (stmt, true);
2649 if (gimple_code (stmt) != GIMPLE_COND)
2650 {
2651 use_operand_p use_p;
2652 gimple phi;
2653 edge e2;
2654 unsigned int d;
2655
2656 lhs = gimple_assign_lhs (stmt);
2657 rhs = gimple_assign_rhs1 (stmt);
2658 gcc_assert (bb == last_bb);
2659
2660 /* stmt is
2661 _123 = (int) _234;
2662
2663 followed by:
2664 <bb M>:
2665 # _345 = PHI <_123(N), 1(...), 1(...)>
2666
2667 or 0 instead of 1. If it is 0, the _234
2668 range test is anded together with all the
2669 other range tests, if it is 1, it is ored with
2670 them. */
2671 single_imm_use (lhs, &use_p, &phi);
2672 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2673 e2 = find_edge (first_bb, other_bb);
2674 d = e2->dest_idx;
2675 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2676 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2677 code = BIT_AND_EXPR;
2678 else
2679 {
2680 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2681 code = BIT_IOR_EXPR;
2682 }
2683
2684 /* If _234 SSA_NAME_DEF_STMT is
2685 _234 = _567 | _789;
2686 (or &, corresponding to 1/0 in the phi arguments),
2687 push into ops the individual range test arguments
2688 of the bitwise or resp. and, recursively. */
2689 if (!get_ops (rhs, code, &ops,
2690 loop_containing_stmt (stmt))
2691 && has_single_use (rhs))
2692 {
2693 /* Otherwise, push the _234 range test itself. */
2694 operand_entry_t oe
2695 = (operand_entry_t) pool_alloc (operand_entry_pool);
2696
2697 oe->op = rhs;
2698 oe->rank = code;
2699 oe->id = 0;
2700 oe->count = 1;
2701 ops.safe_push (oe);
2702 }
2703 continue;
2704 }
2705 /* Otherwise stmt is GIMPLE_COND. */
2706 code = gimple_cond_code (stmt);
2707 lhs = gimple_cond_lhs (stmt);
2708 rhs = gimple_cond_rhs (stmt);
2709 if (TREE_CODE (lhs) == SSA_NAME
2710 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2711 && ((code != EQ_EXPR && code != NE_EXPR)
2712 || rhs != boolean_false_node
2713 /* Either push into ops the individual bitwise
2714 or resp. and operands, depending on which
2715 edge is other_bb. */
2716 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2717 ^ (code == EQ_EXPR))
2718 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2719 loop_containing_stmt (stmt))))
2720 {
2721 /* Or push the GIMPLE_COND stmt itself. */
2722 operand_entry_t oe
2723 = (operand_entry_t) pool_alloc (operand_entry_pool);
2724
2725 oe->op = NULL;
2726 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2727 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2728 /* oe->op = NULL signals that there is no SSA_NAME
2729 for the range test, and oe->id instead is the
2730 basic block number at whose end the GIMPLE_COND
2731 is found. */
2732 oe->id = bb->index;
2733 oe->count = 1;
2734 ops.safe_push (oe);
2735 }
2736 if (bb == first_bb)
2737 break;
2738 }
2739 if (ops.length () > 1)
2740 optimize_range_tests (ERROR_MARK, &ops);
2741 ops.release ();
2742 }
2743
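/* A concrete (assumed) instance of a "destructive update" as tested for
   below: in

     int
     sum (const int *a, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s = s + a[i];
       return s;
     }

   the body addition looks roughly like s_3 = s_2 + t_1, where s_2 is
   defined by the loop PHI node that also takes s_3 as an argument, i.e.
   the statement's LHS feeds back into the PHI defining one of its
   operands.  */
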
2744 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2745 of STMT in its operands.  This is also known as a "destructive
2746 update" operation. */
2747
2748 static bool
2749 is_phi_for_stmt (gimple stmt, tree operand)
2750 {
2751 gimple def_stmt;
2752 tree lhs;
2753 use_operand_p arg_p;
2754 ssa_op_iter i;
2755
2756 if (TREE_CODE (operand) != SSA_NAME)
2757 return false;
2758
2759 lhs = gimple_assign_lhs (stmt);
2760
2761 def_stmt = SSA_NAME_DEF_STMT (operand);
2762 if (gimple_code (def_stmt) != GIMPLE_PHI)
2763 return false;
2764
2765 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2766 if (lhs == USE_FROM_PTR (arg_p))
2767 return true;
2768 return false;
2769 }
2770
2771 /* Remove def stmt of VAR if VAR has zero uses and recurse
2772 on rhs1 operand if so. */
2773
2774 static void
2775 remove_visited_stmt_chain (tree var)
2776 {
2777 gimple stmt;
2778 gimple_stmt_iterator gsi;
2779
2780 while (1)
2781 {
2782 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2783 return;
2784 stmt = SSA_NAME_DEF_STMT (var);
2785 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2786 {
2787 var = gimple_assign_rhs1 (stmt);
2788 gsi = gsi_for_stmt (stmt);
2789 gsi_remove (&gsi, true);
2790 release_defs (stmt);
2791 }
2792 else
2793 return;
2794 }
2795 }
2796
2797 /* This function checks three consecutive operands in
2798 the passed operand vector OPS starting from OPINDEX and
2799 swaps two operands if it is profitable for the binary operation
2800 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2801
2802 We pair ops with the same rank if possible.
2803
2804 The alternative we try is to see if STMT is a destructive
2805 update style statement, which is like:
2806 b = phi (a, ...)
2807 a = c + b;
2808 In that case, we want to use the destructive update form to
2809 expose the possible vectorizer sum reduction opportunity.
2810 In that case, the third operand will be the phi node. This
2811 check is not performed if STMT is null.
2812
2813 We could, of course, try to be better as noted above, and do a
2814 lot of work to try to find these opportunities in >3 operand
2815 cases, but it is unlikely to be worth it. */
2816
2817 static void
2818 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2819 unsigned int opindex, gimple stmt)
2820 {
2821 operand_entry_t oe1, oe2, oe3;
2822
2823 oe1 = ops[opindex];
2824 oe2 = ops[opindex + 1];
2825 oe3 = ops[opindex + 2];
2826
2827 if ((oe1->rank == oe2->rank
2828 && oe2->rank != oe3->rank)
2829 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2830 && !is_phi_for_stmt (stmt, oe1->op)
2831 && !is_phi_for_stmt (stmt, oe2->op)))
2832 {
2833 struct operand_entry temp = *oe3;
2834 oe3->op = oe1->op;
2835 oe3->rank = oe1->rank;
2836 oe1->op = temp.op;
2837 oe1->rank = temp.rank;
2838 }
2839 else if ((oe1->rank == oe3->rank
2840 && oe2->rank != oe3->rank)
2841 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2842 && !is_phi_for_stmt (stmt, oe1->op)
2843 && !is_phi_for_stmt (stmt, oe3->op)))
2844 {
2845 struct operand_entry temp = *oe2;
2846 oe2->op = oe1->op;
2847 oe2->rank = oe1->rank;
2848 oe1->op = temp.op;
2849 oe1->rank = temp.rank;
2850 }
2851 }
2852
2853 /* Determine if stmt A is not dominated by stmt B.  If A and B are in
2854 the same basic block, then A's UID has to be less than B's.  If they are
2855 in different BB's, then A's BB must not be dominated by B's BB. */
2856
2857 static inline bool
2858 not_dominated_by (gimple a, gimple b)
2859 {
2860 basic_block bb_a, bb_b;
2861 bb_a = gimple_bb (a);
2862 bb_b = gimple_bb (b);
2863 return ((bb_a == bb_b && gimple_uid (a) < gimple_uid (b))
2864 || (bb_a != bb_b
2865 && !dominated_by_p (CDI_DOMINATORS, bb_a, bb_b)));
2866
2867 }
2868
2869 /* Among STMT1 and STMT2, return the statement that appears later. Both
2870 statements are in the same BB and have the same UID. */
2871
2872 static gimple
2873 appears_later_in_bb (gimple stmt1, gimple stmt2)
2874 {
2875 unsigned uid = gimple_uid (stmt1);
2876 gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
2877 gsi_next (&gsi);
2878 if (gsi_end_p (gsi))
2879 return stmt1;
2880 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2881 {
2882 gimple stmt = gsi_stmt (gsi);
2883
2884 /* If STMT has a different UID than STMT1 and we haven't seen
2885 STMT2 during traversal, we know STMT1 appears later. */
2886 if (gimple_uid (stmt) != uid)
2887 return stmt1;
2888 else if (stmt == stmt2)
2889 return stmt2;
2890 }
2891 return stmt1;
2892 }
2893
2894 /* Find the statement after which STMT must be moved so that the
2895 dependency from DEP_STMT to STMT is maintained. */
2896
2897 static gimple
2898 find_insert_point (gimple stmt, gimple dep_stmt)
2899 {
2900 gimple insert_stmt = stmt;
2901 if (dep_stmt == NULL)
2902 return stmt;
2903 if (gimple_uid (insert_stmt) == gimple_uid (dep_stmt)
2904 && gimple_bb (insert_stmt) == gimple_bb (dep_stmt)
2905 && insert_stmt != dep_stmt)
2906 insert_stmt = appears_later_in_bb (insert_stmt, dep_stmt);
2907 else if (not_dominated_by (insert_stmt, dep_stmt))
2908 insert_stmt = dep_stmt;
2909 return insert_stmt;
2910 }
2911
2912 /* Insert STMT after INSERT_POINT. */
2913
2914 static void
2915 insert_stmt_after (gimple stmt, gimple insert_point)
2916 {
2917 imm_use_iterator iter;
2918 tree lhs;
2919 gimple use_stmt;
2920 gimple_stmt_iterator gsistmt = gsi_for_stmt (stmt), gsi_insert;
2921 basic_block insert_bb = gimple_bb (insert_point);
2922 bool insert_bb_different = (insert_bb != gimple_bb (stmt));
2923 lhs = gimple_assign_lhs (stmt);
2924 /* If there are any debug uses of LHS, reset them. */
2925 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2926 {
2927 if (is_gimple_debug (use_stmt)
2928 && not_dominated_by (use_stmt, insert_point))
2929 {
2930 gimple_debug_bind_reset_value (use_stmt);
2931 update_stmt (use_stmt);
2932 }
2933 }
2934 /* If INSERT_POINT is a phi node, then do not insert just after that statement.
2935 Instead, find the first non-label gimple statement in BB and insert before
2936 that. */
2937 if (gimple_code (insert_point) == GIMPLE_PHI)
2938 {
2939 gsi_insert = gsi_after_labels (insert_bb);
2940 gsi_move_before (&gsistmt, &gsi_insert);
2941 }
2942 /* Statements marked for throw can not be in the middle of a basic block. So
2943 we can not insert a statement (not marked for throw) immediately after. */
2944 else if (stmt_ends_bb_p (insert_point))
2945 {
2946 edge succ_edge = find_fallthru_edge (insert_bb->succs);
2947 insert_bb = succ_edge->dest;
2948 insert_bb_different = (insert_bb != gimple_bb (stmt));
2949 /* Insert STMT at the beginning of the successor basic block. */
2950 gsi_insert = gsi_after_labels (insert_bb);
2951 gsi_move_before (&gsistmt, &gsi_insert);
2952 }
2953 else
2954 {
2955 gsi_insert = gsi_for_stmt (insert_point);
2956 gsi_move_after (&gsistmt, &gsi_insert);
2957 }
2958 /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
2959 of UIDs to determine dominance within a basic block work. */
2960 gimple_set_uid (stmt, gimple_uid (insert_point));
2961 if (dump_file && (dump_flags & TDF_DETAILS))
2962 {
2963 fprintf (dump_file, "Moved stmt ");
2964 print_gimple_stmt (dump_file, stmt, 0, 0);
2965 fprintf (dump_file, " %s to satisfy dependences\n",
2966 insert_bb_different ? "to a different BB" : "within same BB");
2967 }
2968
2969 }
2970
2971 /* If OP is a SSA variable and is not the default definition, return the
2972 gimple statement that defines OP. Else return NULL. */
2973
2974 static inline gimple
2975 get_def_stmt (tree op)
2976 {
2977 if (TREE_CODE (op) == SSA_NAME
2978 && !SSA_NAME_IS_DEFAULT_DEF (op))
2979 return SSA_NAME_DEF_STMT (op);
2980 else
2981 return NULL;
2982 }
2983
2984 /* Ensure that operands in the OPS vector are available for STMT and all
2985 gimple statements on which STMT depends. */
2986
2987 static void
2988 ensure_ops_are_available (gimple stmt, vec<operand_entry_t> ops, int opindex)
2989 {
2990 unsigned int len = ops.length ();
2991 gimple insert_stmt = stmt;
2992 gimple dep_stmts[2];
2993 dep_stmts[0] = get_def_stmt (ops[opindex]->op);
2994 if (len - opindex == 2)
2995 {
2996 dep_stmts[1] = get_def_stmt (ops[opindex + 1]->op);
2997 }
2998 else
2999 {
3000 gimple stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3001 ensure_ops_are_available (stmt1, ops, opindex + 1);
3002 dep_stmts[1] = stmt1;
3003 }
3004 for (int i = 0; i < 2; i++)
3005 insert_stmt = find_insert_point (insert_stmt, dep_stmts[i]);
3006
3007 if (insert_stmt != stmt)
3008 insert_stmt_after (stmt, insert_stmt);
3009 }
3010
3011 /* Recursively rewrite our linearized statements so that the operators
3012 match those in OPS[OPINDEX], putting the computation in rank
3013 order. */
3014
3015 static void
3016 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3017 vec<operand_entry_t> ops, bool moved)
3018 {
3019 tree rhs1 = gimple_assign_rhs1 (stmt);
3020 tree rhs2 = gimple_assign_rhs2 (stmt);
3021 operand_entry_t oe;
3022
3023 /* The final recursion case for this function is that you have
3024 exactly two operations left.
3025 If we had exactly one op in the entire list to start with, we
3026 would have never called this function, and the tail recursion
3027 rewrites them one at a time. */
3028 if (opindex + 2 == ops.length ())
3029 {
3030 operand_entry_t oe1, oe2;
3031
3032 oe1 = ops[opindex];
3033 oe2 = ops[opindex + 1];
3034
3035 if (rhs1 != oe1->op || rhs2 != oe2->op)
3036 {
3037 if (dump_file && (dump_flags & TDF_DETAILS))
3038 {
3039 fprintf (dump_file, "Transforming ");
3040 print_gimple_stmt (dump_file, stmt, 0, 0);
3041 }
3042
3043 gimple_assign_set_rhs1 (stmt, oe1->op);
3044 gimple_assign_set_rhs2 (stmt, oe2->op);
3045 update_stmt (stmt);
3046 if (rhs1 != oe1->op && rhs1 != oe2->op)
3047 remove_visited_stmt_chain (rhs1);
3048
3049 if (dump_file && (dump_flags & TDF_DETAILS))
3050 {
3051 fprintf (dump_file, " into ");
3052 print_gimple_stmt (dump_file, stmt, 0, 0);
3053 }
3054 }
3055 return;
3056 }
3057
3058 /* If we hit here, we should have 3 or more ops left. */
3059 gcc_assert (opindex + 2 < ops.length ());
3060
3061 /* Rewrite the next operator. */
3062 oe = ops[opindex];
3063
3064 if (oe->op != rhs2)
3065 {
3066 if (!moved)
3067 {
3068 ensure_ops_are_available (stmt, ops, opindex);
3069 moved = true;
3070 }
3071
3072 if (dump_file && (dump_flags & TDF_DETAILS))
3073 {
3074 fprintf (dump_file, "Transforming ");
3075 print_gimple_stmt (dump_file, stmt, 0, 0);
3076 }
3077
3078 gimple_assign_set_rhs2 (stmt, oe->op);
3079 update_stmt (stmt);
3080
3081 if (dump_file && (dump_flags & TDF_DETAILS))
3082 {
3083 fprintf (dump_file, " into ");
3084 print_gimple_stmt (dump_file, stmt, 0, 0);
3085 }
3086 }
3087 /* Recurse on the LHS of the binary operator, which is guaranteed to
3088 be the non-leaf side. */
3089 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
3090 }
3091
3092 /* Find out how many cycles we need to compute a chain of statements.
3093 OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3094 maximum number of independent statements we may execute per cycle. */
3095
3096 static int
3097 get_required_cycles (int ops_num, int cpu_width)
3098 {
3099 int res;
3100 int elog;
3101 unsigned int rest;
3102
3103 /* While we have more than 2 * cpu_width operands
3104 we may reduce the number of operands by cpu_width
3105 per cycle. */
3106 res = ops_num / (2 * cpu_width);
3107
3108 /* The remaining operand count can then be halved each cycle
3109 until only one operand is left. */
3110 rest = (unsigned)(ops_num - res * cpu_width);
3111 elog = exact_log2 (rest);
3112 if (elog >= 0)
3113 res += elog;
3114 else
3115 res += floor_log2 (rest) + 1;
3116
3117 return res;
3118 }
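
/* A worked example with assumed numbers: for ops_num == 10 and
   cpu_width == 2 the code above computes res = 10 / 4 = 2 and
   rest = 10 - 2*2 = 6; since 6 is not a power of two,
   floor_log2 (6) + 1 = 3 further cycles are added, giving 5 in total.  */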
3119
3120 /* Returns an optimal number of registers to use for computation of
3121 given statements. */
3122
3123 static int
3124 get_reassociation_width (int ops_num, enum tree_code opc,
3125 enum machine_mode mode)
3126 {
3127 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3128 int width;
3129 int width_min;
3130 int cycles_best;
3131
3132 if (param_width > 0)
3133 width = param_width;
3134 else
3135 width = targetm.sched.reassociation_width (opc, mode);
3136
3137 if (width == 1)
3138 return width;
3139
3140 /* Get the minimal time required for sequence computation. */
3141 cycles_best = get_required_cycles (ops_num, width);
3142
3143 /* Check if we may use less width and still compute sequence for
3144 the same time. It will allow us to reduce registers usage.
3145 get_required_cycles is monotonically increasing with lower width
3146 so we can perform a binary search for the minimal width that still
3147 results in the optimal cycle count. */
3148 width_min = 1;
3149 while (width > width_min)
3150 {
3151 int width_mid = (width + width_min) / 2;
3152
3153 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3154 width = width_mid;
3155 else if (width_min < width_mid)
3156 width_min = width_mid;
3157 else
3158 break;
3159 }
3160
3161 return width;
3162 }
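
/* For instance (assumed numbers), with ops_num == 6 and a target-reported
   width of 4, get_required_cycles (6, 4) and get_required_cycles (6, 2)
   both evaluate to 3, so the binary search above settles on a width of 2:
   the same latency with fewer live registers.  */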
3163
3164 /* Recursively rewrite our linearized statements so that the operators
3165 match those in OPS[OPINDEX], putting the computation in rank
3166 order and trying to allow operations to be executed in
3167 parallel. */
3168
3169 static void
3170 rewrite_expr_tree_parallel (gimple stmt, int width,
3171 vec<operand_entry_t> ops)
3172 {
3173 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3174 int op_num = ops.length ();
3175 int stmt_num = op_num - 1;
3176 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3177 int op_index = op_num - 1;
3178 int stmt_index = 0;
3179 int ready_stmts_end = 0;
3180 int i = 0;
3181 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3182
3183 /* We start expression rewriting from the top statements.
3184 So, in this loop we create a full list of statements
3185 we will work with. */
3186 stmts[stmt_num - 1] = stmt;
3187 for (i = stmt_num - 2; i >= 0; i--)
3188 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3189
3190 for (i = 0; i < stmt_num; i++)
3191 {
3192 tree op1, op2;
3193
3194 /* Determine whether we should use results of
3195 already handled statements or not. */
3196 if (ready_stmts_end == 0
3197 && (i - stmt_index >= width || op_index < 1))
3198 ready_stmts_end = i;
3199
3200 /* Now we choose operands for the next statement.  A nonzero
3201 value in ready_stmts_end means that we should use
3202 the results of already generated statements as new operands. */
3203 if (ready_stmts_end > 0)
3204 {
3205 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3206 if (ready_stmts_end > stmt_index)
3207 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3208 else if (op_index >= 0)
3209 op2 = ops[op_index--]->op;
3210 else
3211 {
3212 gcc_assert (stmt_index < i);
3213 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3214 }
3215
3216 if (stmt_index >= ready_stmts_end)
3217 ready_stmts_end = 0;
3218 }
3219 else
3220 {
3221 if (op_index > 1)
3222 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3223 op2 = ops[op_index--]->op;
3224 op1 = ops[op_index--]->op;
3225 }
3226
3227 /* If we emit the last statement then we should put
3228 operands into the last statement. It will also
3229 break the loop. */
3230 if (op_index < 0 && stmt_index == i)
3231 i = stmt_num - 1;
3232
3233 if (dump_file && (dump_flags & TDF_DETAILS))
3234 {
3235 fprintf (dump_file, "Transforming ");
3236 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3237 }
3238
3239 /* We keep the original statement only for the last one.  All
3240 others are recreated. */
3241 if (i == stmt_num - 1)
3242 {
3243 gimple_assign_set_rhs1 (stmts[i], op1);
3244 gimple_assign_set_rhs2 (stmts[i], op2);
3245 update_stmt (stmts[i]);
3246 }
3247 else
3248 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3249
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 {
3252 fprintf (dump_file, " into ");
3253 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3254 }
3255 }
3256
3257 remove_visited_stmt_chain (last_rhs1);
3258 }
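
/* Roughly, for an assumed chain x = a + b + c + d and width 2, the serial
   form

     t1 = a + b;  t2 = t1 + c;  x = t2 + d;

   is rewritten by the routine above into something like

     t1 = a + b;  t2 = c + d;  x = t1 + t2;

   so the first two additions are independent and can issue in the same
   cycle.  */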
3259
3260 /* Transform STMT, which is really (A + B) + (C + D), into the left
3261 linear form, ((A + B) + C) + D.
3262 Recurse on D if necessary. */
3263
3264 static void
3265 linearize_expr (gimple stmt)
3266 {
3267 gimple_stmt_iterator gsinow, gsirhs;
3268 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3269 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3270 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3271 gimple newbinrhs = NULL;
3272 struct loop *loop = loop_containing_stmt (stmt);
3273
3274 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3275 && is_reassociable_op (binrhs, rhscode, loop));
3276
3277 gsinow = gsi_for_stmt (stmt);
3278 gsirhs = gsi_for_stmt (binrhs);
3279 gsi_move_before (&gsirhs, &gsinow);
3280 gimple_set_uid (binrhs, gimple_uid (stmt));
3281
3282 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3283 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3284 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3285
3286 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3287 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3288
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3290 {
3291 fprintf (dump_file, "Linearized: ");
3292 print_gimple_stmt (dump_file, stmt, 0, 0);
3293 }
3294
3295 reassociate_stats.linearized++;
3296 update_stmt (binrhs);
3297 update_stmt (binlhs);
3298 update_stmt (stmt);
3299
3300 gimple_set_visited (stmt, true);
3301 gimple_set_visited (binlhs, true);
3302 gimple_set_visited (binrhs, true);
3303
3304 /* Tail recurse on the new rhs if it still needs reassociation. */
3305 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3306 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3307 want to change the algorithm while converting to tuples. */
3308 linearize_expr (stmt);
3309 }
3310
3311 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3312 it. Otherwise, return NULL. */
3313
3314 static gimple
3315 get_single_immediate_use (tree lhs)
3316 {
3317 use_operand_p immuse;
3318 gimple immusestmt;
3319
3320 if (TREE_CODE (lhs) == SSA_NAME
3321 && single_imm_use (lhs, &immuse, &immusestmt)
3322 && is_gimple_assign (immusestmt))
3323 return immusestmt;
3324
3325 return NULL;
3326 }
3327
3328 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3329 representing the negated value. Insertions of any necessary
3330 instructions go before GSI.
3331 This function is recursive in that, if you hand it "a_5" as the
3332 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3333 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3334
3335 static tree
3336 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3337 {
3338 gimple negatedefstmt = NULL;
3339 tree resultofnegate;
3340
3341 /* If we are trying to negate a name, defined by an add, negate the
3342 add operands instead. */
3343 if (TREE_CODE (tonegate) == SSA_NAME)
3344 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3345 if (TREE_CODE (tonegate) == SSA_NAME
3346 && is_gimple_assign (negatedefstmt)
3347 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3348 && has_single_use (gimple_assign_lhs (negatedefstmt))
3349 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3350 {
3351 gimple_stmt_iterator gsi;
3352 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3353 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3354
3355 gsi = gsi_for_stmt (negatedefstmt);
3356 rhs1 = negate_value (rhs1, &gsi);
3357 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3358
3359 gsi = gsi_for_stmt (negatedefstmt);
3360 rhs2 = negate_value (rhs2, &gsi);
3361 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3362
3363 update_stmt (negatedefstmt);
3364 return gimple_assign_lhs (negatedefstmt);
3365 }
3366
3367 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3368 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3369 NULL_TREE, true, GSI_SAME_STMT);
3370 return resultofnegate;
3371 }
3372
3373 /* Return true if we should break up the subtract in STMT into an add
3374 with negate.  This is true when the subtract's operands are really
3375 adds, or the subtract itself is used in an add expression. In
3376 either case, breaking up the subtract into an add with negate
3377 exposes the adds to reassociation. */
3378
3379 static bool
3380 should_break_up_subtract (gimple stmt)
3381 {
3382 tree lhs = gimple_assign_lhs (stmt);
3383 tree binlhs = gimple_assign_rhs1 (stmt);
3384 tree binrhs = gimple_assign_rhs2 (stmt);
3385 gimple immusestmt;
3386 struct loop *loop = loop_containing_stmt (stmt);
3387
3388 if (TREE_CODE (binlhs) == SSA_NAME
3389 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3390 return true;
3391
3392 if (TREE_CODE (binrhs) == SSA_NAME
3393 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3394 return true;
3395
3396 if (TREE_CODE (lhs) == SSA_NAME
3397 && (immusestmt = get_single_immediate_use (lhs))
3398 && is_gimple_assign (immusestmt)
3399 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3400 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3401 return true;
3402 return false;
3403 }
3404
3405 /* Transform STMT from A - B into A + -B. */
3406
3407 static void
3408 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3409 {
3410 tree rhs1 = gimple_assign_rhs1 (stmt);
3411 tree rhs2 = gimple_assign_rhs2 (stmt);
3412
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3414 {
3415 fprintf (dump_file, "Breaking up subtract ");
3416 print_gimple_stmt (dump_file, stmt, 0, 0);
3417 }
3418
3419 rhs2 = negate_value (rhs2, gsip);
3420 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3421 update_stmt (stmt);
3422 }
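
/* A small (assumed) example of what this buys us: for

     unsigned int
     g (unsigned int a, unsigned int b, unsigned int c)
     {
       return (a - b) + (c - a);
     }

   breaking each subtraction up yields the flat chain a + -b + c + -a, and
   the operand-list optimization can then cancel a against -a, leaving
   c - b.  With the subtractions kept intact, that pair is never exposed.  */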
3423
3424 /* Determine whether STMT is a builtin call that raises an SSA name
3425 to an integer power and has only one use. If so, and this is early
3426 reassociation and unsafe math optimizations are permitted, place
3427 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3428 If any of these conditions does not hold, return FALSE. */
3429
3430 static bool
3431 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3432 {
3433 tree fndecl, arg1;
3434 REAL_VALUE_TYPE c, cint;
3435
3436 if (!first_pass_instance
3437 || !flag_unsafe_math_optimizations
3438 || !is_gimple_call (stmt)
3439 || !has_single_use (gimple_call_lhs (stmt)))
3440 return false;
3441
3442 fndecl = gimple_call_fndecl (stmt);
3443
3444 if (!fndecl
3445 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3446 return false;
3447
3448 switch (DECL_FUNCTION_CODE (fndecl))
3449 {
3450 CASE_FLT_FN (BUILT_IN_POW):
3451 *base = gimple_call_arg (stmt, 0);
3452 arg1 = gimple_call_arg (stmt, 1);
3453
3454 if (TREE_CODE (arg1) != REAL_CST)
3455 return false;
3456
3457 c = TREE_REAL_CST (arg1);
3458
3459 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3460 return false;
3461
3462 *exponent = real_to_integer (&c);
3463 real_from_integer (&cint, VOIDmode, *exponent,
3464 *exponent < 0 ? -1 : 0, 0);
3465 if (!real_identical (&c, &cint))
3466 return false;
3467
3468 break;
3469
3470 CASE_FLT_FN (BUILT_IN_POWI):
3471 *base = gimple_call_arg (stmt, 0);
3472 arg1 = gimple_call_arg (stmt, 1);
3473
3474 if (!host_integerp (arg1, 0))
3475 return false;
3476
3477 *exponent = TREE_INT_CST_LOW (arg1);
3478 break;
3479
3480 default:
3481 return false;
3482 }
3483
3484 /* Expanding negative exponents is generally unproductive, so we don't
3485 complicate matters with those. Exponents of zero and one should
3486 have been handled by expression folding. */
3487 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3488 return false;
3489
3490 return true;
3491 }
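
/* For example (assumed call shapes), y = __builtin_pow (x, 3.0) and
   y = __builtin_powi (x, 3) are both accepted here with *BASE = x and
   *EXPONENT = 3 (in the early reassociation pass, with
   -funsafe-math-optimizations and a single use of y), whereas
   __builtin_pow (x, 0.5) is rejected because the exponent is not an
   integral value.  */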
3492
3493 /* Recursively linearize a binary expression that is the RHS of STMT.
3494 Place the operands of the expression tree in the vector named OPS. */
3495
3496 static void
3497 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3498 bool is_associative, bool set_visited)
3499 {
3500 tree binlhs = gimple_assign_rhs1 (stmt);
3501 tree binrhs = gimple_assign_rhs2 (stmt);
3502 gimple binlhsdef = NULL, binrhsdef = NULL;
3503 bool binlhsisreassoc = false;
3504 bool binrhsisreassoc = false;
3505 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3506 struct loop *loop = loop_containing_stmt (stmt);
3507 tree base = NULL_TREE;
3508 HOST_WIDE_INT exponent = 0;
3509
3510 if (set_visited)
3511 gimple_set_visited (stmt, true);
3512
3513 if (TREE_CODE (binlhs) == SSA_NAME)
3514 {
3515 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3516 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3517 && !stmt_could_throw_p (binlhsdef));
3518 }
3519
3520 if (TREE_CODE (binrhs) == SSA_NAME)
3521 {
3522 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3523 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3524 && !stmt_could_throw_p (binrhsdef));
3525 }
3526
3527 /* If the LHS is not reassociable, but the RHS is, we need to swap
3528 them. If neither is reassociable, there is nothing we can do, so
3529 just put them in the ops vector. If the LHS is reassociable,
3530 linearize it. If both are reassociable, then linearize the RHS
3531 and the LHS. */
3532
3533 if (!binlhsisreassoc)
3534 {
3535 tree temp;
3536
3537 /* If this is not an associative operation, like division, give up. */
3538 if (!is_associative)
3539 {
3540 add_to_ops_vec (ops, binrhs);
3541 return;
3542 }
3543
3544 if (!binrhsisreassoc)
3545 {
3546 if (rhscode == MULT_EXPR
3547 && TREE_CODE (binrhs) == SSA_NAME
3548 && acceptable_pow_call (binrhsdef, &base, &exponent))
3549 {
3550 add_repeat_to_ops_vec (ops, base, exponent);
3551 gimple_set_visited (binrhsdef, true);
3552 }
3553 else
3554 add_to_ops_vec (ops, binrhs);
3555
3556 if (rhscode == MULT_EXPR
3557 && TREE_CODE (binlhs) == SSA_NAME
3558 && acceptable_pow_call (binlhsdef, &base, &exponent))
3559 {
3560 add_repeat_to_ops_vec (ops, base, exponent);
3561 gimple_set_visited (binlhsdef, true);
3562 }
3563 else
3564 add_to_ops_vec (ops, binlhs);
3565
3566 return;
3567 }
3568
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3570 {
3571 fprintf (dump_file, "swapping operands of ");
3572 print_gimple_stmt (dump_file, stmt, 0, 0);
3573 }
3574
3575 swap_tree_operands (stmt,
3576 gimple_assign_rhs1_ptr (stmt),
3577 gimple_assign_rhs2_ptr (stmt));
3578 update_stmt (stmt);
3579
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 {
3582 fprintf (dump_file, " is now ");
3583 print_gimple_stmt (dump_file, stmt, 0, 0);
3584 }
3585
3586 /* We want to make it so the lhs is always the reassociative op,
3587 so swap. */
3588 temp = binlhs;
3589 binlhs = binrhs;
3590 binrhs = temp;
3591 }
3592 else if (binrhsisreassoc)
3593 {
3594 linearize_expr (stmt);
3595 binlhs = gimple_assign_rhs1 (stmt);
3596 binrhs = gimple_assign_rhs2 (stmt);
3597 }
3598
3599 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3600 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3601 rhscode, loop));
3602 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3603 is_associative, set_visited);
3604
3605 if (rhscode == MULT_EXPR
3606 && TREE_CODE (binrhs) == SSA_NAME
3607 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3608 {
3609 add_repeat_to_ops_vec (ops, base, exponent);
3610 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3611 }
3612 else
3613 add_to_ops_vec (ops, binrhs);
3614 }
3615
3616 /* Repropagate the negates back into subtracts, since no other pass
3617 currently does it. */
3618
3619 static void
3620 repropagate_negates (void)
3621 {
3622 unsigned int i = 0;
3623 tree negate;
3624
3625 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3626 {
3627 gimple user = get_single_immediate_use (negate);
3628
3629 if (!user || !is_gimple_assign (user))
3630 continue;
3631
3632 /* The negate operand can be either operand of a PLUS_EXPR
3633 (it can be the LHS if the RHS is a constant for example).
3634
3635 Force the negate operand to the RHS of the PLUS_EXPR, then
3636 transform the PLUS_EXPR into a MINUS_EXPR. */
3637 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3638 {
3639 /* If the negated operand appears on the LHS of the
3640 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3641 to force the negated operand to the RHS of the PLUS_EXPR. */
3642 if (gimple_assign_rhs1 (user) == negate)
3643 {
3644 swap_tree_operands (user,
3645 gimple_assign_rhs1_ptr (user),
3646 gimple_assign_rhs2_ptr (user));
3647 }
3648
3649 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3650 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3651 if (gimple_assign_rhs2 (user) == negate)
3652 {
3653 tree rhs1 = gimple_assign_rhs1 (user);
3654 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3655 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3656 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3657 update_stmt (user);
3658 }
3659 }
3660 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3661 {
3662 if (gimple_assign_rhs1 (user) == negate)
3663 {
3664 /* We have
3665 x = -a
3666 y = x - b
3667 which we transform into
3668 x = a + b
3669 y = -x .
3670 This pushes down the negate which we possibly can merge
3671 into some other operation, hence insert it into the
3672 plus_negates vector. */
3673 gimple feed = SSA_NAME_DEF_STMT (negate);
3674 tree a = gimple_assign_rhs1 (feed);
3675 tree rhs2 = gimple_assign_rhs2 (user);
3676 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3677 gimple_replace_lhs (feed, negate);
3678 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3679 update_stmt (gsi_stmt (gsi));
3680 gsi2 = gsi_for_stmt (user);
3681 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3682 update_stmt (gsi_stmt (gsi2));
3683 gsi_move_before (&gsi, &gsi2);
3684 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3685 }
3686 else
3687 {
3688 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3689 rid of one operation. */
3690 gimple feed = SSA_NAME_DEF_STMT (negate);
3691 tree a = gimple_assign_rhs1 (feed);
3692 tree rhs1 = gimple_assign_rhs1 (user);
3693 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3694 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3695 update_stmt (gsi_stmt (gsi));
3696 }
3697 }
3698 }
3699 }
3700
3701 /* Returns true if OP is of a type for which we can do reassociation.
3702 That is for integral or non-saturating fixed-point types, and for
3703 floating point types when associative-math is enabled. */
3704
3705 static bool
3706 can_reassociate_p (tree op)
3707 {
3708 tree type = TREE_TYPE (op);
3709 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3710 || NON_SAT_FIXED_POINT_TYPE_P (type)
3711 || (flag_associative_math && FLOAT_TYPE_P (type)))
3712 return true;
3713 return false;
3714 }
3715
3716 /* Break up subtract operations in block BB.
3717
3718 We do this top down because we don't know whether the subtract is
3719 part of a possible chain of reassociation except at the top.
3720
3721 IE given
3722 d = f + g
3723 c = a + e
3724 b = c - d
3725 q = b - r
3726 k = t - q
3727
3728 we want to break up k = t - q, but we won't until we've transformed q
3729 = b - r, which won't be broken up until we transform b = c - d.
3730
3731 En passant, clear the GIMPLE visited flag on every statement. */
3732
3733 static void
3734 break_up_subtract_bb (basic_block bb)
3735 {
3736 gimple_stmt_iterator gsi;
3737 basic_block son;
3738
3739 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3740 {
3741 gimple stmt = gsi_stmt (gsi);
3742 gimple_set_visited (stmt, false);
3743
3744 if (!is_gimple_assign (stmt)
3745 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3746 continue;
3747
3748 /* Look for simple gimple subtract operations. */
3749 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3750 {
3751 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3752 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3753 continue;
3754
3755 /* Check for a subtract used only in an addition. If this
3756 is the case, transform it into add of a negate for better
3757 reassociation. IE transform C = A-B into C = A + -B if C
3758 is only used in an addition. */
3759 if (should_break_up_subtract (stmt))
3760 break_up_subtract (stmt, &gsi);
3761 }
3762 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3763 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3764 plus_negates.safe_push (gimple_assign_lhs (stmt));
3765 }
3766 for (son = first_dom_son (CDI_DOMINATORS, bb);
3767 son;
3768 son = next_dom_son (CDI_DOMINATORS, son))
3769 break_up_subtract_bb (son);
3770 }
3771
3772 /* Used for repeated factor analysis. */
3773 struct repeat_factor_d
3774 {
3775 /* An SSA name that occurs in a multiply chain. */
3776 tree factor;
3777
3778 /* Cached rank of the factor. */
3779 unsigned rank;
3780
3781 /* Number of occurrences of the factor in the chain. */
3782 HOST_WIDE_INT count;
3783
3784 /* An SSA name representing the product of this factor and
3785 all factors appearing later in the repeated factor vector. */
3786 tree repr;
3787 };
3788
3789 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3790 typedef const struct repeat_factor_d *const_repeat_factor_t;
3791
3792
3793 static vec<repeat_factor> repeat_factor_vec;
3794
3795 /* Used for sorting the repeat factor vector. Sort primarily by
3796 ascending occurrence count, secondarily by descending rank. */
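/* For example, for x * x * y * y * y * z * z * z * z the vector holds
   { x:2, y:3, z:4 } and sorts to x, y, z; entries with equal counts
   would be ordered highest rank first.  */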
3797
3798 static int
3799 compare_repeat_factors (const void *x1, const void *x2)
3800 {
3801 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3802 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3803
3804 if (rf1->count != rf2->count)
3805 return rf1->count - rf2->count;
3806
3807 return rf2->rank - rf1->rank;
3808 }
3809
3810 /* Look for repeated operands in OPS in the multiply tree rooted at
3811 STMT. Replace them with an optimal sequence of multiplies and powi
3812 builtin calls, and remove the used operands from OPS. Return an
3813 SSA name representing the value of the replacement sequence. */
3814
3815 static tree
3816 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3817 {
3818 unsigned i, j, vec_len;
3819 int ii;
3820 operand_entry_t oe;
3821 repeat_factor_t rf1, rf2;
3822 repeat_factor rfnew;
3823 tree result = NULL_TREE;
3824 tree target_ssa, iter_result;
3825 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3826 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3827 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3828 gimple mul_stmt, pow_stmt;
3829
3830 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3831 target. */
3832 if (!powi_fndecl)
3833 return NULL_TREE;
3834
3835 /* Allocate the repeated factor vector. */
3836 repeat_factor_vec.create (10);
3837
3838 /* Scan the OPS vector for all SSA names in the product and build
3839 up a vector of occurrence counts for each factor. */
3840 FOR_EACH_VEC_ELT (*ops, i, oe)
3841 {
3842 if (TREE_CODE (oe->op) == SSA_NAME)
3843 {
3844 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3845 {
3846 if (rf1->factor == oe->op)
3847 {
3848 rf1->count += oe->count;
3849 break;
3850 }
3851 }
3852
3853 if (j >= repeat_factor_vec.length ())
3854 {
3855 rfnew.factor = oe->op;
3856 rfnew.rank = oe->rank;
3857 rfnew.count = oe->count;
3858 rfnew.repr = NULL_TREE;
3859 repeat_factor_vec.safe_push (rfnew);
3860 }
3861 }
3862 }
3863
3864 /* Sort the repeated factor vector by (a) increasing occurrence count,
3865 and (b) decreasing rank. */
3866 repeat_factor_vec.qsort (compare_repeat_factors);
3867
3868 /* It is generally best to combine as many base factors as possible
3869 into a product before applying __builtin_powi to the result.
3870 However, the sort order chosen for the repeated factor vector
3871 allows us to cache partial results for the product of the base
3872 factors for subsequent use. When we already have a cached partial
3873 result from a previous iteration, it is best to make use of it
3874 before looking for another __builtin_powi opportunity.
3875
3876 As an example, consider x * x * y * y * y * z * z * z * z.
3877 We want to first compose the product x * y * z, raise it to the
3878 second power, then multiply this by y * z, and finally multiply
3879 by z. This can be done in 5 multiplies provided we cache y * z
3880 for use in both expressions:
3881
3882 t1 = y * z
3883 t2 = t1 * x
3884 t3 = t2 * t2
3885 t4 = t1 * t3
3886 result = t4 * z
3887
3888 If we instead ignored the cached y * z and first multiplied by
3889 the __builtin_powi opportunity z * z, we would get the inferior:
3890
3891 t1 = y * z
3892 t2 = t1 * x
3893 t3 = t2 * t2
3894 t4 = z * z
3895 t5 = t3 * t4
3896 result = t5 * y */
3897
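  /* Concretely, for x * x * y * y * y * z * z * z * z the loop below
     emits roughly the following (the temporary names are illustrative
     only):

       reassocpow_1 = y * z;
       reassocpow_2 = x * reassocpow_1;
       reassocpow_3 = __builtin_powi (reassocpow_2, 2);
       reassocpow_4 = reassocpow_3 * reassocpow_1;
       reassocpow_5 = reassocpow_4 * z;

     and returns reassocpow_5; a later pass expands the powi call into
     the single multiply reassocpow_2 * reassocpow_2.  */
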
3898 vec_len = repeat_factor_vec.length ();
3899
3900 /* Repeatedly look for opportunities to create a builtin_powi call. */
3901 while (true)
3902 {
3903 HOST_WIDE_INT power;
3904
3905 /* First look for the largest cached product of factors from
3906 preceding iterations. If found, create a builtin_powi for
3907 it if the minimum occurrence count for its factors is at
3908 least 2, or just use this cached product as our next
3909 multiplicand if the minimum occurrence count is 1. */
3910 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3911 {
3912 if (rf1->repr && rf1->count > 0)
3913 break;
3914 }
3915
3916 if (j < vec_len)
3917 {
3918 power = rf1->count;
3919
3920 if (power == 1)
3921 {
3922 iter_result = rf1->repr;
3923
3924 if (dump_file && (dump_flags & TDF_DETAILS))
3925 {
3926 unsigned elt;
3927 repeat_factor_t rf;
3928 fputs ("Multiplying by cached product ", dump_file);
3929 for (elt = j; elt < vec_len; elt++)
3930 {
3931 rf = &repeat_factor_vec[elt];
3932 print_generic_expr (dump_file, rf->factor, 0);
3933 if (elt < vec_len - 1)
3934 fputs (" * ", dump_file);
3935 }
3936 fputs ("\n", dump_file);
3937 }
3938 }
3939 else
3940 {
3941 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3942 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3943 build_int_cst (integer_type_node,
3944 power));
3945 gimple_call_set_lhs (pow_stmt, iter_result);
3946 gimple_set_location (pow_stmt, gimple_location (stmt));
3947 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3948
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3950 {
3951 unsigned elt;
3952 repeat_factor_t rf;
3953 fputs ("Building __builtin_pow call for cached product (",
3954 dump_file);
3955 for (elt = j; elt < vec_len; elt++)
3956 {
3957 rf = &repeat_factor_vec[elt];
3958 print_generic_expr (dump_file, rf->factor, 0);
3959 if (elt < vec_len - 1)
3960 fputs (" * ", dump_file);
3961 }
3962 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3963 power);
3964 }
3965 }
3966 }
3967 else
3968 {
3969 /* Otherwise, find the first factor in the repeated factor
3970 vector whose occurrence count is at least 2. If no such
3971 factor exists, there are no builtin_powi opportunities
3972 remaining. */
3973 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3974 {
3975 if (rf1->count >= 2)
3976 break;
3977 }
3978
3979 if (j >= vec_len)
3980 break;
3981
3982 power = rf1->count;
3983
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3985 {
3986 unsigned elt;
3987 repeat_factor_t rf;
3988 fputs ("Building __builtin_pow call for (", dump_file);
3989 for (elt = j; elt < vec_len; elt++)
3990 {
3991 rf = &repeat_factor_vec[elt];
3992 print_generic_expr (dump_file, rf->factor, 0);
3993 if (elt < vec_len - 1)
3994 fputs (" * ", dump_file);
3995 }
3996 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3997 }
3998
3999 reassociate_stats.pows_created++;
4000
4001 /* Visit each element of the vector in reverse order (so that
4002 high-occurrence elements are visited first, and within the
4003 same occurrence count, lower-ranked elements are visited
4004 first). Form a linear product of all elements in this order
4005 whose occurrence count is at least that of element J.
4006 Record the SSA name representing the product of each element
4007 with all subsequent elements in the vector. */
4008 if (j == vec_len - 1)
4009 rf1->repr = rf1->factor;
4010 else
4011 {
4012 for (ii = vec_len - 2; ii >= (int)j; ii--)
4013 {
4014 tree op1, op2;
4015
4016 rf1 = &repeat_factor_vec[ii];
4017 rf2 = &repeat_factor_vec[ii + 1];
4018
4019 /* Init the last factor's representative to be itself. */
4020 if (!rf2->repr)
4021 rf2->repr = rf2->factor;
4022
4023 op1 = rf1->factor;
4024 op2 = rf2->repr;
4025
4026 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4027 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4028 target_ssa,
4029 op1, op2);
4030 gimple_set_location (mul_stmt, gimple_location (stmt));
4031 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4032 rf1->repr = target_ssa;
4033
4034 /* Don't reprocess the multiply we just introduced. */
4035 gimple_set_visited (mul_stmt, true);
4036 }
4037 }
4038
4039 /* Form a call to __builtin_powi for the maximum product
4040 just formed, raised to the power obtained earlier. */
4041 rf1 = &repeat_factor_vec[j];
4042 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4043 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4044 build_int_cst (integer_type_node,
4045 power));
4046 gimple_call_set_lhs (pow_stmt, iter_result);
4047 gimple_set_location (pow_stmt, gimple_location (stmt));
4048 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4049 }
4050
4051 /* If we previously formed at least one other builtin_powi call,
4052 form the product of this one and those others. */
4053 if (result)
4054 {
4055 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4056 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4057 result, iter_result);
4058 gimple_set_location (mul_stmt, gimple_location (stmt));
4059 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4060 gimple_set_visited (mul_stmt, true);
4061 result = new_result;
4062 }
4063 else
4064 result = iter_result;
4065
4066 /* Decrement the occurrence count of each element in the product
4067 by the count found above, and remove this many copies of each
4068 factor from OPS. */
4069 for (i = j; i < vec_len; i++)
4070 {
4071 unsigned k = power;
4072 unsigned n;
4073
4074 rf1 = &repeat_factor_vec[i];
4075 rf1->count -= power;
4076
4077 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4078 {
4079 if (oe->op == rf1->factor)
4080 {
4081 if (oe->count <= k)
4082 {
4083 ops->ordered_remove (n);
4084 k -= oe->count;
4085
4086 if (k == 0)
4087 break;
4088 }
4089 else
4090 {
4091 oe->count -= k;
4092 break;
4093 }
4094 }
4095 }
4096 }
4097 }
4098
4099 /* At this point all elements in the repeated factor vector have a
4100 remaining occurrence count of 0 or 1, and those with a count of 1
4101 don't have cached representatives. Re-sort the ops vector and
4102 clean up. */
4103 ops->qsort (sort_by_operand_rank);
4104 repeat_factor_vec.release ();
4105
4106 /* Return the final product computed herein. Note that there may
4107 still be some elements with single occurrence count left in OPS;
4108 those will be handled by the normal reassociation logic. */
4109 return result;
4110 }
4111
4112 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4113
4114 static void
4115 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4116 {
4117 tree rhs1;
4118
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4120 {
4121 fprintf (dump_file, "Transforming ");
4122 print_gimple_stmt (dump_file, stmt, 0, 0);
4123 }
4124
4125 rhs1 = gimple_assign_rhs1 (stmt);
4126 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4127 update_stmt (stmt);
4128 remove_visited_stmt_chain (rhs1);
4129
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4131 {
4132 fprintf (dump_file, " into ");
4133 print_gimple_stmt (dump_file, stmt, 0, 0);
4134 }
4135 }
4136
4137 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4138
4139 static void
4140 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4141 tree rhs1, tree rhs2)
4142 {
4143 if (dump_file && (dump_flags & TDF_DETAILS))
4144 {
4145 fprintf (dump_file, "Transforming ");
4146 print_gimple_stmt (dump_file, stmt, 0, 0);
4147 }
4148
4149 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4150 update_stmt (gsi_stmt (*gsi));
4151 remove_visited_stmt_chain (rhs1);
4152
4153 if (dump_file && (dump_flags & TDF_DETAILS))
4154 {
4155 fprintf (dump_file, " into ");
4156 print_gimple_stmt (dump_file, stmt, 0, 0);
4157 }
4158 }
4159
4160 /* Reassociate expressions in basic block BB and, recursively, in its
4161 children in the post-dominator tree. */
4162
4163 static void
4164 reassociate_bb (basic_block bb)
4165 {
4166 gimple_stmt_iterator gsi;
4167 basic_block son;
4168 gimple stmt = last_stmt (bb);
4169
4170 if (stmt && !gimple_visited_p (stmt))
4171 maybe_optimize_range_tests (stmt);
4172
4173 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4174 {
4175 stmt = gsi_stmt (gsi);
4176
4177 if (is_gimple_assign (stmt)
4178 && !stmt_could_throw_p (stmt))
4179 {
4180 tree lhs, rhs1, rhs2;
4181 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4182
4183 /* If this is not a gimple binary expression, there is
4184 nothing for us to do with it. */
4185 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4186 continue;
4187
4188 /* If this was part of an already processed statement,
4189 we don't need to touch it again. */
4190 if (gimple_visited_p (stmt))
4191 {
4192 /* This statement might have become dead because of previous
4193 reassociations. */
4194 if (has_zero_uses (gimple_get_lhs (stmt)))
4195 {
4196 gsi_remove (&gsi, true);
4197 release_defs (stmt);
4198 /* Removing that stmt may have left the
4199 iterator at the end of the sequence.
4200 Reset it to the last stmt of the block;
4201 if the block is now empty, the iterator
4202 is still at the end, in which case we
4203 need to bail out. */
4204 if (gsi_end_p (gsi))
4205 {
4206 gsi = gsi_last_bb (bb);
4207 if (gsi_end_p (gsi))
4208 break;
4209 }
4210 }
4211 continue;
4212 }
4213
4214 lhs = gimple_assign_lhs (stmt);
4215 rhs1 = gimple_assign_rhs1 (stmt);
4216 rhs2 = gimple_assign_rhs2 (stmt);
4217
4218 /* For operations other than bitwise AND/OR/XOR and MIN/MAX
4219 we can't reassociate all types. Verify that here. */
4220 if (rhs_code != BIT_IOR_EXPR
4221 && rhs_code != BIT_AND_EXPR
4222 && rhs_code != BIT_XOR_EXPR
4223 && rhs_code != MIN_EXPR
4224 && rhs_code != MAX_EXPR
4225 && (!can_reassociate_p (lhs)
4226 || !can_reassociate_p (rhs1)
4227 || !can_reassociate_p (rhs2)))
4228 continue;
4229
4230 if (associative_tree_code (rhs_code))
4231 {
4232 vec<operand_entry_t> ops = vNULL;
4233 tree powi_result = NULL_TREE;
4234
4235 /* There may be no immediate uses left by the time we
4236 get here because we may have eliminated them all. */
4237 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4238 continue;
4239
4240 gimple_set_visited (stmt, true);
4241 linearize_expr_tree (&ops, stmt, true, true);
4242 ops.qsort (sort_by_operand_rank);
4243 optimize_ops_list (rhs_code, &ops);
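/* Undistribution, e.g. rewriting a * b + a * c as a * (b + c), may
   expose further opportunities, so re-sort and re-optimize the
   operand list whenever it changed something.  */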
4244 if (undistribute_ops_list (rhs_code, &ops,
4245 loop_containing_stmt (stmt)))
4246 {
4247 ops.qsort (sort_by_operand_rank);
4248 optimize_ops_list (rhs_code, &ops);
4249 }
4250
4251 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4252 optimize_range_tests (rhs_code, &ops);
4253
4254 if (first_pass_instance
4255 && rhs_code == MULT_EXPR
4256 && flag_unsafe_math_optimizations)
4257 powi_result = attempt_builtin_powi (stmt, &ops);
4258
4259 /* If the operand vector is now empty, all operands were
4260 consumed by the __builtin_powi optimization. */
4261 if (ops.length () == 0)
4262 transform_stmt_to_copy (&gsi, stmt, powi_result);
4263 else if (ops.length () == 1)
4264 {
4265 tree last_op = ops.last ()->op;
4266
4267 if (powi_result)
4268 transform_stmt_to_multiply (&gsi, stmt, last_op,
4269 powi_result);
4270 else
4271 transform_stmt_to_copy (&gsi, stmt, last_op);
4272 }
4273 else
4274 {
4275 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4276 int ops_num = ops.length ();
4277 int width = get_reassociation_width (ops_num, rhs_code, mode);
4278
4279 if (dump_file && (dump_flags & TDF_DETAILS))
4280 fprintf (dump_file,
4281 "Width = %d was chosen for reassociation\n", width);
4282
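/* For instance, with four operands a, b, c, d and width 2 the
   parallel rewrite yields (a + b) + (c + d), exposing two
   independent additions, while the serial rewrite yields
   ((a + b) + c) + d.  */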
4283 if (width > 1
4284 && ops.length () > 3)
4285 rewrite_expr_tree_parallel (stmt, width, ops);
4286 else
4287 {
4288 /* When there are three operands left, we want
4289 to make sure the ones that get the double
4290 binary op are chosen wisely. */
4291 int len = ops.length ();
4292 if (len >= 3)
4293 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4294
4295 rewrite_expr_tree (stmt, 0, ops, false);
4296 }
4297
4298 /* If we combined some repeated factors into a
4299 __builtin_powi call, multiply that result by the
4300 reassociated operands. */
4301 if (powi_result)
4302 {
4303 gimple mul_stmt;
4304 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4305 tree target_ssa = make_temp_ssa_name (type, NULL,
4306 "reassocpow");
4307 gimple_set_lhs (stmt, target_ssa);
4308 update_stmt (stmt);
4309 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4310 powi_result,
4311 target_ssa);
4312 gimple_set_location (mul_stmt, gimple_location (stmt));
4313 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4314 }
4315 }
4316
4317 ops.release ();
4318 }
4319 }
4320 }
4321 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4322 son;
4323 son = next_dom_son (CDI_POST_DOMINATORS, son))
4324 reassociate_bb (son);
4325 }
4326
4327 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4328 void debug_ops_vector (vec<operand_entry_t> ops);
4329
4330 /* Dump the operand entry vector OPS to FILE. */
4331
4332 void
4333 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4334 {
4335 operand_entry_t oe;
4336 unsigned int i;
4337
4338 FOR_EACH_VEC_ELT (ops, i, oe)
4339 {
4340 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4341 print_generic_expr (file, oe->op, 0);
4342 }
4343 }
4344
4345 /* Dump the operand entry vector OPS to STDERR. */
4346
4347 DEBUG_FUNCTION void
4348 debug_ops_vector (vec<operand_entry_t> ops)
4349 {
4350 dump_ops_vector (stderr, ops);
4351 }
4352
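/* Break up subtracts while walking the dominator tree from the entry
   block, then reassociate expression trees while walking the
   post-dominator tree from the exit block.  */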
4353 static void
4354 do_reassoc (void)
4355 {
4356 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4357 renumber_gimple_stmt_uids ();
4358 reassociate_bb (EXIT_BLOCK_PTR);
4359 }
4360
4361 /* Initialize the reassociation pass. */
4362
4363 static void
4364 init_reassoc (void)
4365 {
4366 int i;
4367 long rank = 2;
4368 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4369
4370 /* Find the loops, so that we can prevent moving calculations in
4371 them. */
4372 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4373
4374 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4375
4376 operand_entry_pool = create_alloc_pool ("operand entry pool",
4377 sizeof (struct operand_entry), 30);
4378 next_operand_entry_id = 0;
4379
4380 /* Reverse Post Order (RPO) will give us something where
4381 deeper loops come later. */
4382 pre_and_rev_post_order_compute (NULL, bbs, false);
4383 bb_rank = XCNEWVEC (long, last_basic_block);
4384 operand_rank = pointer_map_create ();
4385
4386 /* Give each default definition a distinct rank. This includes
4387 parameters and the static chain. Walk backwards over all
4388 SSA names so that we get proper rank ordering according
4389 to tree_swap_operands_p. */
4390 for (i = num_ssa_names - 1; i > 0; --i)
4391 {
4392 tree name = ssa_name (i);
4393 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4394 insert_operand_rank (name, ++rank);
4395 }
4396
4397 /* Set up rank for each BB.  */
4398 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4399 bb_rank[bbs[i]] = ++rank << 16;
4400
4401 free (bbs);
4402 calculate_dominance_info (CDI_POST_DOMINATORS);
4403 plus_negates = vNULL;
4404 }
4405
4406 /* Cleanup after the reassociation pass, and print stats if
4407 requested. */
4408
4409 static void
4410 fini_reassoc (void)
4411 {
4412 statistics_counter_event (cfun, "Linearized",
4413 reassociate_stats.linearized);
4414 statistics_counter_event (cfun, "Constants eliminated",
4415 reassociate_stats.constants_eliminated);
4416 statistics_counter_event (cfun, "Ops eliminated",
4417 reassociate_stats.ops_eliminated);
4418 statistics_counter_event (cfun, "Statements rewritten",
4419 reassociate_stats.rewritten);
4420 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4421 reassociate_stats.pows_encountered);
4422 statistics_counter_event (cfun, "Built-in powi calls created",
4423 reassociate_stats.pows_created);
4424
4425 pointer_map_destroy (operand_rank);
4426 free_alloc_pool (operand_entry_pool);
4427 free (bb_rank);
4428 plus_negates.release ();
4429 free_dominance_info (CDI_POST_DOMINATORS);
4430 loop_optimizer_finalize ();
4431 }
4432
4433 /* Gate and execute functions for Reassociation. */
4434
4435 static unsigned int
4436 execute_reassoc (void)
4437 {
4438 init_reassoc ();
4439
4440 do_reassoc ();
4441 repropagate_negates ();
4442
4443 fini_reassoc ();
4444 return 0;
4445 }
4446
4447 static bool
4448 gate_tree_ssa_reassoc (void)
4449 {
4450 return flag_tree_reassoc != 0;
4451 }
4452
4453 namespace {
4454
4455 const pass_data pass_data_reassoc =
4456 {
4457 GIMPLE_PASS, /* type */
4458 "reassoc", /* name */
4459 OPTGROUP_NONE, /* optinfo_flags */
4460 true, /* has_gate */
4461 true, /* has_execute */
4462 TV_TREE_REASSOC, /* tv_id */
4463 ( PROP_cfg | PROP_ssa ), /* properties_required */
4464 0, /* properties_provided */
4465 0, /* properties_destroyed */
4466 0, /* todo_flags_start */
4467 ( TODO_verify_ssa
4468 | TODO_update_ssa_only_virtuals
4469 | TODO_verify_flow ), /* todo_flags_finish */
4470 };
4471
4472 class pass_reassoc : public gimple_opt_pass
4473 {
4474 public:
4475 pass_reassoc(gcc::context *ctxt)
4476 : gimple_opt_pass(pass_data_reassoc, ctxt)
4477 {}
4478
4479 /* opt_pass methods: */
4480 opt_pass * clone () { return new pass_reassoc (ctxt_); }
4481 bool gate () { return gate_tree_ssa_reassoc (); }
4482 unsigned int execute () { return execute_reassoc (); }
4483
4484 }; // class pass_reassoc
4485
4486 } // anon namespace
4487
4488 gimple_opt_pass *
4489 make_pass_reassoc (gcc::context *ctxt)
4490 {
4491 return new pass_reassoc (ctxt);
4492 }