1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
42
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
46
47 It consists of five steps:
48
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
51
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
56
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
59
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
63
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
66
67     5. Repropagate negates, as nothing else will clean them up ATM.
68
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
71
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
74
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
79
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
84
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
87
88 a (1), b (1), c (1), d (2), e (2)
89
90
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
93
94 You want to first merge all leaves of the same rank, as much as
95 possible.
96
97 So first build a binary op of
98
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
100
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
103
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
108
109 Then build a binary op of d + e
110 mergetmp2 = d + e
111
112 and put mergetmp2 on the merge worklist.
113
114 so merge worklist = {mergetmp, c, mergetmp2}
115
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
118
119 So we have
120
121 build binary op
122 mergetmp3 = mergetmp + c
123
124 worklist = {mergetmp2, mergetmp3}
125
126 mergetmp4 = mergetmp2 + mergetmp3
127
128 worklist = {mergetmp4}
129
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
132
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
135
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
138
139 So why don't we do this?
140
141 Because it's expensive, and rarely will help. Most trees we are
142    reassociating have 3 or fewer ops. If they have 2 ops, they will
143    already be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
146
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
149
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
153
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
157
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
163
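/* A purely illustrative sketch of the steps above (not the literal output
   of any particular testcase): for the PLUS_EXPR chain

     w = a + b - a + c

   step 1 breaks up the subtract:    w = a + b + (-a) + c
   step 2 linearizes the chain:      ((a + b) + (-a)) + c
   step 3 cancels a against -a (eliminate_plus_minus_pair below replaces
          the pair with 0 and eliminate_using_constants drops that 0)
   step 4 rewrites the remainder in rank order, giving w = b + c.  */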
164
165 /* Statistics */
166 static struct
167 {
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
175
176 /* Operator, rank pair. */
177 typedef struct operand_entry
178 {
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
184
185 static alloc_pool operand_entry_pool;
186
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
190
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
195
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
198
199 /* Forward decls. */
200 static long get_rank (tree);
201
202
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
207
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
217 {
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
224
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
228
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
233
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
238
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
244
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
247 {
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
251 {
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
255 }
256 }
257
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
260 }
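
/* For example (an illustrative note, the concrete numbers depend on the
   CFG): an accumulator phi in the header of an innermost loop gets the
   rank of the loop latch block plus PHI_LOOP_BIAS (1 << 15).  That bias
   dwarfs the +1 increments ordinary statements receive in get_rank, so
   the accumulator is added last when the chain is rewritten; see the
   accumulator example in the comment inside get_rank below.  */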
261
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
267 {
268 gimple phi_stmt;
269 long block_rank;
270
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
274
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
276
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
279
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
284
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
287
288 return false;
289 }
290
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
297 {
298 long op_rank;
299
300 if (loop_carried_phi (op))
301 return rank;
302
303 op_rank = get_rank (op);
304
305 return MAX (rank, op_rank);
306 }
307
308 /* Look up the operand rank structure for expression E. */
309
310 static inline long
311 find_operand_rank (tree e)
312 {
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
315 }
316
317 /* Insert {E,RANK} into the operand rank hashtable. */
318
319 static inline void
320 insert_operand_rank (tree e, long rank)
321 {
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
327 }
328
329 /* Given an expression E, return the rank of the expression. */
330
331 static long
332 get_rank (tree e)
333 {
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
337
338 /* SSA_NAME's have the rank of the expression they are the result
339 of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346      I make no claims that this is optimal; however, it gives good
347 results. */
348
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
352
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
357
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
361
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
366
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
369
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
374
375 if (TREE_CODE (e) == SSA_NAME)
376 {
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
381
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
384
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
388
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
392
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
397
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
403 {
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
409 {
410 for (i = 0; i < n; i++)
411 {
412 op = TREE_OPERAND (rhs, i);
413
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
416 }
417 }
418 }
419 else
420 {
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
423 {
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
427 }
428 }
429
430 if (dump_file && (dump_flags & TDF_DETAILS))
431 {
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
435 }
436
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
440 }
441
442 /* Globals, etc, are rank 0 */
443 return 0;
444 }
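
/* A small illustrative example of the ranking (the absolute numbers
   depend on bb_rank and the pre-setup parameter ranks): for

     a_1 = parm1 + parm2;
     b_2 = a_1 + parm3;

   rank (a_1) == MAX (rank (parm1), rank (parm2)) + 1 and
   rank (b_2) == MAX (rank (a_1), rank (parm3)) + 1, so values later in a
   dependence chain get strictly larger ranks than the values they use.  */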
445
446
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
452
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
457 {
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
464 }
465
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
470 {
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
473
474   /* It's nicer for optimize_ops_list if constants that are likely
475      to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
478 {
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
485 }
486
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
492 {
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
497 }
498
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
503 }
504
505 /* Add an operand entry to *OPS for the tree operand OP. */
506
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
509 {
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
511
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
517 }
518
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
521
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
525 {
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
527
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
533
534 reassociate_stats.pows_encountered++;
535 }
536
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
539
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
542 {
543 basic_block bb = gimple_bb (stmt);
544
545 if (gimple_bb (stmt) == NULL)
546 return false;
547
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
550
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
555
556 return false;
557 }
558
559
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561    operand of that operation. Otherwise, return NULL_TREE. */
562
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
565 {
566 gimple stmt = SSA_NAME_DEF_STMT (name);
567
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
570
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
574 }
575
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
579
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
587 {
588
589   /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
593
594 if (last && last->op == curr->op)
595 {
596 switch (opcode)
597 {
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
603 {
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
610 }
611
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
614
615 return true;
616
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
619 {
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
625 }
626
627 reassociate_stats.ops_eliminated += 2;
628
629 if (ops->length () == 2)
630 {
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
634 }
635 else
636 {
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
639 }
640
641 return true;
642
643 default:
644 break;
645 }
646 }
647 return false;
648 }
649
650 static vec<tree> plus_negates;
651
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
657
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
663 {
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
668
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
671
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
676
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
680
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
685 {
686 if (oe->op == negateop)
687 {
688
689 if (dump_file && (dump_flags & TDF_DETAILS))
690 {
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
696 }
697
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
702
703 return true;
704 }
705 else if (oe->op == notop)
706 {
707 tree op_type = TREE_TYPE (oe->op);
708
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 {
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
716 }
717
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
722
723 return true;
724 }
725 }
726
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
731
732 return false;
733 }
734
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736    bitwise not expression, look in OPS for a corresponding operand to
737    cancel it out. If we find one, the whole operand list collapses to
738    0 (for BIT_AND_EXPR) or an all-ones constant (for BIT_IOR_EXPR), and
739    we return true. Otherwise, return false. */
740
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
746 {
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
750
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
754
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
758
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
762
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
767 {
768 if (oe->op == notop)
769 {
770 if (dump_file && (dump_flags & TDF_DETAILS))
771 {
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
783 }
784
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
790
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
795 }
796 }
797
798 return false;
799 }
800
801 /* Use constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
807
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
811 {
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
814
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
817 {
818 switch (opcode)
819 {
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
822 {
823 if (ops->length () != 1)
824 {
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
827
828 reassociate_stats.ops_eliminated += ops->length () - 1;
829
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
833 }
834 }
835 else if (integer_all_onesp (oelast->op))
836 {
837 if (ops->length () != 1)
838 {
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
843 }
844 }
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
848 {
849 if (ops->length () != 1)
850 {
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
853
854 reassociate_stats.ops_eliminated += ops->length () - 1;
855
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
859 }
860 }
861 else if (integer_zerop (oelast->op))
862 {
863 if (ops->length () != 1)
864 {
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
869 }
870 }
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
878 {
879 if (ops->length () != 1)
880 {
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
883
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
888 }
889 }
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
894 {
895 if (ops->length () != 1)
896 {
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
902 }
903 }
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
913 {
914 if (ops->length () != 1)
915 {
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
921 }
922 }
923 break;
924 default:
925 break;
926 }
927 }
928 }
929
930
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
933
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
941
942
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
945
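/* Note on the encoding used below: the oecount hashtable built in
   undistribute_ops_list does not store pointers to oecount structures.
   Each slot instead holds the zero-based index of the entry in CVEC plus
   42, cast to a pointer, which is why oecount_hash and oecount_eq recover
   the entry as cvec[(size_t)p - 42].  The offset keeps slot values away
   from the NULL and deleted markers the hash table uses internally.  */
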
946 /* Hash function for oecount. */
947
948 static hashval_t
949 oecount_hash (const void *p)
950 {
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
953 }
954
955 /* Comparison function for oecount. */
956
957 static int
958 oecount_eq (const void *p1, const void *p2)
959 {
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
964 }
965
966 /* Comparison function for qsort sorting oecount elements by count. */
967
968 static int
969 oecount_cmp (const void *p1, const void *p2)
970 {
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
978 }
979
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
982
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
985 {
986 tree fndecl;
987
988 if (!is_gimple_call (stmt))
989 return false;
990
991 fndecl = gimple_call_fndecl (stmt);
992
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
996
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
998 {
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1002
1003 default:
1004 return false;
1005 }
1006 }
1007
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1011
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1014 {
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1018
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1020 {
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1028
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1034
1035 default:
1036 gcc_unreachable ();
1037 }
1038 }
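
/* E.g. for __builtin_pow (x, 4.0) the call is rewritten in place to
   __builtin_pow (x, 3.0) and 3 is returned; zero_one_operation below only
   propagates the base into the call's single use once the remaining
   exponent reaches 1.  */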
1039
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1043
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1046 {
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1051
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1056
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 gsi_remove (&gsi, true);
1066 release_defs (stmt);
1067
1068 if (is_gimple_call (stmt))
1069 unlink_stmt_vdef (stmt);
1070 }
1071
1072 /* Walks the linear chain with result *DEF searching for an operation
1073    with operand OP and code OPCODE, removing that from the chain. *DEF
1074 is updated if there is only one operand but no operation left. */
1075
1076 static void
1077 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1078 {
1079 gimple stmt = SSA_NAME_DEF_STMT (*def);
1080
1081 do
1082 {
1083 tree name;
1084
1085 if (opcode == MULT_EXPR
1086 && stmt_is_power_of_op (stmt, op))
1087 {
1088 if (decrement_power (stmt) == 1)
1089 propagate_op_to_single_use (op, stmt, def);
1090 return;
1091 }
1092
1093 name = gimple_assign_rhs1 (stmt);
1094
1095 /* If this is the operation we look for and one of the operands
1096 is ours simply propagate the other operand into the stmts
1097 single use. */
1098 if (gimple_assign_rhs_code (stmt) == opcode
1099 && (name == op
1100 || gimple_assign_rhs2 (stmt) == op))
1101 {
1102 if (name == op)
1103 name = gimple_assign_rhs2 (stmt);
1104 propagate_op_to_single_use (name, stmt, def);
1105 return;
1106 }
1107
1108 /* We might have a multiply of two __builtin_pow* calls, and
1109 the operand might be hiding in the rightmost one. */
1110 if (opcode == MULT_EXPR
1111 && gimple_assign_rhs_code (stmt) == opcode
1112 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1113 {
1114 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1115 if (stmt_is_power_of_op (stmt2, op))
1116 {
1117 if (decrement_power (stmt2) == 1)
1118 propagate_op_to_single_use (op, stmt2, def);
1119 return;
1120 }
1121 }
1122
1123 /* Continue walking the chain. */
1124 gcc_assert (name != op
1125 && TREE_CODE (name) == SSA_NAME);
1126 stmt = SSA_NAME_DEF_STMT (name);
1127 }
1128 while (1);
1129 }
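
/* As an illustrative sketch: to remove A from the MULT_EXPR chain

     t1 = x * a;
     t2 = t1 * b;

   with *DEF == t2, the walk starts at t2, steps to t1 through its rhs1,
   finds A as an operand there and propagates x into t1's single use,
   leaving t2 = x * b with t1's statement removed.  */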
1130
1131 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA name
1132    of type TYPE for the result. Places the statement after the definition
1133    of either OP1 or OP2. Returns the new statement. */
1134
1135 static gimple
1136 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1137 {
1138 gimple op1def = NULL, op2def = NULL;
1139 gimple_stmt_iterator gsi;
1140 tree op;
1141 gimple sum;
1142
1143 /* Create the addition statement. */
1144 op = make_ssa_name (type, NULL);
1145 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1146
1147 /* Find an insertion place and insert. */
1148 if (TREE_CODE (op1) == SSA_NAME)
1149 op1def = SSA_NAME_DEF_STMT (op1);
1150 if (TREE_CODE (op2) == SSA_NAME)
1151 op2def = SSA_NAME_DEF_STMT (op2);
1152 if ((!op1def || gimple_nop_p (op1def))
1153 && (!op2def || gimple_nop_p (op2def)))
1154 {
1155 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1156 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1157 }
1158 else if ((!op1def || gimple_nop_p (op1def))
1159 || (op2def && !gimple_nop_p (op2def)
1160 && stmt_dominates_stmt_p (op1def, op2def)))
1161 {
1162 if (gimple_code (op2def) == GIMPLE_PHI)
1163 {
1164 gsi = gsi_after_labels (gimple_bb (op2def));
1165 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1166 }
1167 else
1168 {
1169 if (!stmt_ends_bb_p (op2def))
1170 {
1171 gsi = gsi_for_stmt (op2def);
1172 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1173 }
1174 else
1175 {
1176 edge e;
1177 edge_iterator ei;
1178
1179 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1180 if (e->flags & EDGE_FALLTHRU)
1181 gsi_insert_on_edge_immediate (e, sum);
1182 }
1183 }
1184 }
1185 else
1186 {
1187 if (gimple_code (op1def) == GIMPLE_PHI)
1188 {
1189 gsi = gsi_after_labels (gimple_bb (op1def));
1190 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1191 }
1192 else
1193 {
1194 if (!stmt_ends_bb_p (op1def))
1195 {
1196 gsi = gsi_for_stmt (op1def);
1197 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1198 }
1199 else
1200 {
1201 edge e;
1202 edge_iterator ei;
1203
1204 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1205 if (e->flags & EDGE_FALLTHRU)
1206 gsi_insert_on_edge_immediate (e, sum);
1207 }
1208 }
1209 }
1210 update_stmt (sum);
1211
1212 return sum;
1213 }
1214
1215 /* Perform un-distribution of divisions and multiplications.
1216 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1217 to (A + B) / X for real X.
1218
1219 The algorithm is organized as follows.
1220
1221 - First we walk the addition chain *OPS looking for summands that
1222 are defined by a multiplication or a real division. This results
1223 in the candidates bitmap with relevant indices into *OPS.
1224
1225 - Second we build the chains of multiplications or divisions for
1226 these candidates, counting the number of occurrences of (operand, code)
1227 pairs in all of the candidates chains.
1228
1229 - Third we sort the (operand, code) pairs by number of occurrence and
1230 process them starting with the pair with the most uses.
1231
1232 * For each such pair we walk the candidates again to build a
1233 second candidate bitmap noting all multiplication/division chains
1234 that have at least one occurrence of (operand, code).
1235
1236 * We build an alternate addition chain only covering these
1237 candidates with one (operand, code) operation removed from their
1238 multiplication/division chain.
1239
1240 * The first candidate gets replaced by the alternate addition chain
1241 multiplied/divided by the operand.
1242
1243 * All candidate chains get disabled for further processing and
1244 processing of (operand, code) pairs continues.
1245
1246 The alternate addition chains built are re-processed by the main
1247 reassociation algorithm which allows optimizing a * x * y + b * y * x
1248    to (a + b) * x * y in one invocation of the reassociation pass. */
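
/* A small sketch of the bookkeeping (purely illustrative): for the chain
   a*x + b*x + c*y all three summands become candidates, the counting
   table gives (x, MULT_EXPR) a count of 2 and every other
   (operand, MULT_EXPR) pair a count of 1, and processing the
   (x, MULT_EXPR) pair rewrites the first two summands into (a + b) * x
   while the c*y summand is left alone.  */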
1249
1250 static bool
1251 undistribute_ops_list (enum tree_code opcode,
1252 vec<operand_entry_t> *ops, struct loop *loop)
1253 {
1254 unsigned int length = ops->length ();
1255 operand_entry_t oe1;
1256 unsigned i, j;
1257 sbitmap candidates, candidates2;
1258 unsigned nr_candidates, nr_candidates2;
1259 sbitmap_iterator sbi0;
1260 vec<operand_entry_t> *subops;
1261 htab_t ctable;
1262 bool changed = false;
1263 int next_oecount_id = 0;
1264
1265 if (length <= 1
1266 || opcode != PLUS_EXPR)
1267 return false;
1268
1269 /* Build a list of candidates to process. */
1270 candidates = sbitmap_alloc (length);
1271 bitmap_clear (candidates);
1272 nr_candidates = 0;
1273 FOR_EACH_VEC_ELT (*ops, i, oe1)
1274 {
1275 enum tree_code dcode;
1276 gimple oe1def;
1277
1278 if (TREE_CODE (oe1->op) != SSA_NAME)
1279 continue;
1280 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1281 if (!is_gimple_assign (oe1def))
1282 continue;
1283 dcode = gimple_assign_rhs_code (oe1def);
1284 if ((dcode != MULT_EXPR
1285 && dcode != RDIV_EXPR)
1286 || !is_reassociable_op (oe1def, dcode, loop))
1287 continue;
1288
1289 bitmap_set_bit (candidates, i);
1290 nr_candidates++;
1291 }
1292
1293 if (nr_candidates < 2)
1294 {
1295 sbitmap_free (candidates);
1296 return false;
1297 }
1298
1299 if (dump_file && (dump_flags & TDF_DETAILS))
1300 {
1301 fprintf (dump_file, "searching for un-distribute opportunities ");
1302 print_generic_expr (dump_file,
1303 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1304 fprintf (dump_file, " %d\n", nr_candidates);
1305 }
1306
1307 /* Build linearized sub-operand lists and the counting table. */
1308 cvec.create (0);
1309 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1310 /* ??? Macro arguments cannot have multi-argument template types in
1311      them. This typedef is needed to work around that limitation. */
1312 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1313 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1314 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1315 {
1316 gimple oedef;
1317 enum tree_code oecode;
1318 unsigned j;
1319
1320 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1321 oecode = gimple_assign_rhs_code (oedef);
1322 linearize_expr_tree (&subops[i], oedef,
1323 associative_tree_code (oecode), false);
1324
1325 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1326 {
1327 oecount c;
1328 void **slot;
1329 size_t idx;
1330 c.oecode = oecode;
1331 c.cnt = 1;
1332 c.id = next_oecount_id++;
1333 c.op = oe1->op;
1334 cvec.safe_push (c);
1335 idx = cvec.length () + 41;
1336 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1337 if (!*slot)
1338 {
1339 *slot = (void *)idx;
1340 }
1341 else
1342 {
1343 cvec.pop ();
1344 cvec[(size_t)*slot - 42].cnt++;
1345 }
1346 }
1347 }
1348 htab_delete (ctable);
1349
1350 /* Sort the counting table. */
1351 cvec.qsort (oecount_cmp);
1352
1353 if (dump_file && (dump_flags & TDF_DETAILS))
1354 {
1355 oecount *c;
1356 fprintf (dump_file, "Candidates:\n");
1357 FOR_EACH_VEC_ELT (cvec, j, c)
1358 {
1359 fprintf (dump_file, " %u %s: ", c->cnt,
1360 c->oecode == MULT_EXPR
1361 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1362 print_generic_expr (dump_file, c->op, 0);
1363 fprintf (dump_file, "\n");
1364 }
1365 }
1366
1367   /* Process the (operand, code) pairs in order of most occurrences. */
1368 candidates2 = sbitmap_alloc (length);
1369 while (!cvec.is_empty ())
1370 {
1371 oecount *c = &cvec.last ();
1372 if (c->cnt < 2)
1373 break;
1374
1375 /* Now collect the operands in the outer chain that contain
1376 the common operand in their inner chain. */
1377 bitmap_clear (candidates2);
1378 nr_candidates2 = 0;
1379 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1380 {
1381 gimple oedef;
1382 enum tree_code oecode;
1383 unsigned j;
1384 tree op = (*ops)[i]->op;
1385
1386 /* If we undistributed in this chain already this may be
1387 a constant. */
1388 if (TREE_CODE (op) != SSA_NAME)
1389 continue;
1390
1391 oedef = SSA_NAME_DEF_STMT (op);
1392 oecode = gimple_assign_rhs_code (oedef);
1393 if (oecode != c->oecode)
1394 continue;
1395
1396 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1397 {
1398 if (oe1->op == c->op)
1399 {
1400 bitmap_set_bit (candidates2, i);
1401 ++nr_candidates2;
1402 break;
1403 }
1404 }
1405 }
1406
1407 if (nr_candidates2 >= 2)
1408 {
1409 operand_entry_t oe1, oe2;
1410 gimple prod;
1411 int first = bitmap_first_set_bit (candidates2);
1412
1413 /* Build the new addition chain. */
1414 oe1 = (*ops)[first];
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 {
1417 fprintf (dump_file, "Building (");
1418 print_generic_expr (dump_file, oe1->op, 0);
1419 }
1420 zero_one_operation (&oe1->op, c->oecode, c->op);
1421 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1422 {
1423 gimple sum;
1424 oe2 = (*ops)[i];
1425 if (dump_file && (dump_flags & TDF_DETAILS))
1426 {
1427 fprintf (dump_file, " + ");
1428 print_generic_expr (dump_file, oe2->op, 0);
1429 }
1430 zero_one_operation (&oe2->op, c->oecode, c->op);
1431 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1432 oe1->op, oe2->op, opcode);
1433 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1434 oe2->rank = 0;
1435 oe1->op = gimple_get_lhs (sum);
1436 }
1437
1438 /* Apply the multiplication/division. */
1439 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1440 oe1->op, c->op, c->oecode);
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 {
1443 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1444 print_generic_expr (dump_file, c->op, 0);
1445 fprintf (dump_file, "\n");
1446 }
1447
1448 /* Record it in the addition chain and disable further
1449 undistribution with this op. */
1450 oe1->op = gimple_assign_lhs (prod);
1451 oe1->rank = get_rank (oe1->op);
1452 subops[first].release ();
1453
1454 changed = true;
1455 }
1456
1457 cvec.pop ();
1458 }
1459
1460 for (i = 0; i < ops->length (); ++i)
1461 subops[i].release ();
1462 free (subops);
1463 cvec.release ();
1464 sbitmap_free (candidates);
1465 sbitmap_free (candidates2);
1466
1467 return changed;
1468 }
1469
1470 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1471 expression, examine the other OPS to see if any of them are comparisons
1472 of the same values, which we may be able to combine or eliminate.
1473 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1474
1475 static bool
1476 eliminate_redundant_comparison (enum tree_code opcode,
1477 vec<operand_entry_t> *ops,
1478 unsigned int currindex,
1479 operand_entry_t curr)
1480 {
1481 tree op1, op2;
1482 enum tree_code lcode, rcode;
1483 gimple def1, def2;
1484 int i;
1485 operand_entry_t oe;
1486
1487 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1488 return false;
1489
1490 /* Check that CURR is a comparison. */
1491 if (TREE_CODE (curr->op) != SSA_NAME)
1492 return false;
1493 def1 = SSA_NAME_DEF_STMT (curr->op);
1494 if (!is_gimple_assign (def1))
1495 return false;
1496 lcode = gimple_assign_rhs_code (def1);
1497 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1498 return false;
1499 op1 = gimple_assign_rhs1 (def1);
1500 op2 = gimple_assign_rhs2 (def1);
1501
1502 /* Now look for a similar comparison in the remaining OPS. */
1503 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1504 {
1505 tree t;
1506
1507 if (TREE_CODE (oe->op) != SSA_NAME)
1508 continue;
1509 def2 = SSA_NAME_DEF_STMT (oe->op);
1510 if (!is_gimple_assign (def2))
1511 continue;
1512 rcode = gimple_assign_rhs_code (def2);
1513 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1514 continue;
1515
1516 /* If we got here, we have a match. See if we can combine the
1517 two comparisons. */
1518 if (opcode == BIT_IOR_EXPR)
1519 t = maybe_fold_or_comparisons (lcode, op1, op2,
1520 rcode, gimple_assign_rhs1 (def2),
1521 gimple_assign_rhs2 (def2));
1522 else
1523 t = maybe_fold_and_comparisons (lcode, op1, op2,
1524 rcode, gimple_assign_rhs1 (def2),
1525 gimple_assign_rhs2 (def2));
1526 if (!t)
1527 continue;
1528
1529 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1530 always give us a boolean_type_node value back. If the original
1531 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1532 we need to convert. */
1533 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1534 t = fold_convert (TREE_TYPE (curr->op), t);
1535
1536 if (TREE_CODE (t) != INTEGER_CST
1537 && !operand_equal_p (t, curr->op, 0))
1538 {
1539 enum tree_code subcode;
1540 tree newop1, newop2;
1541 if (!COMPARISON_CLASS_P (t))
1542 continue;
1543 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1544 STRIP_USELESS_TYPE_CONVERSION (newop1);
1545 STRIP_USELESS_TYPE_CONVERSION (newop2);
1546 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1547 continue;
1548 }
1549
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 {
1552 fprintf (dump_file, "Equivalence: ");
1553 print_generic_expr (dump_file, curr->op, 0);
1554 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1555 print_generic_expr (dump_file, oe->op, 0);
1556 fprintf (dump_file, " -> ");
1557 print_generic_expr (dump_file, t, 0);
1558 fprintf (dump_file, "\n");
1559 }
1560
1561 /* Now we can delete oe, as it has been subsumed by the new combined
1562 expression t. */
1563 ops->ordered_remove (i);
1564 reassociate_stats.ops_eliminated ++;
1565
1566 /* If t is the same as curr->op, we're done. Otherwise we must
1567 replace curr->op with t. Special case is if we got a constant
1568 back, in which case we add it to the end instead of in place of
1569 the current entry. */
1570 if (TREE_CODE (t) == INTEGER_CST)
1571 {
1572 ops->ordered_remove (currindex);
1573 add_to_ops_vec (ops, t);
1574 }
1575 else if (!operand_equal_p (t, curr->op, 0))
1576 {
1577 gimple sum;
1578 enum tree_code subcode;
1579 tree newop1;
1580 tree newop2;
1581 gcc_assert (COMPARISON_CLASS_P (t));
1582 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1583 STRIP_USELESS_TYPE_CONVERSION (newop1);
1584 STRIP_USELESS_TYPE_CONVERSION (newop2);
1585 gcc_checking_assert (is_gimple_val (newop1)
1586 && is_gimple_val (newop2));
1587 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1588 curr->op = gimple_get_lhs (sum);
1589 }
1590 return true;
1591 }
1592
1593 return false;
1594 }
1595
1596 /* Perform various identities and other optimizations on the list of
1597 operand entries, stored in OPS. The tree code for the binary
1598 operation between all the operands is OPCODE. */
1599
1600 static void
1601 optimize_ops_list (enum tree_code opcode,
1602 vec<operand_entry_t> *ops)
1603 {
1604 unsigned int length = ops->length ();
1605 unsigned int i;
1606 operand_entry_t oe;
1607 operand_entry_t oelast = NULL;
1608 bool iterate = false;
1609
1610 if (length == 1)
1611 return;
1612
1613 oelast = ops->last ();
1614
1615 /* If the last two are constants, pop the constants off, merge them
1616 and try the next two. */
1617 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1618 {
1619 operand_entry_t oelm1 = (*ops)[length - 2];
1620
1621 if (oelm1->rank == 0
1622 && is_gimple_min_invariant (oelm1->op)
1623 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1624 TREE_TYPE (oelast->op)))
1625 {
1626 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1627 oelm1->op, oelast->op);
1628
1629 if (folded && is_gimple_min_invariant (folded))
1630 {
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, "Merging constants\n");
1633
1634 ops->pop ();
1635 ops->pop ();
1636
1637 add_to_ops_vec (ops, folded);
1638 reassociate_stats.constants_eliminated++;
1639
1640 optimize_ops_list (opcode, ops);
1641 return;
1642 }
1643 }
1644 }
1645
1646 eliminate_using_constants (opcode, ops);
1647 oelast = NULL;
1648
1649 for (i = 0; ops->iterate (i, &oe);)
1650 {
1651 bool done = false;
1652
1653 if (eliminate_not_pairs (opcode, ops, i, oe))
1654 return;
1655 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1656 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1657 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1658 {
1659 if (done)
1660 return;
1661 iterate = true;
1662 oelast = NULL;
1663 continue;
1664 }
1665 oelast = oe;
1666 i++;
1667 }
1668
1669 length = ops->length ();
1670 oelast = ops->last ();
1671
1672 if (iterate)
1673 optimize_ops_list (opcode, ops);
1674 }
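
/* E.g. (illustrative only) for the chain a + 3 + b + 5 the rank sort puts
   the two constants next to each other at the end of the operand list,
   the code above folds them into a single constant 8, and the chain is
   later rewritten as a + b + 8.  */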
1675
1676 /* The following functions are subroutines to optimize_range_tests and allow
1677 it to try to change a logical combination of comparisons into a range
1678 test.
1679
1680 For example, both
1681 X == 2 || X == 5 || X == 3 || X == 4
1682 and
1683 X >= 2 && X <= 5
1684 are converted to
1685 (unsigned) (X - 2) <= 3
1686
1687    For more information see the comments above fold_range_test in
1688    fold-const.c; this implementation is for GIMPLE. */
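
/* The unsigned-subtraction trick works because for X in [2, 5] the value
   X - 2 lies in [0, 3], while for X < 2 the subtraction wraps around to a
   very large unsigned value and for X > 5 it exceeds 3, so one unsigned
   comparison covers both bounds.  */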
1689
1690 struct range_entry
1691 {
1692 tree exp;
1693 tree low;
1694 tree high;
1695 bool in_p;
1696 bool strict_overflow_p;
1697 unsigned int idx, next;
1698 };
1699
1700 /* This is similar to make_range in fold-const.c, but on top of
1701 GIMPLE instead of trees. If EXP is non-NULL, it should be
1702    an SSA_NAME and the STMT argument is ignored; otherwise STMT
1703    should be a GIMPLE_COND. */
1704
1705 static void
1706 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1707 {
1708 int in_p;
1709 tree low, high;
1710 bool is_bool, strict_overflow_p;
1711
1712 r->exp = NULL_TREE;
1713 r->in_p = false;
1714 r->strict_overflow_p = false;
1715 r->low = NULL_TREE;
1716 r->high = NULL_TREE;
1717 if (exp != NULL_TREE
1718 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1719 return;
1720
1721 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1722 and see if we can refine the range. Some of the cases below may not
1723 happen, but it doesn't seem worth worrying about this. We "continue"
1724 the outer loop when we've changed something; otherwise we "break"
1725 the switch, which will "break" the while. */
1726 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1727 high = low;
1728 in_p = 0;
1729 strict_overflow_p = false;
1730 is_bool = false;
1731 if (exp == NULL_TREE)
1732 is_bool = true;
1733 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1734 {
1735 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1736 is_bool = true;
1737 else
1738 return;
1739 }
1740 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1741 is_bool = true;
1742
1743 while (1)
1744 {
1745 enum tree_code code;
1746 tree arg0, arg1, exp_type;
1747 tree nexp;
1748 location_t loc;
1749
1750 if (exp != NULL_TREE)
1751 {
1752 if (TREE_CODE (exp) != SSA_NAME)
1753 break;
1754
1755 stmt = SSA_NAME_DEF_STMT (exp);
1756 if (!is_gimple_assign (stmt))
1757 break;
1758
1759 code = gimple_assign_rhs_code (stmt);
1760 arg0 = gimple_assign_rhs1 (stmt);
1761 arg1 = gimple_assign_rhs2 (stmt);
1762 exp_type = TREE_TYPE (exp);
1763 }
1764 else
1765 {
1766 code = gimple_cond_code (stmt);
1767 arg0 = gimple_cond_lhs (stmt);
1768 arg1 = gimple_cond_rhs (stmt);
1769 exp_type = boolean_type_node;
1770 }
1771
1772 if (TREE_CODE (arg0) != SSA_NAME)
1773 break;
1774 loc = gimple_location (stmt);
1775 switch (code)
1776 {
1777 case BIT_NOT_EXPR:
1778 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1779 {
1780 in_p = !in_p;
1781 exp = arg0;
1782 continue;
1783 }
1784 break;
1785 case SSA_NAME:
1786 exp = arg0;
1787 continue;
1788 CASE_CONVERT:
1789 if (is_bool)
1790 goto do_default;
1791 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1792 {
1793 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1794 is_bool = true;
1795 else
1796 return;
1797 }
1798 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1799 is_bool = true;
1800 goto do_default;
1801 case EQ_EXPR:
1802 case NE_EXPR:
1803 case LT_EXPR:
1804 case LE_EXPR:
1805 case GE_EXPR:
1806 case GT_EXPR:
1807 is_bool = true;
1808 /* FALLTHRU */
1809 default:
1810 if (!is_bool)
1811 return;
1812 do_default:
1813 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1814 &low, &high, &in_p,
1815 &strict_overflow_p);
1816 if (nexp != NULL_TREE)
1817 {
1818 exp = nexp;
1819 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1820 continue;
1821 }
1822 break;
1823 }
1824 break;
1825 }
1826 if (is_bool)
1827 {
1828 r->exp = exp;
1829 r->in_p = in_p;
1830 r->low = low;
1831 r->high = high;
1832 r->strict_overflow_p = strict_overflow_p;
1833 }
1834 }
1835
1836 /* Comparison function for qsort. Sort entries
1837 without SSA_NAME exp first, then with SSA_NAMEs sorted
1838 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1839 by increasing ->low and if ->low is the same, by increasing
1840    ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE means
1841    maximum. */
1842
1843 static int
1844 range_entry_cmp (const void *a, const void *b)
1845 {
1846 const struct range_entry *p = (const struct range_entry *) a;
1847 const struct range_entry *q = (const struct range_entry *) b;
1848
1849 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1850 {
1851 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1852 {
1853 /* Group range_entries for the same SSA_NAME together. */
1854 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1855 return -1;
1856 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1857 return 1;
1858 /* If ->low is different, NULL low goes first, then by
1859 ascending low. */
1860 if (p->low != NULL_TREE)
1861 {
1862 if (q->low != NULL_TREE)
1863 {
1864 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1865 p->low, q->low);
1866 if (tem && integer_onep (tem))
1867 return -1;
1868 tem = fold_binary (GT_EXPR, boolean_type_node,
1869 p->low, q->low);
1870 if (tem && integer_onep (tem))
1871 return 1;
1872 }
1873 else
1874 return 1;
1875 }
1876 else if (q->low != NULL_TREE)
1877 return -1;
1878 /* If ->high is different, NULL high goes last, before that by
1879 ascending high. */
1880 if (p->high != NULL_TREE)
1881 {
1882 if (q->high != NULL_TREE)
1883 {
1884 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1885 p->high, q->high);
1886 if (tem && integer_onep (tem))
1887 return -1;
1888 tem = fold_binary (GT_EXPR, boolean_type_node,
1889 p->high, q->high);
1890 if (tem && integer_onep (tem))
1891 return 1;
1892 }
1893 else
1894 return -1;
1895 }
1896 else if (p->high != NULL_TREE)
1897 return 1;
1898 /* If both ranges are the same, sort below by ascending idx. */
1899 }
1900 else
1901 return 1;
1902 }
1903 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1904 return -1;
1905
1906 if (p->idx < q->idx)
1907 return -1;
1908 else
1909 {
1910 gcc_checking_assert (p->idx > q->idx);
1911 return 1;
1912 }
1913 }
1914
1915 /* Helper routine of optimize_range_tests.
1916 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1917 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1918 OPCODE and OPS are arguments of optimize_range_tests. Return
1919 true if the range merge has been successful.
1920 If OPCODE is ERROR_MARK, this is called from within
1921 maybe_optimize_range_tests and is performing inter-bb range optimization.
1922    Changes should then be performed right away, and whether an op is
1923 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1924
1925 static bool
1926 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1927 unsigned int count, enum tree_code opcode,
1928 vec<operand_entry_t> *ops, tree exp, bool in_p,
1929 tree low, tree high, bool strict_overflow_p)
1930 {
1931 operand_entry_t oe = (*ops)[range->idx];
1932 tree op = oe->op;
1933 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1934 location_t loc = gimple_location (stmt);
1935 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1936 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1937 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1938 gimple_stmt_iterator gsi;
1939
1940 if (tem == NULL_TREE)
1941 return false;
1942
1943 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1944 warning_at (loc, OPT_Wstrict_overflow,
1945 "assuming signed overflow does not occur "
1946 "when simplifying range test");
1947
1948 if (dump_file && (dump_flags & TDF_DETAILS))
1949 {
1950 struct range_entry *r;
1951 fprintf (dump_file, "Optimizing range tests ");
1952 print_generic_expr (dump_file, range->exp, 0);
1953 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1954 print_generic_expr (dump_file, range->low, 0);
1955 fprintf (dump_file, ", ");
1956 print_generic_expr (dump_file, range->high, 0);
1957 fprintf (dump_file, "]");
1958 for (r = otherrange; r < otherrange + count; r++)
1959 {
1960 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1961 print_generic_expr (dump_file, r->low, 0);
1962 fprintf (dump_file, ", ");
1963 print_generic_expr (dump_file, r->high, 0);
1964 fprintf (dump_file, "]");
1965 }
1966 fprintf (dump_file, "\n into ");
1967 print_generic_expr (dump_file, tem, 0);
1968 fprintf (dump_file, "\n");
1969 }
1970
1971 if (opcode == BIT_IOR_EXPR
1972 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1973 tem = invert_truthvalue_loc (loc, tem);
1974
1975 tem = fold_convert_loc (loc, optype, tem);
1976 gsi = gsi_for_stmt (stmt);
1977 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1978 GSI_SAME_STMT);
1979
1980 /* If doing inter-bb range test optimization, update the
1981 stmts immediately. Start with changing the first range test
1982 immediate use to the new value (TEM), or, if the first range
1983 test is a GIMPLE_COND stmt, change that condition. */
1984 if (opcode == ERROR_MARK)
1985 {
1986 if (op)
1987 {
1988 imm_use_iterator iter;
1989 use_operand_p use_p;
1990 gimple use_stmt;
1991
1992 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1993 {
1994 if (is_gimple_debug (use_stmt))
1995 continue;
1996 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1997 SET_USE (use_p, tem);
1998 update_stmt (use_stmt);
1999 }
2000 }
2001 else
2002 {
2003 gimple_cond_set_code (stmt, NE_EXPR);
2004 gimple_cond_set_lhs (stmt, tem);
2005 gimple_cond_set_rhs (stmt, boolean_false_node);
2006 update_stmt (stmt);
2007 }
2008 }
2009 oe->op = tem;
2010 range->exp = exp;
2011 range->low = low;
2012 range->high = high;
2013 range->in_p = in_p;
2014 range->strict_overflow_p = false;
2015
2016 for (range = otherrange; range < otherrange + count; range++)
2017 {
2018 oe = (*ops)[range->idx];
2019 /* Now change all the other range test immediate uses, so that
2020 those tests will be optimized away. */
2021 if (opcode == ERROR_MARK)
2022 {
2023 if (oe->op)
2024 {
2025 imm_use_iterator iter;
2026 use_operand_p use_p;
2027 gimple use_stmt;
2028
2029 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2030 {
2031 if (is_gimple_debug (use_stmt))
2032 continue;
2033 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2034 adjust it into _7 = _9;. */
2035 if (is_gimple_assign (use_stmt)
2036 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2037 {
2038 tree expr = NULL_TREE;
2039 if (oe->op == gimple_assign_rhs1 (use_stmt))
2040 expr = gimple_assign_rhs2 (use_stmt);
2041 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2042 expr = gimple_assign_rhs1 (use_stmt);
2043 if (expr
2044 && expr != oe->op
2045 && TREE_CODE (expr) == SSA_NAME)
2046 {
2047 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2048 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2049 expr, NULL_TREE);
2050 update_stmt (use_stmt);
2051 continue;
2052 }
2053 }
2054 /* If imm use of _8 is a statement like _7 = (int) _8;,
2055 adjust it into _7 = 0; or _7 = 1;. */
2056 if (gimple_assign_cast_p (use_stmt)
2057 && oe->op == gimple_assign_rhs1 (use_stmt))
2058 {
2059 tree lhs = gimple_assign_lhs (use_stmt);
2060 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2061 {
2062 gimple_stmt_iterator gsi2
2063 = gsi_for_stmt (use_stmt);
2064 tree expr = build_int_cst (TREE_TYPE (lhs),
2065 oe->rank == BIT_IOR_EXPR
2066 ? 0 : 1);
2067 gimple_assign_set_rhs_with_ops (&gsi2,
2068 INTEGER_CST,
2069 expr, NULL_TREE);
2070 update_stmt (use_stmt);
2071 continue;
2072 }
2073 }
2074 /* Otherwise replace the use with 0 or 1. */
2075 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2076 SET_USE (use_p,
2077 build_int_cst (TREE_TYPE (oe->op),
2078 oe->rank == BIT_IOR_EXPR
2079 ? 0 : 1));
2080 update_stmt (use_stmt);
2081 }
2082 }
2083 else
2084 {
2085 /* If range test was a GIMPLE_COND, simply change it
2086 into an always false or always true condition. */
2087 stmt = last_stmt (BASIC_BLOCK (oe->id));
2088 if (oe->rank == BIT_IOR_EXPR)
2089 gimple_cond_make_false (stmt);
2090 else
2091 gimple_cond_make_true (stmt);
2092 update_stmt (stmt);
2093 }
2094 }
2095 oe->op = error_mark_node;
2096 range->exp = NULL_TREE;
2097 }
2098 return true;
2099 }
2100
2101 /* Optimize range tests, similarly to how fold_range_test optimizes
2102 	   them on trees.  The tree code for the binary
2103 operation between all the operands is OPCODE.
2104 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2105 maybe_optimize_range_tests for inter-bb range optimization.
2106 In that case if oe->op is NULL, oe->id is bb->index whose
2107 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2108 the actual opcode. */
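/* For illustration (hypothetical SSA names and constants), with
   OPCODE == BIT_IOR_EXPR and operands testing a_1 == 0, a_1 == 1 and
   a_1 == 5, the [0, 0] and [1, 1] ranges merge, so roughly

     _10 = _7 | _8 | _9   (with _7 = a_1 == 0, _8 = a_1 == 1, _9 = a_1 == 5)

   becomes

     _10 = _11 | _9       (with _11 = (unsigned) a_1 <= 1)

   while the a_1 == 5 test is left alone.  */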
2109
2110 static void
2111 optimize_range_tests (enum tree_code opcode,
2112 vec<operand_entry_t> *ops)
2113 {
2114 unsigned int length = ops->length (), i, j, first;
2115 operand_entry_t oe;
2116 struct range_entry *ranges;
2117 bool any_changes = false;
2118
2119 if (length == 1)
2120 return;
2121
2122 ranges = XNEWVEC (struct range_entry, length);
2123 for (i = 0; i < length; i++)
2124 {
2125 oe = (*ops)[i];
2126 ranges[i].idx = i;
2127 init_range_entry (ranges + i, oe->op,
2128 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2129 /* For | invert it now, we will invert it again before emitting
2130 the optimized expression. */
2131 if (opcode == BIT_IOR_EXPR
2132 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2133 ranges[i].in_p = !ranges[i].in_p;
2134 }
2135
2136 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2137 for (i = 0; i < length; i++)
2138 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2139 break;
2140
2141 /* Try to merge ranges. */
2142 for (first = i; i < length; i++)
2143 {
2144 tree low = ranges[i].low;
2145 tree high = ranges[i].high;
2146 int in_p = ranges[i].in_p;
2147 bool strict_overflow_p = ranges[i].strict_overflow_p;
2148 int update_fail_count = 0;
2149
2150 for (j = i + 1; j < length; j++)
2151 {
2152 if (ranges[i].exp != ranges[j].exp)
2153 break;
2154 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2155 ranges[j].in_p, ranges[j].low, ranges[j].high))
2156 break;
2157 strict_overflow_p |= ranges[j].strict_overflow_p;
2158 }
2159
2160 if (j == i + 1)
2161 continue;
2162
2163 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2164 ops, ranges[i].exp, in_p, low, high,
2165 strict_overflow_p))
2166 {
2167 i = j - 1;
2168 any_changes = true;
2169 }
2170 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2171 while update_range_test would fail. */
2172 else if (update_fail_count == 64)
2173 i = j - 1;
2174 else
2175 ++update_fail_count;
2176 }
2177
2178 /* Optimize X == CST1 || X == CST2
2179 if popcount (CST1 ^ CST2) == 1 into
2180 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181 Similarly for ranges. E.g.
2182 X != 2 && X != 3 && X != 10 && X != 11
2183 will be transformed by the above loop into
2184 (X - 2U) <= 1U && (X - 10U) <= 1U
2185 and this loop can transform that into
2186 ((X & ~8) - 2U) <= 1U. */
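/* Worked example for the loop below, using the ranges from the comment
   above: lowi == 2, highi == 3, lowj == 10, highj == 11.  Then
   lowxor == 2 ^ 10 == 8, and 8 & (8 - 1) == 0, so it is a power of two;
   highxor == 3 ^ 11 == 8 matches lowxor; tem == ~8, so both ranges
   collapse into the single range [2 & ~8, 3 & ~8] == [2, 3] on the
   expression X & ~8.  */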
2187 for (i = first; i < length; i++)
2188 {
2189 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2190
2191 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2192 continue;
2193 type = TREE_TYPE (ranges[i].exp);
2194 if (!INTEGRAL_TYPE_P (type))
2195 continue;
2196 lowi = ranges[i].low;
2197 if (lowi == NULL_TREE)
2198 lowi = TYPE_MIN_VALUE (type);
2199 highi = ranges[i].high;
2200 if (highi == NULL_TREE)
2201 continue;
2202 for (j = i + 1; j < length && j < i + 64; j++)
2203 {
2204 if (ranges[j].exp == NULL_TREE)
2205 continue;
2206 if (ranges[i].exp != ranges[j].exp)
2207 break;
2208 if (ranges[j].in_p)
2209 continue;
2210 lowj = ranges[j].low;
2211 if (lowj == NULL_TREE)
2212 continue;
2213 highj = ranges[j].high;
2214 if (highj == NULL_TREE)
2215 highj = TYPE_MAX_VALUE (type);
2216 tem = fold_binary (GT_EXPR, boolean_type_node,
2217 lowj, highi);
2218 if (tem == NULL_TREE || !integer_onep (tem))
2219 continue;
2220 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2221 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2222 continue;
2223 gcc_checking_assert (!integer_zerop (lowxor));
2224 tem = fold_binary (MINUS_EXPR, type, lowxor,
2225 build_int_cst (type, 1));
2226 if (tem == NULL_TREE)
2227 continue;
2228 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2229 if (tem == NULL_TREE || !integer_zerop (tem))
2230 continue;
2231 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2232 if (!tree_int_cst_equal (lowxor, highxor))
2233 continue;
2234 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2235 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2236 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2237 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2238 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2239 ranges[i].in_p, lowj, highj,
2240 ranges[i].strict_overflow_p
2241 || ranges[j].strict_overflow_p))
2242 {
2243 any_changes = true;
2244 break;
2245 }
2246 }
2247 }
2248
2249 if (any_changes && opcode != ERROR_MARK)
2250 {
2251 j = 0;
2252 FOR_EACH_VEC_ELT (*ops, i, oe)
2253 {
2254 if (oe->op == error_mark_node)
2255 continue;
2256 else if (i != j)
2257 (*ops)[j] = oe;
2258 j++;
2259 }
2260 ops->truncate (j);
2261 }
2262
2263 XDELETEVEC (ranges);
2264 }
2265
2266 /* Return true if STMT is a cast like:
2267 <bb N>:
2268 ...
2269 _123 = (int) _234;
2270
2271 <bb M>:
2272 # _345 = PHI <_123(N), 1(...), 1(...)>
2273 where _234 has bool type, _123 has single use and
2274 bb N has a single successor M. This is commonly used in
2275 the last block of a range test. */
2276
2277 static bool
2278 final_range_test_p (gimple stmt)
2279 {
2280 basic_block bb, rhs_bb;
2281 edge e;
2282 tree lhs, rhs;
2283 use_operand_p use_p;
2284 gimple use_stmt;
2285
2286 if (!gimple_assign_cast_p (stmt))
2287 return false;
2288 bb = gimple_bb (stmt);
2289 if (!single_succ_p (bb))
2290 return false;
2291 e = single_succ_edge (bb);
2292 if (e->flags & EDGE_COMPLEX)
2293 return false;
2294
2295 lhs = gimple_assign_lhs (stmt);
2296 rhs = gimple_assign_rhs1 (stmt);
2297 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2298 || TREE_CODE (rhs) != SSA_NAME
2299 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2300 return false;
2301
2302 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2303 if (!single_imm_use (lhs, &use_p, &use_stmt))
2304 return false;
2305
2306 if (gimple_code (use_stmt) != GIMPLE_PHI
2307 || gimple_bb (use_stmt) != e->dest)
2308 return false;
2309
2310 /* And that the rhs is defined in the same loop. */
2311 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2312 if (rhs_bb == NULL
2313 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2314 return false;
2315
2316 return true;
2317 }
2318
2319 /* Return true if BB is a suitable basic block for inter-bb range test
2320 	   optimization.  If BACKWARD is true, BB should be the only predecessor
2321 	   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2322 	   or compared against the common basic block to which all the conditions
2323 	   branch when true resp. false.  If BACKWARD is false, TEST_BB should
2324 	   be the only predecessor of BB.  */
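/* For illustration (hypothetical CFG), with BACKWARD true, BB == bb2 and
   TEST_BB == bb3 in

     bb2: if (a_1 == 3) goto bb5; else goto bb3;
     bb3: if (a_1 == 5) goto bb5; else goto bb4;
     bb5: # _9 = PHI <1(bb2), 1(bb3), ...>

   the routine fills *OTHER_BB with bb5, since both conditions branch
   there and the PHI arguments for the bb2 and bb3 edges agree.  */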
2325
2326 static bool
2327 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2328 bool backward)
2329 {
2330 edge_iterator ei, ei2;
2331 edge e, e2;
2332 gimple stmt;
2333 gimple_stmt_iterator gsi;
2334 bool other_edge_seen = false;
2335 bool is_cond;
2336
2337 if (test_bb == bb)
2338 return false;
2339 /* Check last stmt first. */
2340 stmt = last_stmt (bb);
2341 if (stmt == NULL
2342 || (gimple_code (stmt) != GIMPLE_COND
2343 && (backward || !final_range_test_p (stmt)))
2344 || gimple_visited_p (stmt)
2345 || stmt_could_throw_p (stmt)
2346 || *other_bb == bb)
2347 return false;
2348 is_cond = gimple_code (stmt) == GIMPLE_COND;
2349 if (is_cond)
2350 {
2351 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2352 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2353 to *OTHER_BB (if not set yet, try to find it out). */
2354 if (EDGE_COUNT (bb->succs) != 2)
2355 return false;
2356 FOR_EACH_EDGE (e, ei, bb->succs)
2357 {
2358 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2359 return false;
2360 if (e->dest == test_bb)
2361 {
2362 if (backward)
2363 continue;
2364 else
2365 return false;
2366 }
2367 if (e->dest == bb)
2368 return false;
2369 if (*other_bb == NULL)
2370 {
2371 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2372 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2373 return false;
2374 else if (e->dest == e2->dest)
2375 *other_bb = e->dest;
2376 if (*other_bb == NULL)
2377 return false;
2378 }
2379 if (e->dest == *other_bb)
2380 other_edge_seen = true;
2381 else if (backward)
2382 return false;
2383 }
2384 if (*other_bb == NULL || !other_edge_seen)
2385 return false;
2386 }
2387 else if (single_succ (bb) != *other_bb)
2388 return false;
2389
2390 /* Now check all PHIs of *OTHER_BB. */
2391 e = find_edge (bb, *other_bb);
2392 e2 = find_edge (test_bb, *other_bb);
2393 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2394 {
2395 gimple phi = gsi_stmt (gsi);
2396 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2397 corresponding to BB and TEST_BB predecessor must be the same. */
2398 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2399 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2400 {
2401 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2402 one of the PHIs should have the lhs of the last stmt in
2403 that block as PHI arg and that PHI should have 0 or 1
2404 corresponding to it in all other range test basic blocks
2405 considered. */
2406 if (!is_cond)
2407 {
2408 if (gimple_phi_arg_def (phi, e->dest_idx)
2409 == gimple_assign_lhs (stmt)
2410 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2411 || integer_onep (gimple_phi_arg_def (phi,
2412 e2->dest_idx))))
2413 continue;
2414 }
2415 else
2416 {
2417 gimple test_last = last_stmt (test_bb);
2418 if (gimple_code (test_last) != GIMPLE_COND
2419 && gimple_phi_arg_def (phi, e2->dest_idx)
2420 == gimple_assign_lhs (test_last)
2421 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2422 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2423 continue;
2424 }
2425
2426 return false;
2427 }
2428 }
2429 return true;
2430 }
2431
2432 /* Return true if BB doesn't have side-effects that would disallow
2433 range test optimization, all SSA_NAMEs set in the bb are consumed
2434 in the bb and there are no PHIs. */
2435
2436 static bool
2437 no_side_effect_bb (basic_block bb)
2438 {
2439 gimple_stmt_iterator gsi;
2440 gimple last;
2441
2442 if (!gimple_seq_empty_p (phi_nodes (bb)))
2443 return false;
2444 last = last_stmt (bb);
2445 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2446 {
2447 gimple stmt = gsi_stmt (gsi);
2448 tree lhs;
2449 imm_use_iterator imm_iter;
2450 use_operand_p use_p;
2451
2452 if (is_gimple_debug (stmt))
2453 continue;
2454 if (gimple_has_side_effects (stmt))
2455 return false;
2456 if (stmt == last)
2457 return true;
2458 if (!is_gimple_assign (stmt))
2459 return false;
2460 lhs = gimple_assign_lhs (stmt);
2461 if (TREE_CODE (lhs) != SSA_NAME)
2462 return false;
2463 if (gimple_assign_rhs_could_trap_p (stmt))
2464 return false;
2465 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2466 {
2467 gimple use_stmt = USE_STMT (use_p);
2468 if (is_gimple_debug (use_stmt))
2469 continue;
2470 if (gimple_bb (use_stmt) != bb)
2471 return false;
2472 }
2473 }
2474 return false;
2475 }
2476
2477 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2478 return true and fill in *OPS recursively. */
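/* For illustration (hypothetical names), if VAR is _5 with

     _3 = _1 | _2;
     _5 = _3 | _4;

   and _1, _2 and _4 are single-use SSA_NAMEs whose defining statements
   are not themselves reassociable BIT_IOR_EXPRs, the recursion pushes
   the three leaves _1, _2 and _4 into *OPS.  */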
2479
2480 static bool
2481 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2482 struct loop *loop)
2483 {
2484 gimple stmt = SSA_NAME_DEF_STMT (var);
2485 tree rhs[2];
2486 int i;
2487
2488 if (!is_reassociable_op (stmt, code, loop))
2489 return false;
2490
2491 rhs[0] = gimple_assign_rhs1 (stmt);
2492 rhs[1] = gimple_assign_rhs2 (stmt);
2493 gimple_set_visited (stmt, true);
2494 for (i = 0; i < 2; i++)
2495 if (TREE_CODE (rhs[i]) == SSA_NAME
2496 && !get_ops (rhs[i], code, ops, loop)
2497 && has_single_use (rhs[i]))
2498 {
2499 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2500
2501 oe->op = rhs[i];
2502 oe->rank = code;
2503 oe->id = 0;
2504 oe->count = 1;
2505 ops->safe_push (oe);
2506 }
2507 return true;
2508 }
2509
2510 /* Inter-bb range test optimization. */
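/* For illustration (hypothetical source), code such as

     if (a == 0) goto out;
     if (a == 1) goto out;

   puts the two comparisons into separate basic blocks, so the intra-block
   optimize_range_tests above cannot see them together; this routine
   collects such GIMPLE_CONDs across blocks and hands them to
   optimize_range_tests as one operand list.  */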
2511
2512 static void
2513 maybe_optimize_range_tests (gimple stmt)
2514 {
2515 basic_block first_bb = gimple_bb (stmt);
2516 basic_block last_bb = first_bb;
2517 basic_block other_bb = NULL;
2518 basic_block bb;
2519 edge_iterator ei;
2520 edge e;
2521 vec<operand_entry_t> ops = vNULL;
2522
2523 /* Consider only basic blocks that end with GIMPLE_COND or
2524 a cast statement satisfying final_range_test_p. All
2525 but the last bb in the first_bb .. last_bb range
2526 should end with GIMPLE_COND. */
2527 if (gimple_code (stmt) == GIMPLE_COND)
2528 {
2529 if (EDGE_COUNT (first_bb->succs) != 2)
2530 return;
2531 }
2532 else if (final_range_test_p (stmt))
2533 other_bb = single_succ (first_bb);
2534 else
2535 return;
2536
2537 if (stmt_could_throw_p (stmt))
2538 return;
2539
2540 /* As relative ordering of post-dominator sons isn't fixed,
2541 maybe_optimize_range_tests can be called first on any
2542 bb in the range we want to optimize. So, start searching
2543 backwards, if first_bb can be set to a predecessor. */
2544 while (single_pred_p (first_bb))
2545 {
2546 basic_block pred_bb = single_pred (first_bb);
2547 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2548 break;
2549 if (!no_side_effect_bb (first_bb))
2550 break;
2551 first_bb = pred_bb;
2552 }
2553 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2554 Before starting forward search in last_bb successors, find
2555 out the other_bb. */
2556 if (first_bb == last_bb)
2557 {
2558 other_bb = NULL;
2559 /* As non-GIMPLE_COND last stmt always terminates the range,
2560 	 if the backward search didn't discover anything, just give up.  */
2561 if (gimple_code (stmt) != GIMPLE_COND)
2562 return;
2563 /* Look at both successors. Either it ends with a GIMPLE_COND
2564 and satisfies suitable_cond_bb, or ends with a cast and
2565 other_bb is that cast's successor. */
2566 FOR_EACH_EDGE (e, ei, first_bb->succs)
2567 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2568 || e->dest == first_bb)
2569 return;
2570 else if (single_pred_p (e->dest))
2571 {
2572 stmt = last_stmt (e->dest);
2573 if (stmt
2574 && gimple_code (stmt) == GIMPLE_COND
2575 && EDGE_COUNT (e->dest->succs) == 2)
2576 {
2577 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2578 break;
2579 else
2580 other_bb = NULL;
2581 }
2582 else if (stmt
2583 && final_range_test_p (stmt)
2584 && find_edge (first_bb, single_succ (e->dest)))
2585 {
2586 other_bb = single_succ (e->dest);
2587 if (other_bb == first_bb)
2588 other_bb = NULL;
2589 }
2590 }
2591 if (other_bb == NULL)
2592 return;
2593 }
2594 /* Now do the forward search, moving last_bb to successor bbs
2595 that aren't other_bb. */
2596 while (EDGE_COUNT (last_bb->succs) == 2)
2597 {
2598 FOR_EACH_EDGE (e, ei, last_bb->succs)
2599 if (e->dest != other_bb)
2600 break;
2601 if (e == NULL)
2602 break;
2603 if (!single_pred_p (e->dest))
2604 break;
2605 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2606 break;
2607 if (!no_side_effect_bb (e->dest))
2608 break;
2609 last_bb = e->dest;
2610 }
2611 if (first_bb == last_bb)
2612 return;
2613 /* Here basic blocks first_bb through last_bb's predecessor
2614 end with GIMPLE_COND, all of them have one of the edges to
2615 other_bb and another to another block in the range,
2616 all blocks except first_bb don't have side-effects and
2617 last_bb ends with either GIMPLE_COND, or cast satisfying
2618 final_range_test_p. */
2619 for (bb = last_bb; ; bb = single_pred (bb))
2620 {
2621 enum tree_code code;
2622 tree lhs, rhs;
2623
2624 e = find_edge (bb, other_bb);
2625 stmt = last_stmt (bb);
2626 gimple_set_visited (stmt, true);
2627 if (gimple_code (stmt) != GIMPLE_COND)
2628 {
2629 use_operand_p use_p;
2630 gimple phi;
2631 edge e2;
2632 unsigned int d;
2633
2634 lhs = gimple_assign_lhs (stmt);
2635 rhs = gimple_assign_rhs1 (stmt);
2636 gcc_assert (bb == last_bb);
2637
2638 /* stmt is
2639 _123 = (int) _234;
2640
2641 followed by:
2642 <bb M>:
2643 # _345 = PHI <_123(N), 1(...), 1(...)>
2644
2645 or 0 instead of 1. If it is 0, the _234
2646 range test is anded together with all the
2647 other range tests, if it is 1, it is ored with
2648 them. */
2649 single_imm_use (lhs, &use_p, &phi);
2650 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2651 e2 = find_edge (first_bb, other_bb);
2652 d = e2->dest_idx;
2653 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2654 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2655 code = BIT_AND_EXPR;
2656 else
2657 {
2658 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2659 code = BIT_IOR_EXPR;
2660 }
2661
2662 /* If _234 SSA_NAME_DEF_STMT is
2663 _234 = _567 | _789;
2664 	     (or &, corresponding to 1/0 in the phi arguments),
2665 push into ops the individual range test arguments
2666 of the bitwise or resp. and, recursively. */
2667 if (!get_ops (rhs, code, &ops,
2668 loop_containing_stmt (stmt))
2669 && has_single_use (rhs))
2670 {
2671 /* Otherwise, push the _234 range test itself. */
2672 operand_entry_t oe
2673 = (operand_entry_t) pool_alloc (operand_entry_pool);
2674
2675 oe->op = rhs;
2676 oe->rank = code;
2677 oe->id = 0;
2678 oe->count = 1;
2679 ops.safe_push (oe);
2680 }
2681 continue;
2682 }
2683 /* Otherwise stmt is GIMPLE_COND. */
2684 code = gimple_cond_code (stmt);
2685 lhs = gimple_cond_lhs (stmt);
2686 rhs = gimple_cond_rhs (stmt);
2687 if (TREE_CODE (lhs) == SSA_NAME
2688 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2689 && ((code != EQ_EXPR && code != NE_EXPR)
2690 || rhs != boolean_false_node
2691 /* Either push into ops the individual bitwise
2692 or resp. and operands, depending on which
2693 edge is other_bb. */
2694 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2695 ^ (code == EQ_EXPR))
2696 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2697 loop_containing_stmt (stmt))))
2698 {
2699 /* Or push the GIMPLE_COND stmt itself. */
2700 operand_entry_t oe
2701 = (operand_entry_t) pool_alloc (operand_entry_pool);
2702
2703 oe->op = NULL;
2704 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2705 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2706 	  /* oe->op = NULL signals that there is no SSA_NAME
2707 	     for the range test, and oe->id instead is the
2708 	     number of the basic block at whose end the
2709 	     GIMPLE_COND is.  */
2710 oe->id = bb->index;
2711 oe->count = 1;
2712 ops.safe_push (oe);
2713 }
2714 if (bb == first_bb)
2715 break;
2716 }
2717 if (ops.length () > 1)
2718 optimize_range_tests (ERROR_MARK, &ops);
2719 ops.release ();
2720 }
2721
2722 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2723 of STMT in it's operands. This is also known as a "destructive
2724 update" operation. */
2725
2726 static bool
2727 is_phi_for_stmt (gimple stmt, tree operand)
2728 {
2729 gimple def_stmt;
2730 tree lhs;
2731 use_operand_p arg_p;
2732 ssa_op_iter i;
2733
2734 if (TREE_CODE (operand) != SSA_NAME)
2735 return false;
2736
2737 lhs = gimple_assign_lhs (stmt);
2738
2739 def_stmt = SSA_NAME_DEF_STMT (operand);
2740 if (gimple_code (def_stmt) != GIMPLE_PHI)
2741 return false;
2742
2743 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2744 if (lhs == USE_FROM_PTR (arg_p))
2745 return true;
2746 return false;
2747 }
2748
2749 /* Remove def stmt of VAR if VAR has zero uses and recurse
2750 on rhs1 operand if so. */
2751
2752 static void
2753 remove_visited_stmt_chain (tree var)
2754 {
2755 gimple stmt;
2756 gimple_stmt_iterator gsi;
2757
2758 while (1)
2759 {
2760 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2761 return;
2762 stmt = SSA_NAME_DEF_STMT (var);
2763 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2764 {
2765 var = gimple_assign_rhs1 (stmt);
2766 gsi = gsi_for_stmt (stmt);
2767 gsi_remove (&gsi, true);
2768 release_defs (stmt);
2769 }
2770 else
2771 return;
2772 }
2773 }
2774
2775 /* This function checks three consecutive operands in
2776    the passed operands vector OPS starting from OPINDEX and
2777    swaps two operands if it is profitable for the binary operation
2778    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2779
2780 We pair ops with the same rank if possible.
2781
2782 The alternative we try is to see if STMT is a destructive
2783 update style statement, which is like:
2784 b = phi (a, ...)
2785 a = c + b;
2786 In that case, we want to use the destructive update form to
2787 expose the possible vectorizer sum reduction opportunity.
2788 In that case, the third operand will be the phi node. This
2789 check is not performed if STMT is null.
2790
2791 We could, of course, try to be better as noted above, and do a
2792 lot of work to try to find these opportunities in >3 operand
2793 cases, but it is unlikely to be worth it. */
2794
2795 static void
2796 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2797 unsigned int opindex, gimple stmt)
2798 {
2799 operand_entry_t oe1, oe2, oe3;
2800
2801 oe1 = ops[opindex];
2802 oe2 = ops[opindex + 1];
2803 oe3 = ops[opindex + 2];
2804
2805 if ((oe1->rank == oe2->rank
2806 && oe2->rank != oe3->rank)
2807 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2808 && !is_phi_for_stmt (stmt, oe1->op)
2809 && !is_phi_for_stmt (stmt, oe2->op)))
2810 {
2811 struct operand_entry temp = *oe3;
2812 oe3->op = oe1->op;
2813 oe3->rank = oe1->rank;
2814 oe1->op = temp.op;
2815 oe1->rank= temp.rank;
2816 }
2817 else if ((oe1->rank == oe3->rank
2818 && oe2->rank != oe3->rank)
2819 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2820 && !is_phi_for_stmt (stmt, oe1->op)
2821 && !is_phi_for_stmt (stmt, oe3->op)))
2822 {
2823 struct operand_entry temp = *oe2;
2824 oe2->op = oe1->op;
2825 oe2->rank = oe1->rank;
2826 oe1->op = temp.op;
2827 oe1->rank= temp.rank;
2828 }
2829 }
2830
2831 /* Recursively rewrite our linearized statements so that the operators
2832 match those in OPS[OPINDEX], putting the computation in rank
2833 order. */
2834
2835 static void
2836 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2837 vec<operand_entry_t> ops, bool moved)
2838 {
2839 tree rhs1 = gimple_assign_rhs1 (stmt);
2840 tree rhs2 = gimple_assign_rhs2 (stmt);
2841 operand_entry_t oe;
2842
2843 /* If we have three operands left, then we want to make sure the ones
2844 that get the double binary op are chosen wisely. */
2845 if (opindex + 3 == ops.length ())
2846 swap_ops_for_binary_stmt (ops, opindex, stmt);
2847
2848 /* The final recursion case for this function is that you have
2849 exactly two operations left.
2850      If we had exactly one op in the entire list to start with, we
2851 would have never called this function, and the tail recursion
2852 rewrites them one at a time. */
2853 if (opindex + 2 == ops.length ())
2854 {
2855 operand_entry_t oe1, oe2;
2856
2857 oe1 = ops[opindex];
2858 oe2 = ops[opindex + 1];
2859
2860 if (rhs1 != oe1->op || rhs2 != oe2->op)
2861 {
2862 if (dump_file && (dump_flags & TDF_DETAILS))
2863 {
2864 fprintf (dump_file, "Transforming ");
2865 print_gimple_stmt (dump_file, stmt, 0, 0);
2866 }
2867
2868 gimple_assign_set_rhs1 (stmt, oe1->op);
2869 gimple_assign_set_rhs2 (stmt, oe2->op);
2870 update_stmt (stmt);
2871 if (rhs1 != oe1->op && rhs1 != oe2->op)
2872 remove_visited_stmt_chain (rhs1);
2873
2874 if (dump_file && (dump_flags & TDF_DETAILS))
2875 {
2876 fprintf (dump_file, " into ");
2877 print_gimple_stmt (dump_file, stmt, 0, 0);
2878 }
2879 }
2880 return;
2881 }
2882
2883 /* If we hit here, we should have 3 or more ops left. */
2884 gcc_assert (opindex + 2 < ops.length ());
2885
2886 /* Rewrite the next operator. */
2887 oe = ops[opindex];
2888
2889 if (oe->op != rhs2)
2890 {
2891 if (!moved)
2892 {
2893 gimple_stmt_iterator gsinow, gsirhs1;
2894 gimple stmt1 = stmt, stmt2;
2895 unsigned int count;
2896
2897 gsinow = gsi_for_stmt (stmt);
2898 count = ops.length () - opindex - 2;
2899 while (count-- != 0)
2900 {
2901 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2902 gsirhs1 = gsi_for_stmt (stmt2);
2903 gsi_move_before (&gsirhs1, &gsinow);
2904 gsi_prev (&gsinow);
2905 stmt1 = stmt2;
2906 }
2907 moved = true;
2908 }
2909
2910 if (dump_file && (dump_flags & TDF_DETAILS))
2911 {
2912 fprintf (dump_file, "Transforming ");
2913 print_gimple_stmt (dump_file, stmt, 0, 0);
2914 }
2915
2916 gimple_assign_set_rhs2 (stmt, oe->op);
2917 update_stmt (stmt);
2918
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2920 {
2921 fprintf (dump_file, " into ");
2922 print_gimple_stmt (dump_file, stmt, 0, 0);
2923 }
2924 }
2925 /* Recurse on the LHS of the binary operator, which is guaranteed to
2926 be the non-leaf side. */
2927 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2928 }
2929
2930 /* Find out how many cycles we need to compute the statement chain.
2931    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
2932    maximum number of independent statements we may execute per cycle.  */
2933
2934 static int
2935 get_required_cycles (int ops_num, int cpu_width)
2936 {
2937 int res;
2938 int elog;
2939 unsigned int rest;
2940
2941 /* While we have more than 2 * cpu_width operands
2942 we may reduce number of operands by cpu_width
2943 per cycle. */
2944 res = ops_num / (2 * cpu_width);
2945
2946   /* The remaining operand count may be halved each cycle
2947      until only one operand remains.  */
2948 rest = (unsigned)(ops_num - res * cpu_width);
2949 elog = exact_log2 (rest);
2950 if (elog >= 0)
2951 res += elog;
2952 else
2953 res += floor_log2 (rest) + 1;
2954
2955 return res;
2956 }
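/* For example, with ops_num == 10 and cpu_width == 2: res starts as
   10 / 4 == 2, rest is 10 - 2 * 2 == 6, which is not a power of two,
   so the result is 2 + floor_log2 (6) + 1 == 5 cycles.  */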
2957
2958 /* Returns an optimal number of registers to use for computation of
2959 given statements. */
2960
2961 static int
2962 get_reassociation_width (int ops_num, enum tree_code opc,
2963 enum machine_mode mode)
2964 {
2965 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2966 int width;
2967 int width_min;
2968 int cycles_best;
2969
2970 if (param_width > 0)
2971 width = param_width;
2972 else
2973 width = targetm.sched.reassociation_width (opc, mode);
2974
2975 if (width == 1)
2976 return width;
2977
2978 /* Get the minimal time required for sequence computation. */
2979 cycles_best = get_required_cycles (ops_num, width);
2980
2981   /* Check whether we may use a smaller width and still compute the
2982      sequence in the same time.  That allows us to reduce register usage.
2983 get_required_cycles is monotonically increasing with lower width
2984 so we can perform a binary search for the minimal width that still
2985 results in the optimal cycle count. */
2986 width_min = 1;
2987 while (width > width_min)
2988 {
2989 int width_mid = (width + width_min) / 2;
2990
2991 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2992 width = width_mid;
2993 else if (width_min < width_mid)
2994 width_min = width_mid;
2995 else
2996 break;
2997 }
2998
2999 return width;
3000 }
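/* For example, with ops_num == 4 and a target-reported width of 4:
   get_required_cycles (4, 4) == 2, get_required_cycles (4, 2) == 2 as
   well, but get_required_cycles (4, 1) == 3, so the binary search above
   settles on a width of 2, saving registers at no cost in cycles.  */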
3001
3002 /* Recursively rewrite our linearized statements so that the operators
3003 match those in OPS[OPINDEX], putting the computation in rank
3004 order and trying to allow operations to be executed in
3005 parallel. */
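/* For illustration (hypothetical names), with four operands a, b, c, d
   and width 2, instead of the serial chain ((a + b) + c) + d the
   rewritten form is roughly

     t1 = c + d;  t2 = a + b;  result = t1 + t2;

   so the two inner additions are independent and may issue in the
   same cycle.  */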
3006
3007 static void
3008 rewrite_expr_tree_parallel (gimple stmt, int width,
3009 vec<operand_entry_t> ops)
3010 {
3011 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3012 int op_num = ops.length ();
3013 int stmt_num = op_num - 1;
3014 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3015 int op_index = op_num - 1;
3016 int stmt_index = 0;
3017 int ready_stmts_end = 0;
3018 int i = 0;
3019 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3020
3021 /* We start expression rewriting from the top statements.
3022 So, in this loop we create a full list of statements
3023 we will work with. */
3024 stmts[stmt_num - 1] = stmt;
3025 for (i = stmt_num - 2; i >= 0; i--)
3026 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3027
3028 for (i = 0; i < stmt_num; i++)
3029 {
3030 tree op1, op2;
3031
3032 /* Determine whether we should use results of
3033 already handled statements or not. */
3034 if (ready_stmts_end == 0
3035 && (i - stmt_index >= width || op_index < 1))
3036 ready_stmts_end = i;
3037
3038       /* Now we choose operands for the next statement.  A nonzero
3039 	 value of ready_stmts_end means that we should use the results
3040 	 of already generated statements as new operands.  */
3041 if (ready_stmts_end > 0)
3042 {
3043 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3044 if (ready_stmts_end > stmt_index)
3045 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3046 else if (op_index >= 0)
3047 op2 = ops[op_index--]->op;
3048 else
3049 {
3050 gcc_assert (stmt_index < i);
3051 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052 }
3053
3054 if (stmt_index >= ready_stmts_end)
3055 ready_stmts_end = 0;
3056 }
3057 else
3058 {
3059 if (op_index > 1)
3060 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3061 op2 = ops[op_index--]->op;
3062 op1 = ops[op_index--]->op;
3063 }
3064
3065 /* If we emit the last statement then we should put
3066 operands into the last statement. It will also
3067 break the loop. */
3068 if (op_index < 0 && stmt_index == i)
3069 i = stmt_num - 1;
3070
3071 if (dump_file && (dump_flags & TDF_DETAILS))
3072 {
3073 fprintf (dump_file, "Transforming ");
3074 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3075 }
3076
3077 /* We keep original statement only for the last one. All
3078 others are recreated. */
3079 if (i == stmt_num - 1)
3080 {
3081 gimple_assign_set_rhs1 (stmts[i], op1);
3082 gimple_assign_set_rhs2 (stmts[i], op2);
3083 update_stmt (stmts[i]);
3084 }
3085 else
3086 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3087
3088 if (dump_file && (dump_flags & TDF_DETAILS))
3089 {
3090 fprintf (dump_file, " into ");
3091 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3092 }
3093 }
3094
3095 remove_visited_stmt_chain (last_rhs1);
3096 }
3097
3098 /* Transform STMT, which is really (A + B) + (C + D), into the left
3099    linear form, ((A+B)+C)+D.
3100    Recurse on D if necessary.  */
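/* For illustration (hypothetical names), the chain

     t1_1 = a + b;  t2_2 = c + d;  x_3 = t1_1 + t2_2;

   is rewritten into a left-linear chain along the lines of

     t1_1 = a + b;  t2_2 = t1_1 + d;  x_3 = t2_2 + c;

   after which the tail recursion continues if the new rhs2 is itself
   defined by a reassociable add.  */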
3101
3102 static void
3103 linearize_expr (gimple stmt)
3104 {
3105 gimple_stmt_iterator gsinow, gsirhs;
3106 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3107 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3108 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3109 gimple newbinrhs = NULL;
3110 struct loop *loop = loop_containing_stmt (stmt);
3111
3112 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3113 && is_reassociable_op (binrhs, rhscode, loop));
3114
3115 gsinow = gsi_for_stmt (stmt);
3116 gsirhs = gsi_for_stmt (binrhs);
3117 gsi_move_before (&gsirhs, &gsinow);
3118
3119 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3120 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3121 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3122
3123 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3124 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3125
3126 if (dump_file && (dump_flags & TDF_DETAILS))
3127 {
3128 fprintf (dump_file, "Linearized: ");
3129 print_gimple_stmt (dump_file, stmt, 0, 0);
3130 }
3131
3132 reassociate_stats.linearized++;
3133 update_stmt (binrhs);
3134 update_stmt (binlhs);
3135 update_stmt (stmt);
3136
3137 gimple_set_visited (stmt, true);
3138 gimple_set_visited (binlhs, true);
3139 gimple_set_visited (binrhs, true);
3140
3141 /* Tail recurse on the new rhs if it still needs reassociation. */
3142 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3143 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3144 want to change the algorithm while converting to tuples. */
3145 linearize_expr (stmt);
3146 }
3147
3148 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3149 it. Otherwise, return NULL. */
3150
3151 static gimple
3152 get_single_immediate_use (tree lhs)
3153 {
3154 use_operand_p immuse;
3155 gimple immusestmt;
3156
3157 if (TREE_CODE (lhs) == SSA_NAME
3158 && single_imm_use (lhs, &immuse, &immusestmt)
3159 && is_gimple_assign (immusestmt))
3160 return immusestmt;
3161
3162 return NULL;
3163 }
3164
3165 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3166 representing the negated value. Insertions of any necessary
3167 instructions go before GSI.
3168 This function is recursive in that, if you hand it "a_5" as the
3169 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3170 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3171
3172 static tree
3173 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3174 {
3175   gimple negatedefstmt = NULL;
3176 tree resultofnegate;
3177
3178 /* If we are trying to negate a name, defined by an add, negate the
3179 add operands instead. */
3180 if (TREE_CODE (tonegate) == SSA_NAME)
3181 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3182 if (TREE_CODE (tonegate) == SSA_NAME
3183 && is_gimple_assign (negatedefstmt)
3184 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3185 && has_single_use (gimple_assign_lhs (negatedefstmt))
3186 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3187 {
3188 gimple_stmt_iterator gsi;
3189 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3190 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3191
3192 gsi = gsi_for_stmt (negatedefstmt);
3193 rhs1 = negate_value (rhs1, &gsi);
3194 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3195
3196 gsi = gsi_for_stmt (negatedefstmt);
3197 rhs2 = negate_value (rhs2, &gsi);
3198 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3199
3200 update_stmt (negatedefstmt);
3201 return gimple_assign_lhs (negatedefstmt);
3202 }
3203
3204 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3205 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3206 NULL_TREE, true, GSI_SAME_STMT);
3207 return resultofnegate;
3208 }
3209
3210 /* Return true if we should break up the subtract in STMT into an add
3211    with negate.  This is true when the subtract operands are really
3212 adds, or the subtract itself is used in an add expression. In
3213 either case, breaking up the subtract into an add with negate
3214 exposes the adds to reassociation. */
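/* For illustration (hypothetical names), for

     x_2 = a_1 - b_3;  y_4 = x_2 + c_5;

   the subtract feeds an addition, so it is broken up into

     _6 = -b_3;  x_2 = a_1 + _6;  y_4 = x_2 + c_5;

   letting reassociation treat a_1 + -b_3 + c_5 as one operand list.  */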
3215
3216 static bool
3217 should_break_up_subtract (gimple stmt)
3218 {
3219 tree lhs = gimple_assign_lhs (stmt);
3220 tree binlhs = gimple_assign_rhs1 (stmt);
3221 tree binrhs = gimple_assign_rhs2 (stmt);
3222 gimple immusestmt;
3223 struct loop *loop = loop_containing_stmt (stmt);
3224
3225 if (TREE_CODE (binlhs) == SSA_NAME
3226 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3227 return true;
3228
3229 if (TREE_CODE (binrhs) == SSA_NAME
3230 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3231 return true;
3232
3233 if (TREE_CODE (lhs) == SSA_NAME
3234 && (immusestmt = get_single_immediate_use (lhs))
3235 && is_gimple_assign (immusestmt)
3236 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3237 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3238 return true;
3239 return false;
3240 }
3241
3242 /* Transform STMT from A - B into A + -B. */
3243
3244 static void
3245 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3246 {
3247 tree rhs1 = gimple_assign_rhs1 (stmt);
3248 tree rhs2 = gimple_assign_rhs2 (stmt);
3249
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 {
3252 fprintf (dump_file, "Breaking up subtract ");
3253 print_gimple_stmt (dump_file, stmt, 0, 0);
3254 }
3255
3256 rhs2 = negate_value (rhs2, gsip);
3257 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3258 update_stmt (stmt);
3259 }
3260
3261 /* Determine whether STMT is a builtin call that raises an SSA name
3262 to an integer power and has only one use. If so, and this is early
3263 reassociation and unsafe math optimizations are permitted, place
3264 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3265 If any of these conditions does not hold, return FALSE. */
3266
3267 static bool
3268 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3269 {
3270 tree fndecl, arg1;
3271 REAL_VALUE_TYPE c, cint;
3272
3273 if (!first_pass_instance
3274 || !flag_unsafe_math_optimizations
3275 || !is_gimple_call (stmt)
3276 || !has_single_use (gimple_call_lhs (stmt)))
3277 return false;
3278
3279 fndecl = gimple_call_fndecl (stmt);
3280
3281 if (!fndecl
3282 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3283 return false;
3284
3285 switch (DECL_FUNCTION_CODE (fndecl))
3286 {
3287 CASE_FLT_FN (BUILT_IN_POW):
3288 *base = gimple_call_arg (stmt, 0);
3289 arg1 = gimple_call_arg (stmt, 1);
3290
3291 if (TREE_CODE (arg1) != REAL_CST)
3292 return false;
3293
3294 c = TREE_REAL_CST (arg1);
3295
3296 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3297 return false;
3298
3299 *exponent = real_to_integer (&c);
3300 real_from_integer (&cint, VOIDmode, *exponent,
3301 *exponent < 0 ? -1 : 0, 0);
3302 if (!real_identical (&c, &cint))
3303 return false;
3304
3305 break;
3306
3307 CASE_FLT_FN (BUILT_IN_POWI):
3308 *base = gimple_call_arg (stmt, 0);
3309 arg1 = gimple_call_arg (stmt, 1);
3310
3311 if (!host_integerp (arg1, 0))
3312 return false;
3313
3314 *exponent = TREE_INT_CST_LOW (arg1);
3315 break;
3316
3317 default:
3318 return false;
3319 }
3320
3321 /* Expanding negative exponents is generally unproductive, so we don't
3322 complicate matters with those. Exponents of zero and one should
3323 have been handled by expression folding. */
3324 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3325 return false;
3326
3327 return true;
3328 }
3329
3330 /* Recursively linearize a binary expression that is the RHS of STMT.
3331 Place the operands of the expression tree in the vector named OPS. */
3332
3333 static void
3334 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3335 bool is_associative, bool set_visited)
3336 {
3337 tree binlhs = gimple_assign_rhs1 (stmt);
3338 tree binrhs = gimple_assign_rhs2 (stmt);
3339 gimple binlhsdef = NULL, binrhsdef = NULL;
3340 bool binlhsisreassoc = false;
3341 bool binrhsisreassoc = false;
3342 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3343 struct loop *loop = loop_containing_stmt (stmt);
3344 tree base = NULL_TREE;
3345 HOST_WIDE_INT exponent = 0;
3346
3347 if (set_visited)
3348 gimple_set_visited (stmt, true);
3349
3350 if (TREE_CODE (binlhs) == SSA_NAME)
3351 {
3352 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3353 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3354 && !stmt_could_throw_p (binlhsdef));
3355 }
3356
3357 if (TREE_CODE (binrhs) == SSA_NAME)
3358 {
3359 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3360 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3361 && !stmt_could_throw_p (binrhsdef));
3362 }
3363
3364 /* If the LHS is not reassociable, but the RHS is, we need to swap
3365 them. If neither is reassociable, there is nothing we can do, so
3366 just put them in the ops vector. If the LHS is reassociable,
3367 linearize it. If both are reassociable, then linearize the RHS
3368 and the LHS. */
3369
3370 if (!binlhsisreassoc)
3371 {
3372 tree temp;
3373
3374       /* If this is not an associative operation, like division, give up.  */
3375 if (!is_associative)
3376 {
3377 add_to_ops_vec (ops, binrhs);
3378 return;
3379 }
3380
3381 if (!binrhsisreassoc)
3382 {
3383 if (rhscode == MULT_EXPR
3384 && TREE_CODE (binrhs) == SSA_NAME
3385 && acceptable_pow_call (binrhsdef, &base, &exponent))
3386 {
3387 add_repeat_to_ops_vec (ops, base, exponent);
3388 gimple_set_visited (binrhsdef, true);
3389 }
3390 else
3391 add_to_ops_vec (ops, binrhs);
3392
3393 if (rhscode == MULT_EXPR
3394 && TREE_CODE (binlhs) == SSA_NAME
3395 && acceptable_pow_call (binlhsdef, &base, &exponent))
3396 {
3397 add_repeat_to_ops_vec (ops, base, exponent);
3398 gimple_set_visited (binlhsdef, true);
3399 }
3400 else
3401 add_to_ops_vec (ops, binlhs);
3402
3403 return;
3404 }
3405
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 {
3408 fprintf (dump_file, "swapping operands of ");
3409 print_gimple_stmt (dump_file, stmt, 0, 0);
3410 }
3411
3412 swap_tree_operands (stmt,
3413 gimple_assign_rhs1_ptr (stmt),
3414 gimple_assign_rhs2_ptr (stmt));
3415 update_stmt (stmt);
3416
3417 if (dump_file && (dump_flags & TDF_DETAILS))
3418 {
3419 fprintf (dump_file, " is now ");
3420 print_gimple_stmt (dump_file, stmt, 0, 0);
3421 }
3422
3423 /* We want to make it so the lhs is always the reassociative op,
3424 so swap. */
3425 temp = binlhs;
3426 binlhs = binrhs;
3427 binrhs = temp;
3428 }
3429 else if (binrhsisreassoc)
3430 {
3431 linearize_expr (stmt);
3432 binlhs = gimple_assign_rhs1 (stmt);
3433 binrhs = gimple_assign_rhs2 (stmt);
3434 }
3435
3436 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3437 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3438 rhscode, loop));
3439 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3440 is_associative, set_visited);
3441
3442 if (rhscode == MULT_EXPR
3443 && TREE_CODE (binrhs) == SSA_NAME
3444 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3445 {
3446 add_repeat_to_ops_vec (ops, base, exponent);
3447 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3448 }
3449 else
3450 add_to_ops_vec (ops, binrhs);
3451 }
3452
3453 /* Repropagate the negates back into subtracts, since no other pass
3454 currently does it. */
3455
3456 static void
3457 repropagate_negates (void)
3458 {
3459 unsigned int i = 0;
3460 tree negate;
3461
3462 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3463 {
3464 gimple user = get_single_immediate_use (negate);
3465
3466 if (!user || !is_gimple_assign (user))
3467 continue;
3468
3469 /* The negate operand can be either operand of a PLUS_EXPR
3470 (it can be the LHS if the RHS is a constant for example).
3471
3472 Force the negate operand to the RHS of the PLUS_EXPR, then
3473 transform the PLUS_EXPR into a MINUS_EXPR. */
3474 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3475 {
3476 /* If the negated operand appears on the LHS of the
3477 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3478 to force the negated operand to the RHS of the PLUS_EXPR. */
3479 if (gimple_assign_rhs1 (user) == negate)
3480 {
3481 swap_tree_operands (user,
3482 gimple_assign_rhs1_ptr (user),
3483 gimple_assign_rhs2_ptr (user));
3484 }
3485
3486 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3487 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3488 if (gimple_assign_rhs2 (user) == negate)
3489 {
3490 tree rhs1 = gimple_assign_rhs1 (user);
3491 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3492 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3493 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3494 update_stmt (user);
3495 }
3496 }
3497 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3498 {
3499 if (gimple_assign_rhs1 (user) == negate)
3500 {
3501 /* We have
3502 x = -a
3503 y = x - b
3504 which we transform into
3505 x = a + b
3506 y = -x .
3507 This pushes down the negate which we possibly can merge
3508 into some other operation, hence insert it into the
3509 plus_negates vector. */
3510 gimple feed = SSA_NAME_DEF_STMT (negate);
3511 tree a = gimple_assign_rhs1 (feed);
3512 tree rhs2 = gimple_assign_rhs2 (user);
3513 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3514 gimple_replace_lhs (feed, negate);
3515 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3516 update_stmt (gsi_stmt (gsi));
3517 gsi2 = gsi_for_stmt (user);
3518 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3519 update_stmt (gsi_stmt (gsi2));
3520 gsi_move_before (&gsi, &gsi2);
3521 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3522 }
3523 else
3524 {
3525 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3526 rid of one operation. */
3527 gimple feed = SSA_NAME_DEF_STMT (negate);
3528 tree a = gimple_assign_rhs1 (feed);
3529 tree rhs1 = gimple_assign_rhs1 (user);
3530 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3531 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3532 update_stmt (gsi_stmt (gsi));
3533 }
3534 }
3535 }
3536 }
3537
3538 /* Returns true if OP is of a type for which we can do reassociation.
3539 That is for integral or non-saturating fixed-point types, and for
3540    floating point types when associative-math is enabled.  */
3541
3542 static bool
3543 can_reassociate_p (tree op)
3544 {
3545 tree type = TREE_TYPE (op);
3546 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3547 || NON_SAT_FIXED_POINT_TYPE_P (type)
3548 || (flag_associative_math && FLOAT_TYPE_P (type)))
3549 return true;
3550 return false;
3551 }
3552
3553 /* Break up subtract operations in block BB.
3554
3555 We do this top down because we don't know whether the subtract is
3556 part of a possible chain of reassociation except at the top.
3557
3558 IE given
3559 d = f + g
3560 c = a + e
3561 b = c - d
3562 q = b - r
3563 k = t - q
3564
3565 we want to break up k = t - q, but we won't until we've transformed q
3566 = b - r, which won't be broken up until we transform b = c - d.
3567
3568 En passant, clear the GIMPLE visited flag on every statement. */
3569
3570 static void
3571 break_up_subtract_bb (basic_block bb)
3572 {
3573 gimple_stmt_iterator gsi;
3574 basic_block son;
3575
3576 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3577 {
3578 gimple stmt = gsi_stmt (gsi);
3579 gimple_set_visited (stmt, false);
3580
3581 if (!is_gimple_assign (stmt)
3582 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3583 continue;
3584
3585 /* Look for simple gimple subtract operations. */
3586 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3587 {
3588 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3589 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3590 continue;
3591
3592 /* Check for a subtract used only in an addition. If this
3593 is the case, transform it into add of a negate for better
3594 reassociation. IE transform C = A-B into C = A + -B if C
3595 is only used in an addition. */
3596 if (should_break_up_subtract (stmt))
3597 break_up_subtract (stmt, &gsi);
3598 }
3599 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3600 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3601 plus_negates.safe_push (gimple_assign_lhs (stmt));
3602 }
3603 for (son = first_dom_son (CDI_DOMINATORS, bb);
3604 son;
3605 son = next_dom_son (CDI_DOMINATORS, son))
3606 break_up_subtract_bb (son);
3607 }
3608
3609 /* Used for repeated factor analysis. */
3610 struct repeat_factor_d
3611 {
3612 /* An SSA name that occurs in a multiply chain. */
3613 tree factor;
3614
3615 /* Cached rank of the factor. */
3616 unsigned rank;
3617
3618 /* Number of occurrences of the factor in the chain. */
3619 HOST_WIDE_INT count;
3620
3621 /* An SSA name representing the product of this factor and
3622 all factors appearing later in the repeated factor vector. */
3623 tree repr;
3624 };
3625
3626 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3627 typedef const struct repeat_factor_d *const_repeat_factor_t;
3628
3629
3630 static vec<repeat_factor> repeat_factor_vec;
3631
3632 /* Used for sorting the repeat factor vector. Sort primarily by
3633 ascending occurrence count, secondarily by descending rank. */
3634
3635 static int
3636 compare_repeat_factors (const void *x1, const void *x2)
3637 {
3638 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3639 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3640
3641 if (rf1->count != rf2->count)
3642 return rf1->count - rf2->count;
3643
3644 return rf2->rank - rf1->rank;
3645 }
3646
3647 /* Look for repeated operands in OPS in the multiply tree rooted at
3648 STMT. Replace them with an optimal sequence of multiplies and powi
3649 builtin calls, and remove the used operands from OPS. Return an
3650 SSA name representing the value of the replacement sequence. */
3651
3652 static tree
3653 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3654 {
3655 unsigned i, j, vec_len;
3656 int ii;
3657 operand_entry_t oe;
3658 repeat_factor_t rf1, rf2;
3659 repeat_factor rfnew;
3660 tree result = NULL_TREE;
3661 tree target_ssa, iter_result;
3662 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3663 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3664 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3665 gimple mul_stmt, pow_stmt;
3666
3667 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3668 target. */
3669 if (!powi_fndecl)
3670 return NULL_TREE;
3671
3672 /* Allocate the repeated factor vector. */
3673 repeat_factor_vec.create (10);
3674
3675 /* Scan the OPS vector for all SSA names in the product and build
3676 up a vector of occurrence counts for each factor. */
3677 FOR_EACH_VEC_ELT (*ops, i, oe)
3678 {
3679 if (TREE_CODE (oe->op) == SSA_NAME)
3680 {
3681 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3682 {
3683 if (rf1->factor == oe->op)
3684 {
3685 rf1->count += oe->count;
3686 break;
3687 }
3688 }
3689
3690 if (j >= repeat_factor_vec.length ())
3691 {
3692 rfnew.factor = oe->op;
3693 rfnew.rank = oe->rank;
3694 rfnew.count = oe->count;
3695 rfnew.repr = NULL_TREE;
3696 repeat_factor_vec.safe_push (rfnew);
3697 }
3698 }
3699 }
3700
3701 /* Sort the repeated factor vector by (a) increasing occurrence count,
3702 and (b) decreasing rank. */
3703 repeat_factor_vec.qsort (compare_repeat_factors);
3704
3705 /* It is generally best to combine as many base factors as possible
3706 into a product before applying __builtin_powi to the result.
3707 However, the sort order chosen for the repeated factor vector
3708 allows us to cache partial results for the product of the base
3709 factors for subsequent use. When we already have a cached partial
3710 result from a previous iteration, it is best to make use of it
3711 before looking for another __builtin_pow opportunity.
3712
3713 As an example, consider x * x * y * y * y * z * z * z * z.
3714 We want to first compose the product x * y * z, raise it to the
3715 second power, then multiply this by y * z, and finally multiply
3716 by z. This can be done in 5 multiplies provided we cache y * z
3717 for use in both expressions:
3718
3719 t1 = y * z
3720 t2 = t1 * x
3721 t3 = t2 * t2
3722 t4 = t1 * t3
3723 result = t4 * z
3724
3725 If we instead ignored the cached y * z and first multiplied by
3726 the __builtin_pow opportunity z * z, we would get the inferior:
3727
3728 t1 = y * z
3729 t2 = t1 * x
3730 t3 = t2 * t2
3731 t4 = z * z
3732 t5 = t3 * t4
3733 result = t5 * y */
3734
3735 vec_len = repeat_factor_vec.length ();
3736
3737 /* Repeatedly look for opportunities to create a builtin_powi call. */
3738 while (true)
3739 {
3740 HOST_WIDE_INT power;
3741
3742 /* First look for the largest cached product of factors from
3743 preceding iterations. If found, create a builtin_powi for
3744 it if the minimum occurrence count for its factors is at
3745 least 2, or just use this cached product as our next
3746 multiplicand if the minimum occurrence count is 1. */
3747 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3748 {
3749 if (rf1->repr && rf1->count > 0)
3750 break;
3751 }
3752
3753 if (j < vec_len)
3754 {
3755 power = rf1->count;
3756
3757 if (power == 1)
3758 {
3759 iter_result = rf1->repr;
3760
3761 if (dump_file && (dump_flags & TDF_DETAILS))
3762 {
3763 unsigned elt;
3764 repeat_factor_t rf;
3765 fputs ("Multiplying by cached product ", dump_file);
3766 for (elt = j; elt < vec_len; elt++)
3767 {
3768 rf = &repeat_factor_vec[elt];
3769 print_generic_expr (dump_file, rf->factor, 0);
3770 if (elt < vec_len - 1)
3771 fputs (" * ", dump_file);
3772 }
3773 fputs ("\n", dump_file);
3774 }
3775 }
3776 else
3777 {
3778 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3779 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3780 build_int_cst (integer_type_node,
3781 power));
3782 gimple_call_set_lhs (pow_stmt, iter_result);
3783 gimple_set_location (pow_stmt, gimple_location (stmt));
3784 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3785
3786 if (dump_file && (dump_flags & TDF_DETAILS))
3787 {
3788 unsigned elt;
3789 repeat_factor_t rf;
3790 fputs ("Building __builtin_pow call for cached product (",
3791 dump_file);
3792 for (elt = j; elt < vec_len; elt++)
3793 {
3794 rf = &repeat_factor_vec[elt];
3795 print_generic_expr (dump_file, rf->factor, 0);
3796 if (elt < vec_len - 1)
3797 fputs (" * ", dump_file);
3798 }
3799 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3800 power);
3801 }
3802 }
3803 }
3804 else
3805 {
3806 /* Otherwise, find the first factor in the repeated factor
3807 vector whose occurrence count is at least 2. If no such
3808 factor exists, there are no builtin_powi opportunities
3809 remaining. */
3810 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3811 {
3812 if (rf1->count >= 2)
3813 break;
3814 }
3815
3816 if (j >= vec_len)
3817 break;
3818
3819 power = rf1->count;
3820
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 {
3823 unsigned elt;
3824 repeat_factor_t rf;
3825 fputs ("Building __builtin_pow call for (", dump_file);
3826 for (elt = j; elt < vec_len; elt++)
3827 {
3828 rf = &repeat_factor_vec[elt];
3829 print_generic_expr (dump_file, rf->factor, 0);
3830 if (elt < vec_len - 1)
3831 fputs (" * ", dump_file);
3832 }
3833 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3834 }
3835
3836 reassociate_stats.pows_created++;
3837
3838 /* Visit each element of the vector in reverse order (so that
3839 high-occurrence elements are visited first, and within the
3840 same occurrence count, lower-ranked elements are visited
3841 first). Form a linear product of all elements in this order
3842 whose occurrence count is at least that of element J.
3843 Record the SSA name representing the product of each element
3844 with all subsequent elements in the vector. */
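/* For the sorted example vector above this walks from z back to x,
   setting repr(z) = z, repr(y) = y * z and repr(x) = x * (y * z)
   (names illustrative), so each partial product is cached for reuse
   by later iterations.  */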
3845 if (j == vec_len - 1)
3846 rf1->repr = rf1->factor;
3847 else
3848 {
3849 for (ii = vec_len - 2; ii >= (int)j; ii--)
3850 {
3851 tree op1, op2;
3852
3853 rf1 = &repeat_factor_vec[ii];
3854 rf2 = &repeat_factor_vec[ii + 1];
3855
3856 /* Init the last factor's representative to be itself. */
3857 if (!rf2->repr)
3858 rf2->repr = rf2->factor;
3859
3860 op1 = rf1->factor;
3861 op2 = rf2->repr;
3862
3863 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3864 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3865 target_ssa,
3866 op1, op2);
3867 gimple_set_location (mul_stmt, gimple_location (stmt));
3868 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3869 rf1->repr = target_ssa;
3870
3871 /* Don't reprocess the multiply we just introduced. */
3872 gimple_set_visited (mul_stmt, true);
3873 }
3874 }
3875
3876 /* Form a call to __builtin_powi for the maximum product
3877 just formed, raised to the power obtained earlier. */
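/* The statement emitted here has the form (SSA names illustrative,
   power taken from the example above):
       reassocpow_2 = __builtin_powi (reassocpow_1, 2);  */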
3878 rf1 = &repeat_factor_vec[j];
3879 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3880 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3881 build_int_cst (integer_type_node,
3882 power));
3883 gimple_call_set_lhs (pow_stmt, iter_result);
3884 gimple_set_location (pow_stmt, gimple_location (stmt));
3885 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3886 }
3887
3888 /* If we previously formed at least one other builtin_powi call,
3889 form the product of this one and those others. */
3890 if (result)
3891 {
3892 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3893 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3894 result, iter_result);
3895 gimple_set_location (mul_stmt, gimple_location (stmt));
3896 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3897 gimple_set_visited (mul_stmt, true);
3898 result = new_result;
3899 }
3900 else
3901 result = iter_result;
3902
3903 /* Decrement the occurrence count of each element in the product
3904 by the count found above, and remove this many copies of each
3905 factor from OPS. */
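/* Continuing the example, after emitting (x * y * z)^2 the counts
   become x:0, y:1, z:2, and two copies each of x, y and z are
   removed from OPS (values illustrative).  */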
3906 for (i = j; i < vec_len; i++)
3907 {
3908 unsigned k = power;
3909 unsigned n;
3910
3911 rf1 = &repeat_factor_vec[i];
3912 rf1->count -= power;
3913
3914 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3915 {
3916 if (oe->op == rf1->factor)
3917 {
3918 if (oe->count <= k)
3919 {
3920 ops->ordered_remove (n);
3921 k -= oe->count;
3922
3923 if (k == 0)
3924 break;
3925 }
3926 else
3927 {
3928 oe->count -= k;
3929 break;
3930 }
3931 }
3932 }
3933 }
3934 }
3935
3936 /* At this point all elements in the repeated factor vector have a
3937 remaining occurrence count of 0 or 1, and those with a count of 1
3938 don't have cached representatives. Re-sort the ops vector and
3939 clean up. */
3940 ops->qsort (sort_by_operand_rank);
3941 repeat_factor_vec.release ();
3942
3943 /* Return the final product computed herein. Note that there may
3944 still be some elements with single occurrence count left in OPS;
3945 those will be handled by the normal reassociation logic. */
3946 return result;
3947 }
3948
3949 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3950
3951 static void
3952 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3953 {
3954 tree rhs1;
3955
3956 if (dump_file && (dump_flags & TDF_DETAILS))
3957 {
3958 fprintf (dump_file, "Transforming ");
3959 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 }
3961
3962 rhs1 = gimple_assign_rhs1 (stmt);
3963 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3964 update_stmt (stmt);
3965 remove_visited_stmt_chain (rhs1);
3966
3967 if (dump_file && (dump_flags & TDF_DETAILS))
3968 {
3969 fprintf (dump_file, " into ");
3970 print_gimple_stmt (dump_file, stmt, 0, 0);
3971 }
3972 }
3973
3974 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3975
3976 static void
3977 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3978 tree rhs1, tree rhs2)
3979 {
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3981 {
3982 fprintf (dump_file, "Transforming ");
3983 print_gimple_stmt (dump_file, stmt, 0, 0);
3984 }
3985
3986 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3987 update_stmt (gsi_stmt (*gsi));
3988 remove_visited_stmt_chain (rhs1);
3989
3990 if (dump_file && (dump_flags & TDF_DETAILS))
3991 {
3992 fprintf (dump_file, " into ");
3993 print_gimple_stmt (dump_file, stmt, 0, 0);
3994 }
3995 }
3996
3997 /* Reassociate expressions in basic block BB and in its children in
3998 the post-dominator tree. */
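/* This is invoked from do_reassoc on EXIT_BLOCK_PTR, so the
   recursion at the bottom walks the post-dominator tree rooted
   there.  */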
3999
4000 static void
4001 reassociate_bb (basic_block bb)
4002 {
4003 gimple_stmt_iterator gsi;
4004 basic_block son;
4005 gimple stmt = last_stmt (bb);
4006
4007 if (stmt && !gimple_visited_p (stmt))
4008 maybe_optimize_range_tests (stmt);
4009
4010 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4011 {
4012 stmt = gsi_stmt (gsi);
4013
4014 if (is_gimple_assign (stmt)
4015 && !stmt_could_throw_p (stmt))
4016 {
4017 tree lhs, rhs1, rhs2;
4018 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4019
4020 /* If this is not a gimple binary expression, there is
4021 nothing for us to do with it. */
4022 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4023 continue;
4024
4025 /* If this was part of an already processed statement,
4026 we don't need to touch it again. */
4027 if (gimple_visited_p (stmt))
4028 {
4029 /* This statement might have become dead because of previous
4030 reassociations. */
4031 if (has_zero_uses (gimple_get_lhs (stmt)))
4032 {
4033 gsi_remove (&gsi, true);
4034 release_defs (stmt);
4035 /* Removing the stmt above may have left the iterator at
4036 the end of the sequence, so reset it to the last stmt
4037 of the block. If the block is now empty because we
4038 just removed its only remaining stmt, gsi_last_bb
4039 returns an end iterator again, and in that case we
4040 need to bail out. */
4041 if (gsi_end_p (gsi))
4042 {
4043 gsi = gsi_last_bb (bb);
4044 if (gsi_end_p (gsi))
4045 break;
4046 }
4047 }
4048 continue;
4049 }
4050
4051 lhs = gimple_assign_lhs (stmt);
4052 rhs1 = gimple_assign_rhs1 (stmt);
4053 rhs2 = gimple_assign_rhs2 (stmt);
4054
4055 /* For operations other than bitwise ops and min/max we cannot
4056 reassociate all types. Verify that here. */
4057 if (rhs_code != BIT_IOR_EXPR
4058 && rhs_code != BIT_AND_EXPR
4059 && rhs_code != BIT_XOR_EXPR
4060 && rhs_code != MIN_EXPR
4061 && rhs_code != MAX_EXPR
4062 && (!can_reassociate_p (lhs)
4063 || !can_reassociate_p (rhs1)
4064 || !can_reassociate_p (rhs2)))
4065 continue;
4066
4067 if (associative_tree_code (rhs_code))
4068 {
4069 vec<operand_entry_t> ops = vNULL;
4070 tree powi_result = NULL_TREE;
4071
4072 /* There may be no immediate uses left by the time we
4073 get here because we may have eliminated them all. */
4074 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4075 continue;
4076
4077 gimple_set_visited (stmt, true);
4078 linearize_expr_tree (&ops, stmt, true, true);
4079 ops.qsort (sort_by_operand_rank);
4080 optimize_ops_list (rhs_code, &ops);
4081 if (undistribute_ops_list (rhs_code, &ops,
4082 loop_containing_stmt (stmt)))
4083 {
4084 ops.qsort (sort_by_operand_rank);
4085 optimize_ops_list (rhs_code, &ops);
4086 }
4087
4088 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4089 optimize_range_tests (rhs_code, &ops);
4090
4091 if (first_pass_instance
4092 && rhs_code == MULT_EXPR
4093 && flag_unsafe_math_optimizations)
4094 powi_result = attempt_builtin_powi (stmt, &ops);
4095
4096 /* If the operand vector is now empty, all operands were
4097 consumed by the __builtin_powi optimization. */
4098 if (ops.length () == 0)
4099 transform_stmt_to_copy (&gsi, stmt, powi_result);
4100 else if (ops.length () == 1)
4101 {
4102 tree last_op = ops.last ()->op;
4103
4104 if (powi_result)
4105 transform_stmt_to_multiply (&gsi, stmt, last_op,
4106 powi_result);
4107 else
4108 transform_stmt_to_copy (&gsi, stmt, last_op);
4109 }
4110 else
4111 {
4112 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4113 int ops_num = ops.length ();
4114 int width = get_reassociation_width (ops_num, rhs_code, mode);
4115
4116 if (dump_file && (dump_flags & TDF_DETAILS))
4117 fprintf (dump_file,
4118 "Width = %d was chosen for reassociation\n", width);
4119
4120 if (width > 1
4121 && ops.length () > 3)
4122 rewrite_expr_tree_parallel (stmt, width, ops);
4123 else
4124 rewrite_expr_tree (stmt, 0, ops, false);
4125
4126 /* If we combined some repeated factors into a
4127 __builtin_powi call, multiply that result by the
4128 reassociated operands. */
4129 if (powi_result)
4130 {
4131 gimple mul_stmt;
4132 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4133 tree target_ssa = make_temp_ssa_name (type, NULL,
4134 "reassocpow");
4135 gimple_set_lhs (stmt, target_ssa);
4136 update_stmt (stmt);
4137 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4138 powi_result,
4139 target_ssa);
4140 gimple_set_location (mul_stmt, gimple_location (stmt));
4141 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4142 }
4143 }
4144
4145 ops.release ();
4146 }
4147 }
4148 }
4149 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4150 son;
4151 son = next_dom_son (CDI_POST_DOMINATORS, son))
4152 reassociate_bb (son);
4153 }
4154
4155 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4156 void debug_ops_vector (vec<operand_entry_t> ops);
4157
4158 /* Dump the operand entry vector OPS to FILE. */
4159
4160 void
4161 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4162 {
4163 operand_entry_t oe;
4164 unsigned int i;
4165
4166 FOR_EACH_VEC_ELT (ops, i, oe)
4167 {
4168 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4169 print_generic_expr (file, oe->op, 0);
4170 }
4171 }
4172
4173 /* Dump the operand entry vector OPS to STDERR. */
4174
4175 DEBUG_FUNCTION void
4176 debug_ops_vector (vec<operand_entry_t> ops)
4177 {
4178 dump_ops_vector (stderr, ops);
4179 }
4180
4181 static void
4182 do_reassoc (void)
4183 {
4184 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4185 reassociate_bb (EXIT_BLOCK_PTR);
4186 }
4187
4188 /* Initialize the reassociation pass. */
4189
4190 static void
4191 init_reassoc (void)
4192 {
4193 int i;
4194 long rank = 2;
4195 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4196
4197 /* Find the loops, so that we can prevent moving calculations in
4198 them. */
4199 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4200
4201 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4202
4203 operand_entry_pool = create_alloc_pool ("operand entry pool",
4204 sizeof (struct operand_entry), 30);
4205 next_operand_entry_id = 0;
4206
4207 /* Reverse RPO (Reverse Post Order) will give us something where
4208 deeper loops come later. */
4209 pre_and_rev_post_order_compute (NULL, bbs, false);
4210 bb_rank = XCNEWVEC (long, last_basic_block);
4211 operand_rank = pointer_map_create ();
4212
4213 /* Give each default definition a distinct rank. This includes
4214 parameters and the static chain. Walk backwards over all
4215 SSA names so that we get proper rank ordering according
4216 to tree_swap_operands_p. */
4217 for (i = num_ssa_names - 1; i > 0; --i)
4218 {
4219 tree name = ssa_name (i);
4220 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4221 insert_operand_rank (name, ++rank);
4222 }
4223
4224 /* Set up rank for each BB. */
4225 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4226 bb_rank[bbs[i]] = ++rank << 16;
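/* The shift leaves the low-order bits of each block's base rank
   free; ranks later computed for expressions (see get_rank) add
   small increments on top of these base ranks, so they normally
   stay within their block's band.  */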
4227
4228 free (bbs);
4229 calculate_dominance_info (CDI_POST_DOMINATORS);
4230 plus_negates = vNULL;
4231 }
4232
4233 /* Cleanup after the reassociation pass, and print stats if
4234 requested. */
4235
4236 static void
4237 fini_reassoc (void)
4238 {
4239 statistics_counter_event (cfun, "Linearized",
4240 reassociate_stats.linearized);
4241 statistics_counter_event (cfun, "Constants eliminated",
4242 reassociate_stats.constants_eliminated);
4243 statistics_counter_event (cfun, "Ops eliminated",
4244 reassociate_stats.ops_eliminated);
4245 statistics_counter_event (cfun, "Statements rewritten",
4246 reassociate_stats.rewritten);
4247 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4248 reassociate_stats.pows_encountered);
4249 statistics_counter_event (cfun, "Built-in powi calls created",
4250 reassociate_stats.pows_created);
4251
4252 pointer_map_destroy (operand_rank);
4253 free_alloc_pool (operand_entry_pool);
4254 free (bb_rank);
4255 plus_negates.release ();
4256 free_dominance_info (CDI_POST_DOMINATORS);
4257 loop_optimizer_finalize ();
4258 }
4259
4260 /* Gate and execute functions for Reassociation. */
4261
4262 static unsigned int
4263 execute_reassoc (void)
4264 {
4265 init_reassoc ();
4266
4267 do_reassoc ();
4268 repropagate_negates ();
4269
4270 fini_reassoc ();
4271 return 0;
4272 }
4273
4274 static bool
4275 gate_tree_ssa_reassoc (void)
4276 {
4277 return flag_tree_reassoc != 0;
4278 }
4279
4280 struct gimple_opt_pass pass_reassoc =
4281 {
4282 {
4283 GIMPLE_PASS,
4284 "reassoc", /* name */
4285 OPTGROUP_NONE, /* optinfo_flags */
4286 gate_tree_ssa_reassoc, /* gate */
4287 execute_reassoc, /* execute */
4288 NULL, /* sub */
4289 NULL, /* next */
4290 0, /* static_pass_number */
4291 TV_TREE_REASSOC, /* tv_id */
4292 PROP_cfg | PROP_ssa, /* properties_required */
4293 0, /* properties_provided */
4294 0, /* properties_destroyed */
4295 0, /* todo_flags_start */
4296 TODO_verify_ssa
4297 | TODO_verify_flow
4298 | TODO_ggc_collect /* todo_flags_finish */
4299 }
4300 };