1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (They do some things more/less
66 than we do, in different orders, etc).
67
68 It consists of five steps:
69
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
72
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
77
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
80
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
84
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
87
88 5. Repropagate negates, as nothing else will clean them up ATM.
89
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
92
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
95
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
100
101 So the way to do this, if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
105
106 I.e., if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
108
109 a (1), b (1), c (1), d (2), e (2)
110
111
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
114
115 You want to first merge all leaves of the same rank, as much as
116 possible.
117
118 So first build a binary op of
119
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
121
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
124
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
129
130 Then build a binary op of d + e
131 mergetmp2 = d + e
132
133 and put mergetmp2 on the merge worklist.
134
135 so merge worklist = {mergetmp, c, mergetmp2}
136
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
139
140 So we have
141
142 build binary op
143 mergetmp3 = mergetmp + c
144
145 worklist = {mergetmp2, mergetmp3}
146
147 mergetmp4 = mergetmp2 + mergetmp3
148
149 worklist = {mergetmp4}
150
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
153
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
156
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
159
160 So why don't we do this?
161
162 Because it's expensive, and rarely will help. Most trees we are
163 reassociating have 3 or fewer ops. If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
167
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
170
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
174
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
178
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
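/* A purely illustrative example of the overall effect (hypothetical
   source, not taken from any testcase):

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 - a;

   Step 1 turns the subtracts into additions of negates, linearization
   (step 2) collects the operand list { a, -b, c, -a }, step 3 cancels
   a + -a, the rewrite in step 4 emits what is left in rank order, and
   step 5 turns the surviving negate back into a subtract, leaving the
   equivalent of c - b.  */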
184
185
186 /* Statistics */
187 static struct
188 {
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
196
197 /* Operand, rank pair. */
198 typedef struct operand_entry
199 {
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
205
206 static alloc_pool operand_entry_pool;
207
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
211
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
216
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
219
220 /* Forward decls. */
221 static long get_rank (tree);
222
223
224 /* Bias amount for loop-carried phis. We want this to be larger than
225 the depth of any reassociation tree we can see, but not larger than
226 the rank difference between two blocks. */
227 #define PHI_LOOP_BIAS (1 << 15)
228
229 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
230 an innermost loop, and the phi has only a single use which is inside
231 the loop, then the rank is the block rank of the loop latch plus an
232 extra bias for the loop-carried dependence. This causes expressions
233 calculated into an accumulator variable to be independent for each
234 iteration of the loop. If STMT is some other phi, the rank is the
235 block rank of its containing block. */
236 static long
237 phi_rank (gimple stmt)
238 {
239 basic_block bb = gimple_bb (stmt);
240 struct loop *father = bb->loop_father;
241 tree res;
242 unsigned i;
243 use_operand_p use;
244 gimple use_stmt;
245
246 /* We only care about real loops (those with a latch). */
247 if (!father->latch)
248 return bb_rank[bb->index];
249
250 /* Interesting phis must be in headers of innermost loops. */
251 if (bb != father->header
252 || father->inner)
253 return bb_rank[bb->index];
254
255 /* Ignore virtual SSA_NAMEs. */
256 res = gimple_phi_result (stmt);
257 if (virtual_operand_p (res))
258 return bb_rank[bb->index];
259
260 /* The phi definition must have a single use, and that use must be
261 within the loop. Otherwise this isn't an accumulator pattern. */
262 if (!single_imm_use (res, &use, &use_stmt)
263 || gimple_bb (use_stmt)->loop_father != father)
264 return bb_rank[bb->index];
265
266 /* Look for phi arguments from within the loop. If found, bias this phi. */
267 for (i = 0; i < gimple_phi_num_args (stmt); i++)
268 {
269 tree arg = gimple_phi_arg_def (stmt, i);
270 if (TREE_CODE (arg) == SSA_NAME
271 && !SSA_NAME_IS_DEFAULT_DEF (arg))
272 {
273 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
274 if (gimple_bb (def_stmt)->loop_father == father)
275 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
276 }
277 }
278
279 /* Must be an uninteresting phi. */
280 return bb_rank[bb->index];
281 }
282
283 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
284 loop-carried dependence of an innermost loop, return TRUE; else
285 return FALSE. */
286 static bool
287 loop_carried_phi (tree exp)
288 {
289 gimple phi_stmt;
290 long block_rank;
291
292 if (TREE_CODE (exp) != SSA_NAME
293 || SSA_NAME_IS_DEFAULT_DEF (exp))
294 return false;
295
296 phi_stmt = SSA_NAME_DEF_STMT (exp);
297
298 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
299 return false;
300
301 /* Non-loop-carried phis have block rank. Loop-carried phis have
302 an additional bias added in. If this phi doesn't have block rank,
303 it's biased and should not be propagated. */
304 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
305
306 if (phi_rank (phi_stmt) != block_rank)
307 return true;
308
309 return false;
310 }
311
312 /* Return the maximum of RANK and the rank that should be propagated
313 from expression OP. For most operands, this is just the rank of OP.
314 For loop-carried phis, the value is zero to avoid undoing the bias
315 in favor of the phi. */
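/* For example, with made-up numbers: if RANK is currently 7 and OP is an
   ordinary operand of rank 9, the result is 9; if OP is a loop-carried
   phi whose rank was biased by PHI_LOOP_BIAS, the biased value is ignored
   and 7 is kept, so statements that merely use the accumulator do not
   inherit its artificially high rank.  */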
316 static long
317 propagate_rank (long rank, tree op)
318 {
319 long op_rank;
320
321 if (loop_carried_phi (op))
322 return rank;
323
324 op_rank = get_rank (op);
325
326 return MAX (rank, op_rank);
327 }
328
329 /* Look up the operand rank structure for expression E. */
330
331 static inline long
332 find_operand_rank (tree e)
333 {
334 void **slot = pointer_map_contains (operand_rank, e);
335 return slot ? (long) (intptr_t) *slot : -1;
336 }
337
338 /* Insert {E,RANK} into the operand rank hashtable. */
339
340 static inline void
341 insert_operand_rank (tree e, long rank)
342 {
343 void **slot;
344 gcc_assert (rank > 0);
345 slot = pointer_map_insert (operand_rank, e);
346 gcc_assert (!*slot);
347 *slot = (void *) (intptr_t) rank;
348 }
349
350 /* Given an expression E, return the rank of the expression. */
351
352 static long
353 get_rank (tree e)
354 {
355 /* Constants have rank 0. */
356 if (is_gimple_min_invariant (e))
357 return 0;
358
359 /* SSA_NAMEs have the rank of the expression they are the result
360 of.
361 For globals and uninitialized values, the rank is 0.
362 For function arguments, use the pre-setup rank.
363 For PHI nodes, stores, asm statements, etc, we use the rank of
364 the BB.
365 For simple operations, the rank is the maximum rank of any of
366 its operands, or the bb_rank, whichever is less.
367 I make no claims that this is optimal, however, it gives good
368 results. */
369
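   /* As a small illustration with made-up ranks: if operands a and b
      already have ranks 3 and 5, then for x = a + b the code below
      computes MAX (3, 5) and records and returns 5 + 1 == 6 as the rank
      of x, so x sorts ahead of both of its inputs when operands are
      ordered by decreasing rank.  */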
370 /* We make an exception to the normal ranking system to break
371 dependences of accumulator variables in loops. Suppose we
372 have a simple one-block loop containing:
373
374 x_1 = phi(x_0, x_2)
375 b = a + x_1
376 c = b + d
377 x_2 = c + e
378
379 As shown, each iteration of the calculation into x is fully
380 dependent upon the iteration before it. We would prefer to
381 see this in the form:
382
383 x_1 = phi(x_0, x_2)
384 b = a + d
385 c = b + e
386 x_2 = c + x_1
387
388 If the loop is unrolled, the calculations of b and c from
389 different iterations can be interleaved.
390
391 To obtain this result during reassociation, we bias the rank
392 of the phi definition x_1 upward, when it is recognized as an
393 accumulator pattern. The artificial rank causes it to be
394 added last, providing the desired independence. */
395
396 if (TREE_CODE (e) == SSA_NAME)
397 {
398 gimple stmt;
399 long rank;
400 int i, n;
401 tree op;
402
403 if (SSA_NAME_IS_DEFAULT_DEF (e))
404 return find_operand_rank (e);
405
406 stmt = SSA_NAME_DEF_STMT (e);
407 if (gimple_code (stmt) == GIMPLE_PHI)
408 return phi_rank (stmt);
409
410 if (!is_gimple_assign (stmt)
411 || gimple_vdef (stmt))
412 return bb_rank[gimple_bb (stmt)->index];
413
414 /* If we already have a rank for this expression, use that. */
415 rank = find_operand_rank (e);
416 if (rank != -1)
417 return rank;
418
419 /* Otherwise, find the maximum rank for the operands. As an
420 exception, remove the bias from loop-carried phis when propagating
421 the rank so that dependent operations are not also biased. */
422 rank = 0;
423 if (gimple_assign_single_p (stmt))
424 {
425 tree rhs = gimple_assign_rhs1 (stmt);
426 n = TREE_OPERAND_LENGTH (rhs);
427 if (n == 0)
428 rank = propagate_rank (rank, rhs);
429 else
430 {
431 for (i = 0; i < n; i++)
432 {
433 op = TREE_OPERAND (rhs, i);
434
435 if (op != NULL_TREE)
436 rank = propagate_rank (rank, op);
437 }
438 }
439 }
440 else
441 {
442 n = gimple_num_ops (stmt);
443 for (i = 1; i < n; i++)
444 {
445 op = gimple_op (stmt, i);
446 gcc_assert (op);
447 rank = propagate_rank (rank, op);
448 }
449 }
450
451 if (dump_file && (dump_flags & TDF_DETAILS))
452 {
453 fprintf (dump_file, "Rank for ");
454 print_generic_expr (dump_file, e, 0);
455 fprintf (dump_file, " is %ld\n", (rank + 1));
456 }
457
458 /* Note the rank in the hashtable so we don't recompute it. */
459 insert_operand_rank (e, (rank + 1));
460 return (rank + 1);
461 }
462
463 /* Globals, etc., are rank 0. */
464 return 0;
465 }
466
467
468 /* We want integer constants to end up last no matter what, since they are
469 the ones we can do the most with. */
470 #define INTEGER_CONST_TYPE 1 << 3
471 #define FLOAT_CONST_TYPE 1 << 2
472 #define OTHER_CONST_TYPE 1 << 1
473
474 /* Classify an invariant tree into integer, float, or other, so that
475 we can sort them to be near other constants of the same type. */
476 static inline int
477 constant_type (tree t)
478 {
479 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
480 return INTEGER_CONST_TYPE;
481 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 return FLOAT_CONST_TYPE;
483 else
484 return OTHER_CONST_TYPE;
485 }
486
487 /* qsort comparison function to sort operand entries PA and PB by rank
488 so that the sorted array is ordered by rank in decreasing order. */
489 static int
490 sort_by_operand_rank (const void *pa, const void *pb)
491 {
492 const operand_entry_t oea = *(const operand_entry_t *)pa;
493 const operand_entry_t oeb = *(const operand_entry_t *)pb;
494
495 /* It's nicer for optimize_ops_list if constants that are likely
496 to fold when added/multiplied/whatever are put next to each
497 other. Since all constants have rank 0, order them by type. */
498 if (oeb->rank == 0 && oea->rank == 0)
499 {
500 if (constant_type (oeb->op) != constant_type (oea->op))
501 return constant_type (oeb->op) - constant_type (oea->op);
502 else
503 /* To make sorting result stable, we use unique IDs to determine
504 order. */
505 return oeb->id - oea->id;
506 }
507
508 /* Lastly, make sure the versions that are the same go next to each
509 other. We use SSA_NAME_VERSION because it's stable. */
510 if ((oeb->rank - oea->rank == 0)
511 && TREE_CODE (oea->op) == SSA_NAME
512 && TREE_CODE (oeb->op) == SSA_NAME)
513 {
514 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
515 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
516 else
517 return oeb->id - oea->id;
518 }
519
520 if (oeb->rank != oea->rank)
521 return oeb->rank - oea->rank;
522 else
523 return oeb->id - oea->id;
524 }
525
526 /* Add an operand entry to *OPS for the tree operand OP. */
527
528 static void
529 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
530 {
531 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
532
533 oe->op = op;
534 oe->rank = get_rank (op);
535 oe->id = next_operand_entry_id++;
536 oe->count = 1;
537 ops->safe_push (oe);
538 }
539
540 /* Add an operand entry to *OPS for the tree operand OP with repeat
541 count REPEAT. */
542
543 static void
544 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
545 HOST_WIDE_INT repeat)
546 {
547 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
548
549 oe->op = op;
550 oe->rank = get_rank (op);
551 oe->id = next_operand_entry_id++;
552 oe->count = repeat;
553 ops->safe_push (oe);
554
555 reassociate_stats.pows_encountered++;
556 }
557
558 /* Return true if STMT is a reassociable operation containing a binary
559 operation with tree code CODE, and is inside LOOP. */
560
561 static bool
562 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
563 {
564 basic_block bb = gimple_bb (stmt);
565
566 if (gimple_bb (stmt) == NULL)
567 return false;
568
569 if (!flow_bb_inside_loop_p (loop, bb))
570 return false;
571
572 if (is_gimple_assign (stmt)
573 && gimple_assign_rhs_code (stmt) == code
574 && has_single_use (gimple_assign_lhs (stmt)))
575 return true;
576
577 return false;
578 }
579
580
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
582 operand of that operation. Otherwise, return NULL. */
583
584 static tree
585 get_unary_op (tree name, enum tree_code opcode)
586 {
587 gimple stmt = SSA_NAME_DEF_STMT (name);
588
589 if (!is_gimple_assign (stmt))
590 return NULL_TREE;
591
592 if (gimple_assign_rhs_code (stmt) == opcode)
593 return gimple_assign_rhs1 (stmt);
594 return NULL_TREE;
595 }
596
597 /* If CURR and LAST are a pair of ops that OPCODE allows us to
598 eliminate through equivalences, do so, remove them from OPS, and
599 return true. Otherwise, return false. */
600
601 static bool
602 eliminate_duplicate_pair (enum tree_code opcode,
603 vec<operand_entry_t> *ops,
604 bool *all_done,
605 unsigned int i,
606 operand_entry_t curr,
607 operand_entry_t last)
608 {
609
610 /* If we have two of the same op, and the opcode is &, |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
614
615 if (last && last->op == curr->op)
616 {
617 switch (opcode)
618 {
619 case MAX_EXPR:
620 case MIN_EXPR:
621 case BIT_IOR_EXPR:
622 case BIT_AND_EXPR:
623 if (dump_file && (dump_flags & TDF_DETAILS))
624 {
625 fprintf (dump_file, "Equivalence: ");
626 print_generic_expr (dump_file, curr->op, 0);
627 fprintf (dump_file, " [&|minmax] ");
628 print_generic_expr (dump_file, last->op, 0);
629 fprintf (dump_file, " -> ");
630 print_generic_stmt (dump_file, last->op, 0);
631 }
632
633 ops->ordered_remove (i);
634 reassociate_stats.ops_eliminated ++;
635
636 return true;
637
638 case BIT_XOR_EXPR:
639 if (dump_file && (dump_flags & TDF_DETAILS))
640 {
641 fprintf (dump_file, "Equivalence: ");
642 print_generic_expr (dump_file, curr->op, 0);
643 fprintf (dump_file, " ^ ");
644 print_generic_expr (dump_file, last->op, 0);
645 fprintf (dump_file, " -> nothing\n");
646 }
647
648 reassociate_stats.ops_eliminated += 2;
649
650 if (ops->length () == 2)
651 {
652 ops->create (0);
653 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
654 *all_done = true;
655 }
656 else
657 {
658 ops->ordered_remove (i-1);
659 ops->ordered_remove (i-1);
660 }
661
662 return true;
663
664 default:
665 break;
666 }
667 }
668 return false;
669 }
670
671 static vec<tree> plus_negates;
672
673 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
674 expression, look in OPS for a corresponding positive operation to cancel
675 it out. If we find one, remove the other from OPS, replace
676 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
677 return false. */
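/* Concretely, the two identities used below are (x stands for any
   operand):

     x + -x ->  0   (the NEGATE_EXPR case)
     x + ~x -> -1   (the BIT_NOT_EXPR case, since ~x == -x - 1)

   shown here only as an illustration of the transform.  */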
678
679 static bool
680 eliminate_plus_minus_pair (enum tree_code opcode,
681 vec<operand_entry_t> *ops,
682 unsigned int currindex,
683 operand_entry_t curr)
684 {
685 tree negateop;
686 tree notop;
687 unsigned int i;
688 operand_entry_t oe;
689
690 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
691 return false;
692
693 negateop = get_unary_op (curr->op, NEGATE_EXPR);
694 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
695 if (negateop == NULL_TREE && notop == NULL_TREE)
696 return false;
697
698 /* Any non-negated version will have a rank that is one less than
699 the current rank. So once we hit those ranks, if we don't find
700 one, we can stop. */
701
702 for (i = currindex + 1;
703 ops->iterate (i, &oe)
704 && oe->rank >= curr->rank - 1;
705 i++)
706 {
707 if (oe->op == negateop)
708 {
709
710 if (dump_file && (dump_flags & TDF_DETAILS))
711 {
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, negateop, 0);
714 fprintf (dump_file, " + -");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> 0\n");
717 }
718
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
723
724 return true;
725 }
726 else if (oe->op == notop)
727 {
728 tree op_type = TREE_TYPE (oe->op);
729
730 if (dump_file && (dump_flags & TDF_DETAILS))
731 {
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, notop, 0);
734 fprintf (dump_file, " + ~");
735 print_generic_expr (dump_file, oe->op, 0);
736 fprintf (dump_file, " -> -1\n");
737 }
738
739 ops->ordered_remove (i);
740 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
741 ops->ordered_remove (currindex);
742 reassociate_stats.ops_eliminated ++;
743
744 return true;
745 }
746 }
747
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop != NULL_TREE)
751 plus_negates.safe_push (curr->op);
752
753 return false;
754 }
755
756 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
757 bitwise not expression, look in OPS for a corresponding operand to
758 cancel it out. If we find one, collapse the whole list down to a
759 single constant (0 for BIT_AND_EXPR, all ones for BIT_IOR_EXPR) and
760 return true. Otherwise, return false. */
761
762 static bool
763 eliminate_not_pairs (enum tree_code opcode,
764 vec<operand_entry_t> *ops,
765 unsigned int currindex,
766 operand_entry_t curr)
767 {
768 tree notop;
769 unsigned int i;
770 operand_entry_t oe;
771
772 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
773 || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
775
776 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
777 if (notop == NULL_TREE)
778 return false;
779
780 /* Any non-not version will have a rank that is one less than
781 the current rank. So once we hit those ranks, if we don't find
782 one, we can stop. */
783
784 for (i = currindex + 1;
785 ops->iterate (i, &oe)
786 && oe->rank >= curr->rank - 1;
787 i++)
788 {
789 if (oe->op == notop)
790 {
791 if (dump_file && (dump_flags & TDF_DETAILS))
792 {
793 fprintf (dump_file, "Equivalence: ");
794 print_generic_expr (dump_file, notop, 0);
795 if (opcode == BIT_AND_EXPR)
796 fprintf (dump_file, " & ~");
797 else if (opcode == BIT_IOR_EXPR)
798 fprintf (dump_file, " | ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 if (opcode == BIT_AND_EXPR)
801 fprintf (dump_file, " -> 0\n");
802 else if (opcode == BIT_IOR_EXPR)
803 fprintf (dump_file, " -> -1\n");
804 }
805
806 if (opcode == BIT_AND_EXPR)
807 oe->op = build_zero_cst (TREE_TYPE (oe->op));
808 else if (opcode == BIT_IOR_EXPR)
809 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
810 TYPE_PRECISION (TREE_TYPE (oe->op)));
811
812 reassociate_stats.ops_eliminated += ops->length () - 1;
813 ops->truncate (0);
814 ops->quick_push (oe);
815 return true;
816 }
817 }
818
819 return false;
820 }
821
822 /* Use a constant value that may be present in OPS to try to eliminate
823 operands. Note that this function is only really used when we've
824 eliminated ops for other reasons, or merged constants. Across
825 single statements, fold already does all of this, plus more. There
826 is little point in duplicating logic, so I've only included the
827 identities that I could ever construct testcases to trigger. */
828
829 static void
830 eliminate_using_constants (enum tree_code opcode,
831 vec<operand_entry_t> *ops)
832 {
833 operand_entry_t oelast = ops->last ();
834 tree type = TREE_TYPE (oelast->op);
835
836 if (oelast->rank == 0
837 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
838 {
839 switch (opcode)
840 {
841 case BIT_AND_EXPR:
842 if (integer_zerop (oelast->op))
843 {
844 if (ops->length () != 1)
845 {
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Found & 0, removing all other ops\n");
848
849 reassociate_stats.ops_eliminated += ops->length () - 1;
850
851 ops->truncate (0);
852 ops->quick_push (oelast);
853 return;
854 }
855 }
856 else if (integer_all_onesp (oelast->op))
857 {
858 if (ops->length () != 1)
859 {
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "Found & -1, removing\n");
862 ops->pop ();
863 reassociate_stats.ops_eliminated++;
864 }
865 }
866 break;
867 case BIT_IOR_EXPR:
868 if (integer_all_onesp (oelast->op))
869 {
870 if (ops->length () != 1)
871 {
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found | -1, removing all other ops\n");
874
875 reassociate_stats.ops_eliminated += ops->length () - 1;
876
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
880 }
881 }
882 else if (integer_zerop (oelast->op))
883 {
884 if (ops->length () != 1)
885 {
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found | 0, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
890 }
891 }
892 break;
893 case MULT_EXPR:
894 if (integer_zerop (oelast->op)
895 || (FLOAT_TYPE_P (type)
896 && !HONOR_NANS (TYPE_MODE (type))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
898 && real_zerop (oelast->op)))
899 {
900 if (ops->length () != 1)
901 {
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found * 0, removing all other ops\n");
904
905 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (1);
907 ops->quick_push (oelast);
908 return;
909 }
910 }
911 else if (integer_onep (oelast->op)
912 || (FLOAT_TYPE_P (type)
913 && !HONOR_SNANS (TYPE_MODE (type))
914 && real_onep (oelast->op)))
915 {
916 if (ops->length () != 1)
917 {
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found * 1, removing\n");
920 ops->pop ();
921 reassociate_stats.ops_eliminated++;
922 return;
923 }
924 }
925 break;
926 case BIT_XOR_EXPR:
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (integer_zerop (oelast->op)
930 || (FLOAT_TYPE_P (type)
931 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
932 && fold_real_zero_addition_p (type, oelast->op,
933 opcode == MINUS_EXPR)))
934 {
935 if (ops->length () != 1)
936 {
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "Found [|^+] 0, removing\n");
939 ops->pop ();
940 reassociate_stats.ops_eliminated++;
941 return;
942 }
943 }
944 break;
945 default:
946 break;
947 }
948 }
949 }
950
951
952 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
953 bool, bool);
954
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s {
957 int cnt;
958 int id;
959 enum tree_code oecode;
960 tree op;
961 } oecount;
962
963
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec<oecount> cvec;
966
967
968 /* Oecount hashtable helpers. */
969
970 struct oecount_hasher : typed_noop_remove <void>
971 {
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type;
975 typedef void compare_type;
976 static inline hashval_t hash (const value_type *);
977 static inline bool equal (const value_type *, const compare_type *);
978 };
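/* The table stores small integers disguised as pointers: an oecount is
   referred to by its index into CVEC plus 42.  The bias is presumably
   there so that a stored value can never be 0 or 1, values that hash
   tables traditionally reserve for empty and deleted entries; the hash
   and equal functions below simply subtract it again to reach the
   underlying oecount.  */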
979
980 /* Hash function for oecount. */
981
982 inline hashval_t
983 oecount_hasher::hash (const value_type *p)
984 {
985 const oecount *c = &cvec[(size_t)p - 42];
986 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
987 }
988
989 /* Comparison function for oecount. */
990
991 inline bool
992 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
993 {
994 const oecount *c1 = &cvec[(size_t)p1 - 42];
995 const oecount *c2 = &cvec[(size_t)p2 - 42];
996 return (c1->oecode == c2->oecode
997 && c1->op == c2->op);
998 }
999
1000 /* Comparison function for qsort sorting oecount elements by count. */
1001
1002 static int
1003 oecount_cmp (const void *p1, const void *p2)
1004 {
1005 const oecount *c1 = (const oecount *)p1;
1006 const oecount *c2 = (const oecount *)p2;
1007 if (c1->cnt != c2->cnt)
1008 return c1->cnt - c2->cnt;
1009 else
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1->id - c2->id;
1012 }
1013
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1016
1017 static bool
1018 stmt_is_power_of_op (gimple stmt, tree op)
1019 {
1020 tree fndecl;
1021
1022 if (!is_gimple_call (stmt))
1023 return false;
1024
1025 fndecl = gimple_call_fndecl (stmt);
1026
1027 if (!fndecl
1028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1029 return false;
1030
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1032 {
1033 CASE_FLT_FN (BUILT_IN_POW):
1034 CASE_FLT_FN (BUILT_IN_POWI):
1035 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1036
1037 default:
1038 return false;
1039 }
1040 }
1041
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1045
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt)
1048 {
1049 REAL_VALUE_TYPE c, cint;
1050 HOST_WIDE_INT power;
1051 tree arg1;
1052
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1054 {
1055 CASE_FLT_FN (BUILT_IN_POW):
1056 arg1 = gimple_call_arg (stmt, 1);
1057 c = TREE_REAL_CST (arg1);
1058 power = real_to_integer (&c) - 1;
1059 real_from_integer (&cint, VOIDmode, power, 0, 0);
1060 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1061 return power;
1062
1063 CASE_FLT_FN (BUILT_IN_POWI):
1064 arg1 = gimple_call_arg (stmt, 1);
1065 power = TREE_INT_CST_LOW (arg1) - 1;
1066 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1067 return power;
1068
1069 default:
1070 gcc_unreachable ();
1071 }
1072 }
1073
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1077
1078 static void
1079 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1080 {
1081 tree lhs;
1082 gimple use_stmt;
1083 use_operand_p use;
1084 gimple_stmt_iterator gsi;
1085
1086 if (is_gimple_call (stmt))
1087 lhs = gimple_call_lhs (stmt);
1088 else
1089 lhs = gimple_assign_lhs (stmt);
1090
1091 gcc_assert (has_single_use (lhs));
1092 single_imm_use (lhs, &use, &use_stmt);
1093 if (lhs == *def)
1094 *def = op;
1095 SET_USE (use, op);
1096 if (TREE_CODE (op) != SSA_NAME)
1097 update_stmt (use_stmt);
1098 gsi = gsi_for_stmt (stmt);
1099 unlink_stmt_vdef (stmt);
1100 gsi_remove (&gsi, true);
1101 release_defs (stmt);
1102 }
1103
1104 /* Walks the linear chain with result *DEF searching for an operation
1105 with operand OP and code OPCODE, removing it from the chain. *DEF
1106 is updated if there is only one operand but no operation left. */
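/* As a sketch with hypothetical SSA names: if *DEF is t3 and the chain is

     t2 = x * y;
     t3 = t2 * z;

   then calling this with OPCODE == MULT_EXPR and OP == y rewires the
   single use of t2 to x and removes t2's statement, leaving t3 = x * z,
   i.e. the chain with one factor of y taken out.  */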
1107
1108 static void
1109 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1110 {
1111 gimple stmt = SSA_NAME_DEF_STMT (*def);
1112
1113 do
1114 {
1115 tree name;
1116
1117 if (opcode == MULT_EXPR
1118 && stmt_is_power_of_op (stmt, op))
1119 {
1120 if (decrement_power (stmt) == 1)
1121 propagate_op_to_single_use (op, stmt, def);
1122 return;
1123 }
1124
1125 name = gimple_assign_rhs1 (stmt);
1126
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1129 single use. */
1130 if (gimple_assign_rhs_code (stmt) == opcode
1131 && (name == op
1132 || gimple_assign_rhs2 (stmt) == op))
1133 {
1134 if (name == op)
1135 name = gimple_assign_rhs2 (stmt);
1136 propagate_op_to_single_use (name, stmt, def);
1137 return;
1138 }
1139
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode == MULT_EXPR
1143 && gimple_assign_rhs_code (stmt) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt)))
1146 {
1147 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1148 if (stmt_is_power_of_op (stmt2, op))
1149 {
1150 if (decrement_power (stmt2) == 1)
1151 propagate_op_to_single_use (op, stmt2, def);
1152 return;
1153 }
1154 }
1155
1156 /* Continue walking the chain. */
1157 gcc_assert (name != op
1158 && TREE_CODE (name) == SSA_NAME);
1159 stmt = SSA_NAME_DEF_STMT (name);
1160 }
1161 while (1);
1162 }
1163
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1166
1167 static bool
1168 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1169 {
1170 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1171
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1173 SSA_NAME. Assume it lives at the beginning of function and
1174 thus dominates everything. */
1175 if (!bb1 || s1 == s2)
1176 return true;
1177
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1179 if (!bb2)
1180 return false;
1181
1182 if (bb1 == bb2)
1183 {
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel; if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1) == GIMPLE_PHI)
1188 return true;
1189
1190 if (gimple_code (s2) == GIMPLE_PHI)
1191 return false;
1192
1193 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1194
1195 if (gimple_uid (s1) < gimple_uid (s2))
1196 return true;
1197
1198 if (gimple_uid (s1) > gimple_uid (s2))
1199 return false;
1200
1201 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1202 unsigned int uid = gimple_uid (s1);
1203 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1204 {
1205 gimple s = gsi_stmt (gsi);
1206 if (gimple_uid (s) != uid)
1207 break;
1208 if (s == s2)
1209 return true;
1210 }
1211
1212 return false;
1213 }
1214
1215 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1216 }
1217
1218 /* Insert STMT after INSERT_POINT. */
1219
1220 static void
1221 insert_stmt_after (gimple stmt, gimple insert_point)
1222 {
1223 gimple_stmt_iterator gsi;
1224 basic_block bb;
1225
1226 if (gimple_code (insert_point) == GIMPLE_PHI)
1227 bb = gimple_bb (insert_point);
1228 else if (!stmt_ends_bb_p (insert_point))
1229 {
1230 gsi = gsi_for_stmt (insert_point);
1231 gimple_set_uid (stmt, gimple_uid (insert_point));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return;
1234 }
1235 else
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1242 gsi = gsi_after_labels (bb);
1243 if (gsi_end_p (gsi))
1244 {
1245 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1246 gimple_set_uid (stmt,
1247 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1248 }
1249 else
1250 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1251 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1252 }
1253
1254 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1255 name of type TYPE for the result. Places the statement after the
1256 definition of either OP1 or OP2. Returns the new statement. */
1257
1258 static gimple
1259 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1260 {
1261 gimple op1def = NULL, op2def = NULL;
1262 gimple_stmt_iterator gsi;
1263 tree op;
1264 gimple sum;
1265
1266 /* Create the addition statement. */
1267 op = make_ssa_name (type, NULL);
1268 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1269
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1) == SSA_NAME)
1272 op1def = SSA_NAME_DEF_STMT (op1);
1273 if (TREE_CODE (op2) == SSA_NAME)
1274 op2def = SSA_NAME_DEF_STMT (op2);
1275 if ((!op1def || gimple_nop_p (op1def))
1276 && (!op2def || gimple_nop_p (op2def)))
1277 {
1278 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1279 if (gsi_end_p (gsi))
1280 {
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1283 gimple_set_uid (sum,
1284 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1285 }
1286 else
1287 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1288 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1289 }
1290 else
1291 {
1292 gimple insert_point;
1293 if ((!op1def || gimple_nop_p (op1def))
1294 || (op2def && !gimple_nop_p (op2def)
1295 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1296 insert_point = op2def;
1297 else
1298 insert_point = op1def;
1299 insert_stmt_after (sum, insert_point);
1300 }
1301 update_stmt (sum);
1302
1303 return sum;
1304 }
1305
1306 /* Perform un-distribution of divisions and multiplications.
1307 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1308 to (A + B) / X for real X.
1309
1310 The algorithm is organized as follows.
1311
1312 - First we walk the addition chain *OPS looking for summands that
1313 are defined by a multiplication or a real division. This results
1314 in the candidates bitmap with relevant indices into *OPS.
1315
1316 - Second we build the chains of multiplications or divisions for
1317 these candidates, counting the number of occurrences of (operand, code)
1318 pairs in all of the candidates chains.
1319
1320 - Third we sort the (operand, code) pairs by number of occurrence and
1321 process them starting with the pair with the most uses.
1322
1323 * For each such pair we walk the candidates again to build a
1324 second candidate bitmap noting all multiplication/division chains
1325 that have at least one occurrence of (operand, code).
1326
1327 * We build an alternate addition chain only covering these
1328 candidates with one (operand, code) operation removed from their
1329 multiplication/division chain.
1330
1331 * The first candidate gets replaced by the alternate addition chain
1332 multiplied/divided by the operand.
1333
1334 * All candidate chains get disabled for further processing and
1335 processing of (operand, code) pairs continues.
1336
1337 The alternate addition chains built are re-processed by the main
1338 reassociation algorithm which allows optimizing a * x * y + b * y * x
1339 to (a + b) * x * y in one invocation of the reassociation pass. */
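/* A short worked illustration with hypothetical operands: for the
   addition chain a*x + b*x + c, the candidates are the two
   multiplications, the counting table records (x, MULT_EXPR) with a
   count of 2, so the code below builds the alternate chain a + b,
   replaces the first candidate with (a + b) * x, turns the second
   candidate into the constant 0, and the resulting list
   { (a + b) * x, 0, c } is then handed back to the normal
   reassociation machinery.  */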
1340
1341 static bool
1342 undistribute_ops_list (enum tree_code opcode,
1343 vec<operand_entry_t> *ops, struct loop *loop)
1344 {
1345 unsigned int length = ops->length ();
1346 operand_entry_t oe1;
1347 unsigned i, j;
1348 sbitmap candidates, candidates2;
1349 unsigned nr_candidates, nr_candidates2;
1350 sbitmap_iterator sbi0;
1351 vec<operand_entry_t> *subops;
1352 hash_table <oecount_hasher> ctable;
1353 bool changed = false;
1354 int next_oecount_id = 0;
1355
1356 if (length <= 1
1357 || opcode != PLUS_EXPR)
1358 return false;
1359
1360 /* Build a list of candidates to process. */
1361 candidates = sbitmap_alloc (length);
1362 bitmap_clear (candidates);
1363 nr_candidates = 0;
1364 FOR_EACH_VEC_ELT (*ops, i, oe1)
1365 {
1366 enum tree_code dcode;
1367 gimple oe1def;
1368
1369 if (TREE_CODE (oe1->op) != SSA_NAME)
1370 continue;
1371 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1372 if (!is_gimple_assign (oe1def))
1373 continue;
1374 dcode = gimple_assign_rhs_code (oe1def);
1375 if ((dcode != MULT_EXPR
1376 && dcode != RDIV_EXPR)
1377 || !is_reassociable_op (oe1def, dcode, loop))
1378 continue;
1379
1380 bitmap_set_bit (candidates, i);
1381 nr_candidates++;
1382 }
1383
1384 if (nr_candidates < 2)
1385 {
1386 sbitmap_free (candidates);
1387 return false;
1388 }
1389
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1391 {
1392 fprintf (dump_file, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file,
1394 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1395 fprintf (dump_file, " %d\n", nr_candidates);
1396 }
1397
1398 /* Build linearized sub-operand lists and the counting table. */
1399 cvec.create (0);
1400 ctable.create (15);
1401 /* ??? Macro arguments cannot have multi-argument template types in
1402 them. This typedef is needed to work around that limitation. */
1403 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1404 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1406 {
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1410
1411 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1412 oecode = gimple_assign_rhs_code (oedef);
1413 linearize_expr_tree (&subops[i], oedef,
1414 associative_tree_code (oecode), false);
1415
1416 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1417 {
1418 oecount c;
1419 void **slot;
1420 size_t idx;
1421 c.oecode = oecode;
1422 c.cnt = 1;
1423 c.id = next_oecount_id++;
1424 c.op = oe1->op;
1425 cvec.safe_push (c);
1426 idx = cvec.length () + 41;
1427 slot = ctable.find_slot ((void *)idx, INSERT);
1428 if (!*slot)
1429 {
1430 *slot = (void *)idx;
1431 }
1432 else
1433 {
1434 cvec.pop ();
1435 cvec[(size_t)*slot - 42].cnt++;
1436 }
1437 }
1438 }
1439 ctable.dispose ();
1440
1441 /* Sort the counting table. */
1442 cvec.qsort (oecount_cmp);
1443
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1445 {
1446 oecount *c;
1447 fprintf (dump_file, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec, j, c)
1449 {
1450 fprintf (dump_file, " %u %s: ", c->cnt,
1451 c->oecode == MULT_EXPR
1452 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1453 print_generic_expr (dump_file, c->op, 0);
1454 fprintf (dump_file, "\n");
1455 }
1456 }
1457
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2 = sbitmap_alloc (length);
1460 while (!cvec.is_empty ())
1461 {
1462 oecount *c = &cvec.last ();
1463 if (c->cnt < 2)
1464 break;
1465
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2);
1469 nr_candidates2 = 0;
1470 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1471 {
1472 gimple oedef;
1473 enum tree_code oecode;
1474 unsigned j;
1475 tree op = (*ops)[i]->op;
1476
1477 /* If we undistributed in this chain already this may be
1478 a constant. */
1479 if (TREE_CODE (op) != SSA_NAME)
1480 continue;
1481
1482 oedef = SSA_NAME_DEF_STMT (op);
1483 oecode = gimple_assign_rhs_code (oedef);
1484 if (oecode != c->oecode)
1485 continue;
1486
1487 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1488 {
1489 if (oe1->op == c->op)
1490 {
1491 bitmap_set_bit (candidates2, i);
1492 ++nr_candidates2;
1493 break;
1494 }
1495 }
1496 }
1497
1498 if (nr_candidates2 >= 2)
1499 {
1500 operand_entry_t oe1, oe2;
1501 gimple prod;
1502 int first = bitmap_first_set_bit (candidates2);
1503
1504 /* Build the new addition chain. */
1505 oe1 = (*ops)[first];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1507 {
1508 fprintf (dump_file, "Building (");
1509 print_generic_expr (dump_file, oe1->op, 0);
1510 }
1511 zero_one_operation (&oe1->op, c->oecode, c->op);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1513 {
1514 gimple sum;
1515 oe2 = (*ops)[i];
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1517 {
1518 fprintf (dump_file, " + ");
1519 print_generic_expr (dump_file, oe2->op, 0);
1520 }
1521 zero_one_operation (&oe2->op, c->oecode, c->op);
1522 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1523 oe1->op, oe2->op, opcode);
1524 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1525 oe2->rank = 0;
1526 oe1->op = gimple_get_lhs (sum);
1527 }
1528
1529 /* Apply the multiplication/division. */
1530 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1531 oe1->op, c->op, c->oecode);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1533 {
1534 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1535 print_generic_expr (dump_file, c->op, 0);
1536 fprintf (dump_file, "\n");
1537 }
1538
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1->op = gimple_assign_lhs (prod);
1542 oe1->rank = get_rank (oe1->op);
1543 subops[first].release ();
1544
1545 changed = true;
1546 }
1547
1548 cvec.pop ();
1549 }
1550
1551 for (i = 0; i < ops->length (); ++i)
1552 subops[i].release ();
1553 free (subops);
1554 cvec.release ();
1555 sbitmap_free (candidates);
1556 sbitmap_free (candidates2);
1557
1558 return changed;
1559 }
1560
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1565
1566 static bool
1567 eliminate_redundant_comparison (enum tree_code opcode,
1568 vec<operand_entry_t> *ops,
1569 unsigned int currindex,
1570 operand_entry_t curr)
1571 {
1572 tree op1, op2;
1573 enum tree_code lcode, rcode;
1574 gimple def1, def2;
1575 int i;
1576 operand_entry_t oe;
1577
1578 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1579 return false;
1580
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr->op) != SSA_NAME)
1583 return false;
1584 def1 = SSA_NAME_DEF_STMT (curr->op);
1585 if (!is_gimple_assign (def1))
1586 return false;
1587 lcode = gimple_assign_rhs_code (def1);
1588 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1589 return false;
1590 op1 = gimple_assign_rhs1 (def1);
1591 op2 = gimple_assign_rhs2 (def1);
1592
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1595 {
1596 tree t;
1597
1598 if (TREE_CODE (oe->op) != SSA_NAME)
1599 continue;
1600 def2 = SSA_NAME_DEF_STMT (oe->op);
1601 if (!is_gimple_assign (def2))
1602 continue;
1603 rcode = gimple_assign_rhs_code (def2);
1604 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1605 continue;
1606
1607 /* If we got here, we have a match. See if we can combine the
1608 two comparisons. */
1609 if (opcode == BIT_IOR_EXPR)
1610 t = maybe_fold_or_comparisons (lcode, op1, op2,
1611 rcode, gimple_assign_rhs1 (def2),
1612 gimple_assign_rhs2 (def2));
1613 else
1614 t = maybe_fold_and_comparisons (lcode, op1, op2,
1615 rcode, gimple_assign_rhs1 (def2),
1616 gimple_assign_rhs2 (def2));
1617 if (!t)
1618 continue;
1619
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1625 t = fold_convert (TREE_TYPE (curr->op), t);
1626
1627 if (TREE_CODE (t) != INTEGER_CST
1628 && !operand_equal_p (t, curr->op, 0))
1629 {
1630 enum tree_code subcode;
1631 tree newop1, newop2;
1632 if (!COMPARISON_CLASS_P (t))
1633 continue;
1634 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2);
1637 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1638 continue;
1639 }
1640
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1642 {
1643 fprintf (dump_file, "Equivalence: ");
1644 print_generic_expr (dump_file, curr->op, 0);
1645 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1646 print_generic_expr (dump_file, oe->op, 0);
1647 fprintf (dump_file, " -> ");
1648 print_generic_expr (dump_file, t, 0);
1649 fprintf (dump_file, "\n");
1650 }
1651
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1653 expression t. */
1654 ops->ordered_remove (i);
1655 reassociate_stats.ops_eliminated ++;
1656
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t) == INTEGER_CST)
1662 {
1663 ops->ordered_remove (currindex);
1664 add_to_ops_vec (ops, t);
1665 }
1666 else if (!operand_equal_p (t, curr->op, 0))
1667 {
1668 gimple sum;
1669 enum tree_code subcode;
1670 tree newop1;
1671 tree newop2;
1672 gcc_assert (COMPARISON_CLASS_P (t));
1673 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2);
1676 gcc_checking_assert (is_gimple_val (newop1)
1677 && is_gimple_val (newop2));
1678 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1679 curr->op = gimple_get_lhs (sum);
1680 }
1681 return true;
1682 }
1683
1684 return false;
1685 }
1686
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
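/* For instance, if OPS for a PLUS_EXPR chain ends with the constants 5
   and 3 (constants have rank 0 and therefore sort to the end), the code
   below pops both, folds them into 8, pushes 8 back onto the list and
   recurses so the shortened list gets the full treatment again.  */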
1690
1691 static void
1692 optimize_ops_list (enum tree_code opcode,
1693 vec<operand_entry_t> *ops)
1694 {
1695 unsigned int length = ops->length ();
1696 unsigned int i;
1697 operand_entry_t oe;
1698 operand_entry_t oelast = NULL;
1699 bool iterate = false;
1700
1701 if (length == 1)
1702 return;
1703
1704 oelast = ops->last ();
1705
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1709 {
1710 operand_entry_t oelm1 = (*ops)[length - 2];
1711
1712 if (oelm1->rank == 0
1713 && is_gimple_min_invariant (oelm1->op)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1715 TREE_TYPE (oelast->op)))
1716 {
1717 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1718 oelm1->op, oelast->op);
1719
1720 if (folded && is_gimple_min_invariant (folded))
1721 {
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Merging constants\n");
1724
1725 ops->pop ();
1726 ops->pop ();
1727
1728 add_to_ops_vec (ops, folded);
1729 reassociate_stats.constants_eliminated++;
1730
1731 optimize_ops_list (opcode, ops);
1732 return;
1733 }
1734 }
1735 }
1736
1737 eliminate_using_constants (opcode, ops);
1738 oelast = NULL;
1739
1740 for (i = 0; ops->iterate (i, &oe);)
1741 {
1742 bool done = false;
1743
1744 if (eliminate_not_pairs (opcode, ops, i, oe))
1745 return;
1746 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1747 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1748 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1749 {
1750 if (done)
1751 return;
1752 iterate = true;
1753 oelast = NULL;
1754 continue;
1755 }
1756 oelast = oe;
1757 i++;
1758 }
1759
1760 length = ops->length ();
1761 oelast = ops->last ();
1762
1763 if (iterate)
1764 optimize_ops_list (opcode, ops);
1765 }
1766
1767 /* The following functions are subroutines to optimize_range_tests and allow
1768 it to try to change a logical combination of comparisons into a range
1769 test.
1770
1771 For example, both
1772 X == 2 || X == 5 || X == 3 || X == 4
1773 and
1774 X >= 2 && X <= 5
1775 are converted to
1776 (unsigned) (X - 2) <= 3
1777
1778 For more information see the comments above fold_range_test in fold-const.c;
1779 this implementation is for GIMPLE. */
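/* Why the unsigned trick above is valid: when X is in {2, 3, 4, 5},
   (unsigned) (X - 2) is in {0, 1, 2, 3}; for any X below 2 the
   subtraction wraps around to a huge unsigned value, and for any X
   above 5 the result exceeds 3, so the single comparison <= 3 covers
   exactly the original set.  */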
1780
1781 struct range_entry
1782 {
1783 tree exp;
1784 tree low;
1785 tree high;
1786 bool in_p;
1787 bool strict_overflow_p;
1788 unsigned int idx, next;
1789 };
1790
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and the STMT argument is ignored; otherwise STMT
1794 should be a GIMPLE_COND. */
1795
1796 static void
1797 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1798 {
1799 int in_p;
1800 tree low, high;
1801 bool is_bool, strict_overflow_p;
1802
1803 r->exp = NULL_TREE;
1804 r->in_p = false;
1805 r->strict_overflow_p = false;
1806 r->low = NULL_TREE;
1807 r->high = NULL_TREE;
1808 if (exp != NULL_TREE
1809 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1810 return;
1811
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1818 high = low;
1819 in_p = 0;
1820 strict_overflow_p = false;
1821 is_bool = false;
1822 if (exp == NULL_TREE)
1823 is_bool = true;
1824 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1825 {
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1827 is_bool = true;
1828 else
1829 return;
1830 }
1831 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1832 is_bool = true;
1833
1834 while (1)
1835 {
1836 enum tree_code code;
1837 tree arg0, arg1, exp_type;
1838 tree nexp;
1839 location_t loc;
1840
1841 if (exp != NULL_TREE)
1842 {
1843 if (TREE_CODE (exp) != SSA_NAME)
1844 break;
1845
1846 stmt = SSA_NAME_DEF_STMT (exp);
1847 if (!is_gimple_assign (stmt))
1848 break;
1849
1850 code = gimple_assign_rhs_code (stmt);
1851 arg0 = gimple_assign_rhs1 (stmt);
1852 arg1 = gimple_assign_rhs2 (stmt);
1853 exp_type = TREE_TYPE (exp);
1854 }
1855 else
1856 {
1857 code = gimple_cond_code (stmt);
1858 arg0 = gimple_cond_lhs (stmt);
1859 arg1 = gimple_cond_rhs (stmt);
1860 exp_type = boolean_type_node;
1861 }
1862
1863 if (TREE_CODE (arg0) != SSA_NAME)
1864 break;
1865 loc = gimple_location (stmt);
1866 switch (code)
1867 {
1868 case BIT_NOT_EXPR:
1869 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1870 /* Ensure the range is either +[-,0], +[0,0],
1871 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1872 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1873 or similar expression of unconditional true or
1874 false, it should not be negated. */
1875 && ((high && integer_zerop (high))
1876 || (low && integer_onep (low))))
1877 {
1878 in_p = !in_p;
1879 exp = arg0;
1880 continue;
1881 }
1882 break;
1883 case SSA_NAME:
1884 exp = arg0;
1885 continue;
1886 CASE_CONVERT:
1887 if (is_bool)
1888 goto do_default;
1889 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1890 {
1891 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1892 is_bool = true;
1893 else
1894 return;
1895 }
1896 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1897 is_bool = true;
1898 goto do_default;
1899 case EQ_EXPR:
1900 case NE_EXPR:
1901 case LT_EXPR:
1902 case LE_EXPR:
1903 case GE_EXPR:
1904 case GT_EXPR:
1905 is_bool = true;
1906 /* FALLTHRU */
1907 default:
1908 if (!is_bool)
1909 return;
1910 do_default:
1911 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1912 &low, &high, &in_p,
1913 &strict_overflow_p);
1914 if (nexp != NULL_TREE)
1915 {
1916 exp = nexp;
1917 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1918 continue;
1919 }
1920 break;
1921 }
1922 break;
1923 }
1924 if (is_bool)
1925 {
1926 r->exp = exp;
1927 r->in_p = in_p;
1928 r->low = low;
1929 r->high = high;
1930 r->strict_overflow_p = strict_overflow_p;
1931 }
1932 }
1933
1934 /* Comparison function for qsort. Sort entries
1935 without SSA_NAME exp first, then with SSA_NAMEs sorted
1936 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1937 by increasing ->low and if ->low is the same, by increasing
1938 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1939 maximum. */
1940
1941 static int
1942 range_entry_cmp (const void *a, const void *b)
1943 {
1944 const struct range_entry *p = (const struct range_entry *) a;
1945 const struct range_entry *q = (const struct range_entry *) b;
1946
1947 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1948 {
1949 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1950 {
1951 /* Group range_entries for the same SSA_NAME together. */
1952 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1953 return -1;
1954 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1955 return 1;
1956 /* If ->low is different, NULL low goes first, then by
1957 ascending low. */
1958 if (p->low != NULL_TREE)
1959 {
1960 if (q->low != NULL_TREE)
1961 {
1962 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1963 p->low, q->low);
1964 if (tem && integer_onep (tem))
1965 return -1;
1966 tem = fold_binary (GT_EXPR, boolean_type_node,
1967 p->low, q->low);
1968 if (tem && integer_onep (tem))
1969 return 1;
1970 }
1971 else
1972 return 1;
1973 }
1974 else if (q->low != NULL_TREE)
1975 return -1;
1976 /* If ->high is different, NULL high goes last, before that by
1977 ascending high. */
1978 if (p->high != NULL_TREE)
1979 {
1980 if (q->high != NULL_TREE)
1981 {
1982 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1983 p->high, q->high);
1984 if (tem && integer_onep (tem))
1985 return -1;
1986 tem = fold_binary (GT_EXPR, boolean_type_node,
1987 p->high, q->high);
1988 if (tem && integer_onep (tem))
1989 return 1;
1990 }
1991 else
1992 return -1;
1993 }
1994 else if (p->high != NULL_TREE)
1995 return 1;
1996 /* If both ranges are the same, sort below by ascending idx. */
1997 }
1998 else
1999 return 1;
2000 }
2001 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2002 return -1;
2003
2004 if (p->idx < q->idx)
2005 return -1;
2006 else
2007 {
2008 gcc_checking_assert (p->idx > q->idx);
2009 return 1;
2010 }
2011 }
2012
2013 /* Helper routine of optimize_range_tests.
2014 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is the merged range of
2015 RANGE and of the COUNT ranges OTHERRANGE through OTHERRANGE + COUNT - 1,
2016 OPCODE and OPS are arguments of optimize_range_tests. Return
2017 true if the range merge has been successful.
2018 If OPCODE is ERROR_MARK, this is called from within
2019 maybe_optimize_range_tests and is performing inter-bb range optimization.
2020 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2021 oe->rank. */
2022
2023 static bool
2024 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2025 unsigned int count, enum tree_code opcode,
2026 vec<operand_entry_t> *ops, tree exp, bool in_p,
2027 tree low, tree high, bool strict_overflow_p)
2028 {
2029 operand_entry_t oe = (*ops)[range->idx];
2030 tree op = oe->op;
2031 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2032 location_t loc = gimple_location (stmt);
2033 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2034 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2035 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2036 gimple_stmt_iterator gsi;
2037
2038 if (tem == NULL_TREE)
2039 return false;
2040
2041 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2042 warning_at (loc, OPT_Wstrict_overflow,
2043 "assuming signed overflow does not occur "
2044 "when simplifying range test");
2045
2046 if (dump_file && (dump_flags & TDF_DETAILS))
2047 {
2048 struct range_entry *r;
2049 fprintf (dump_file, "Optimizing range tests ");
2050 print_generic_expr (dump_file, range->exp, 0);
2051 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2052 print_generic_expr (dump_file, range->low, 0);
2053 fprintf (dump_file, ", ");
2054 print_generic_expr (dump_file, range->high, 0);
2055 fprintf (dump_file, "]");
2056 for (r = otherrange; r < otherrange + count; r++)
2057 {
2058 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2059 print_generic_expr (dump_file, r->low, 0);
2060 fprintf (dump_file, ", ");
2061 print_generic_expr (dump_file, r->high, 0);
2062 fprintf (dump_file, "]");
2063 }
2064 fprintf (dump_file, "\n into ");
2065 print_generic_expr (dump_file, tem, 0);
2066 fprintf (dump_file, "\n");
2067 }
2068
2069 if (opcode == BIT_IOR_EXPR
2070 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2071 tem = invert_truthvalue_loc (loc, tem);
2072
2073 tem = fold_convert_loc (loc, optype, tem);
2074 gsi = gsi_for_stmt (stmt);
2075 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2076 GSI_SAME_STMT);
2077 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2078 if (gimple_uid (gsi_stmt (gsi)))
2079 break;
2080 else
2081 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2082
2083 oe->op = tem;
2084 range->exp = exp;
2085 range->low = low;
2086 range->high = high;
2087 range->in_p = in_p;
2088 range->strict_overflow_p = false;
2089
2090 for (range = otherrange; range < otherrange + count; range++)
2091 {
2092 oe = (*ops)[range->idx];
2093 /* Now change all the other range test immediate uses, so that
2094 those tests will be optimized away. */
2095 if (opcode == ERROR_MARK)
2096 {
2097 if (oe->op)
2098 oe->op = build_int_cst (TREE_TYPE (oe->op),
2099 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2100 else
2101 oe->op = (oe->rank == BIT_IOR_EXPR
2102 ? boolean_false_node : boolean_true_node);
2103 }
2104 else
2105 oe->op = error_mark_node;
2106 range->exp = NULL_TREE;
2107 }
2108 return true;
2109 }
2110
2111 /* Optimize X == CST1 || X == CST2
2112 if popcount (CST1 ^ CST2) == 1 into
2113 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2114 Similarly for ranges. E.g.
2115 X != 2 && X != 3 && X != 10 && X != 11
2116 will be transformed by the previous optimization into
2117 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2118 and this loop can transform that into
2119 !(((X & ~8) - 2U) <= 1U). */
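/* A concrete sketch of the transform above (illustrative, with made-up
   constants): for X == 4 || X == 6 we have lowi = highi = 4 and
   lowj = highj = 6, so lowi ^ lowj == highi ^ highj == 2, a power of
   two, and the two tests are merged into (X & ~2) == 4. */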
2120
2121 static bool
2122 optimize_range_tests_xor (enum tree_code opcode, tree type,
2123 tree lowi, tree lowj, tree highi, tree highj,
2124 vec<operand_entry_t> *ops,
2125 struct range_entry *rangei,
2126 struct range_entry *rangej)
2127 {
2128 tree lowxor, highxor, tem, exp;
2129 /* Check lowi ^ lowj == highi ^ highj and
2130 popcount (lowi ^ lowj) == 1. */
2131 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2132 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2133 return false;
2134 if (tree_log2 (lowxor) < 0)
2135 return false;
2136 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2137 if (!tree_int_cst_equal (lowxor, highxor))
2138 return false;
2139
2140 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2141 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2142 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2143 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2144 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2145 rangei->in_p, lowj, highj,
2146 rangei->strict_overflow_p
2147 || rangej->strict_overflow_p))
2148 return true;
2149 return false;
2150 }
2151
2152 /* Optimize X == CST1 || X == CST2
2153 if popcount (CST2 - CST1) == 1 into
2154 ((X - CST1) & ~(CST2 - CST1)) == 0.
2155 Similarly for ranges. E.g.
2156 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2157 || X == 75 || X == 45
2158 will be transformed by the previous optimization into
2159 (X - 43U) <= 3U || (X - 75U) <= 3U
2160 and this loop can transform that into
2161 ((X - 43U) & ~(75U - 43U)) <= 3U. */
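/* Tracing the checks below on the example above (illustrative): for
   (X - 43U) <= 3U and (X - 75U) <= 3U we have lowi = 43, highi = 46,
   lowj = 75, highj = 78, so highi - lowi == highj - lowj == 3 and
   lowj - lowi == 32, a power of two, which yields
   ((X - 43U) & ~32U) <= 3U. */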
2162 static bool
2163 optimize_range_tests_diff (enum tree_code opcode, tree type,
2164 tree lowi, tree lowj, tree highi, tree highj,
2165 vec<operand_entry_t> *ops,
2166 struct range_entry *rangei,
2167 struct range_entry *rangej)
2168 {
2169 tree tem1, tem2, mask;
2170 /* Check highi - lowi == highj - lowj. */
2171 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2172 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2173 return false;
2174 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2175 if (!tree_int_cst_equal (tem1, tem2))
2176 return false;
2177 /* Check popcount (lowj - lowi) == 1. */
2178 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2179 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2180 return false;
2181 if (tree_log2 (tem1) < 0)
2182 return false;
2183
2184 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2185 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2186 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2187 lowj = build_int_cst (type, 0);
2188 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2189 rangei->in_p, lowj, tem2,
2190 rangei->strict_overflow_p
2191 || rangej->strict_overflow_p))
2192 return true;
2193 return false;
2194 }
2195
2196 /* Perform the checks common to optimize_range_tests_xor and
2197 optimize_range_tests_diff on each candidate pair of ranges.
2198 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2199 Otherwise call optimize_range_tests_diff. */
2200
2201 static bool
2202 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2203 bool optimize_xor, vec<operand_entry_t> *ops,
2204 struct range_entry *ranges)
2205 {
2206 int i, j;
2207 bool any_changes = false;
2208 for (i = first; i < length; i++)
2209 {
2210 tree lowi, highi, lowj, highj, type, tem;
2211
2212 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2213 continue;
2214 type = TREE_TYPE (ranges[i].exp);
2215 if (!INTEGRAL_TYPE_P (type))
2216 continue;
2217 lowi = ranges[i].low;
2218 if (lowi == NULL_TREE)
2219 lowi = TYPE_MIN_VALUE (type);
2220 highi = ranges[i].high;
2221 if (highi == NULL_TREE)
2222 continue;
2223 for (j = i + 1; j < length && j < i + 64; j++)
2224 {
2225 bool changes;
2226 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2227 continue;
2228 lowj = ranges[j].low;
2229 if (lowj == NULL_TREE)
2230 continue;
2231 highj = ranges[j].high;
2232 if (highj == NULL_TREE)
2233 highj = TYPE_MAX_VALUE (type);
2234 /* Check lowj > highi. */
2235 tem = fold_binary (GT_EXPR, boolean_type_node,
2236 lowj, highi);
2237 if (tem == NULL_TREE || !integer_onep (tem))
2238 continue;
2239 if (optimize_xor)
2240 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2241 highi, highj, ops,
2242 ranges + i, ranges + j);
2243 else
2244 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2245 highi, highj, ops,
2246 ranges + i, ranges + j);
2247 if (changes)
2248 {
2249 any_changes = true;
2250 break;
2251 }
2252 }
2253 }
2254 return any_changes;
2255 }
2256
2257 /* Optimize range tests, similarly to how fold_range_test optimizes
2258 them on trees. The tree code for the binary
2259 operation between all the operands is OPCODE.
2260 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2261 maybe_optimize_range_tests for inter-bb range optimization.
2262 In that case if oe->op is NULL, oe->id is bb->index whose
2263 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2264 the actual opcode. */
2265
2266 static bool
2267 optimize_range_tests (enum tree_code opcode,
2268 vec<operand_entry_t> *ops)
2269 {
2270 unsigned int length = ops->length (), i, j, first;
2271 operand_entry_t oe;
2272 struct range_entry *ranges;
2273 bool any_changes = false;
2274
2275 if (length == 1)
2276 return false;
2277
2278 ranges = XNEWVEC (struct range_entry, length);
2279 for (i = 0; i < length; i++)
2280 {
2281 oe = (*ops)[i];
2282 ranges[i].idx = i;
2283 init_range_entry (ranges + i, oe->op,
2284 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2285 /* For | invert it now, we will invert it again before emitting
2286 the optimized expression. */
2287 if (opcode == BIT_IOR_EXPR
2288 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2289 ranges[i].in_p = !ranges[i].in_p;
2290 }
2291
2292 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2293 for (i = 0; i < length; i++)
2294 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2295 break;
2296
2297 /* Try to merge ranges. */
2298 for (first = i; i < length; i++)
2299 {
2300 tree low = ranges[i].low;
2301 tree high = ranges[i].high;
2302 int in_p = ranges[i].in_p;
2303 bool strict_overflow_p = ranges[i].strict_overflow_p;
2304 int update_fail_count = 0;
2305
2306 for (j = i + 1; j < length; j++)
2307 {
2308 if (ranges[i].exp != ranges[j].exp)
2309 break;
2310 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2311 ranges[j].in_p, ranges[j].low, ranges[j].high))
2312 break;
2313 strict_overflow_p |= ranges[j].strict_overflow_p;
2314 }
2315
2316 if (j == i + 1)
2317 continue;
2318
2319 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2320 ops, ranges[i].exp, in_p, low, high,
2321 strict_overflow_p))
2322 {
2323 i = j - 1;
2324 any_changes = true;
2325 }
2326 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2327 while update_range_test would fail. */
2328 else if (update_fail_count == 64)
2329 i = j - 1;
2330 else
2331 ++update_fail_count;
2332 }
2333
2334 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2335 ops, ranges);
2336
2337 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2338 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2339 ops, ranges);
2340
2341 if (any_changes && opcode != ERROR_MARK)
2342 {
2343 j = 0;
2344 FOR_EACH_VEC_ELT (*ops, i, oe)
2345 {
2346 if (oe->op == error_mark_node)
2347 continue;
2348 else if (i != j)
2349 (*ops)[j] = oe;
2350 j++;
2351 }
2352 ops->truncate (j);
2353 }
2354
2355 XDELETEVEC (ranges);
2356 return any_changes;
2357 }
2358
2359 /* Return true if STMT is a cast like:
2360 <bb N>:
2361 ...
2362 _123 = (int) _234;
2363
2364 <bb M>:
2365 # _345 = PHI <_123(N), 1(...), 1(...)>
2366 where _234 has bool type, _123 has a single use and
2367 bb N has a single successor M. This is commonly used in
2368 the last block of a range test. */
2369
2370 static bool
2371 final_range_test_p (gimple stmt)
2372 {
2373 basic_block bb, rhs_bb;
2374 edge e;
2375 tree lhs, rhs;
2376 use_operand_p use_p;
2377 gimple use_stmt;
2378
2379 if (!gimple_assign_cast_p (stmt))
2380 return false;
2381 bb = gimple_bb (stmt);
2382 if (!single_succ_p (bb))
2383 return false;
2384 e = single_succ_edge (bb);
2385 if (e->flags & EDGE_COMPLEX)
2386 return false;
2387
2388 lhs = gimple_assign_lhs (stmt);
2389 rhs = gimple_assign_rhs1 (stmt);
2390 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2391 || TREE_CODE (rhs) != SSA_NAME
2392 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2393 return false;
2394
2395 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2396 if (!single_imm_use (lhs, &use_p, &use_stmt))
2397 return false;
2398
2399 if (gimple_code (use_stmt) != GIMPLE_PHI
2400 || gimple_bb (use_stmt) != e->dest)
2401 return false;
2402
2403 /* And that the rhs is defined in the same loop. */
2404 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2405 if (rhs_bb == NULL
2406 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2407 return false;
2408
2409 return true;
2410 }
2411
2412 /* Return true if BB is a suitable basic block for inter-bb range test
2413 optimization. If BACKWARD is true, BB should be the only predecessor
2414 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2415 or it is compared against to find the common basic block to which all
2416 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2417 should be the only predecessor of BB. */
2418
2419 static bool
2420 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2421 bool backward)
2422 {
2423 edge_iterator ei, ei2;
2424 edge e, e2;
2425 gimple stmt;
2426 gimple_stmt_iterator gsi;
2427 bool other_edge_seen = false;
2428 bool is_cond;
2429
2430 if (test_bb == bb)
2431 return false;
2432 /* Check last stmt first. */
2433 stmt = last_stmt (bb);
2434 if (stmt == NULL
2435 || (gimple_code (stmt) != GIMPLE_COND
2436 && (backward || !final_range_test_p (stmt)))
2437 || gimple_visited_p (stmt)
2438 || stmt_could_throw_p (stmt)
2439 || *other_bb == bb)
2440 return false;
2441 is_cond = gimple_code (stmt) == GIMPLE_COND;
2442 if (is_cond)
2443 {
2444 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2445 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2446 to *OTHER_BB (if not set yet, try to find it out). */
2447 if (EDGE_COUNT (bb->succs) != 2)
2448 return false;
2449 FOR_EACH_EDGE (e, ei, bb->succs)
2450 {
2451 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2452 return false;
2453 if (e->dest == test_bb)
2454 {
2455 if (backward)
2456 continue;
2457 else
2458 return false;
2459 }
2460 if (e->dest == bb)
2461 return false;
2462 if (*other_bb == NULL)
2463 {
2464 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2465 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2466 return false;
2467 else if (e->dest == e2->dest)
2468 *other_bb = e->dest;
2469 if (*other_bb == NULL)
2470 return false;
2471 }
2472 if (e->dest == *other_bb)
2473 other_edge_seen = true;
2474 else if (backward)
2475 return false;
2476 }
2477 if (*other_bb == NULL || !other_edge_seen)
2478 return false;
2479 }
2480 else if (single_succ (bb) != *other_bb)
2481 return false;
2482
2483 /* Now check all PHIs of *OTHER_BB. */
2484 e = find_edge (bb, *other_bb);
2485 e2 = find_edge (test_bb, *other_bb);
2486 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2487 {
2488 gimple phi = gsi_stmt (gsi);
2489 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2490 corresponding to BB and TEST_BB predecessor must be the same. */
2491 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2492 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2493 {
2494 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2495 one of the PHIs should have the lhs of the last stmt in
2496 that block as PHI arg and that PHI should have 0 or 1
2497 corresponding to it in all other range test basic blocks
2498 considered. */
2499 if (!is_cond)
2500 {
2501 if (gimple_phi_arg_def (phi, e->dest_idx)
2502 == gimple_assign_lhs (stmt)
2503 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2504 || integer_onep (gimple_phi_arg_def (phi,
2505 e2->dest_idx))))
2506 continue;
2507 }
2508 else
2509 {
2510 gimple test_last = last_stmt (test_bb);
2511 if (gimple_code (test_last) != GIMPLE_COND
2512 && gimple_phi_arg_def (phi, e2->dest_idx)
2513 == gimple_assign_lhs (test_last)
2514 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2515 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2516 continue;
2517 }
2518
2519 return false;
2520 }
2521 }
2522 return true;
2523 }
2524
2525 /* Return true if BB doesn't have side-effects that would disallow
2526 range test optimization, all SSA_NAMEs set in the bb are consumed
2527 in the bb and there are no PHIs. */
2528
2529 static bool
2530 no_side_effect_bb (basic_block bb)
2531 {
2532 gimple_stmt_iterator gsi;
2533 gimple last;
2534
2535 if (!gimple_seq_empty_p (phi_nodes (bb)))
2536 return false;
2537 last = last_stmt (bb);
2538 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2539 {
2540 gimple stmt = gsi_stmt (gsi);
2541 tree lhs;
2542 imm_use_iterator imm_iter;
2543 use_operand_p use_p;
2544
2545 if (is_gimple_debug (stmt))
2546 continue;
2547 if (gimple_has_side_effects (stmt))
2548 return false;
2549 if (stmt == last)
2550 return true;
2551 if (!is_gimple_assign (stmt))
2552 return false;
2553 lhs = gimple_assign_lhs (stmt);
2554 if (TREE_CODE (lhs) != SSA_NAME)
2555 return false;
2556 if (gimple_assign_rhs_could_trap_p (stmt))
2557 return false;
2558 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2559 {
2560 gimple use_stmt = USE_STMT (use_p);
2561 if (is_gimple_debug (use_stmt))
2562 continue;
2563 if (gimple_bb (use_stmt) != bb)
2564 return false;
2565 }
2566 }
2567 return false;
2568 }
2569
2570 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2571 return true and fill in *OPS recursively. */
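/* Illustrative sketch with hypothetical SSA names: for
     _3 = _1 | _2;
     _5 = _3 | _4;
   calling get_ops on _5 with CODE == BIT_IOR_EXPR pushes _1, _2 and _4
   into *OPS, provided each of them is not itself reassociable and has
   a single use. */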
2572
2573 static bool
2574 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2575 struct loop *loop)
2576 {
2577 gimple stmt = SSA_NAME_DEF_STMT (var);
2578 tree rhs[2];
2579 int i;
2580
2581 if (!is_reassociable_op (stmt, code, loop))
2582 return false;
2583
2584 rhs[0] = gimple_assign_rhs1 (stmt);
2585 rhs[1] = gimple_assign_rhs2 (stmt);
2586 gimple_set_visited (stmt, true);
2587 for (i = 0; i < 2; i++)
2588 if (TREE_CODE (rhs[i]) == SSA_NAME
2589 && !get_ops (rhs[i], code, ops, loop)
2590 && has_single_use (rhs[i]))
2591 {
2592 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2593
2594 oe->op = rhs[i];
2595 oe->rank = code;
2596 oe->id = 0;
2597 oe->count = 1;
2598 ops->safe_push (oe);
2599 }
2600 return true;
2601 }
2602
2603 /* Find the ops that were added by get_ops starting from VAR, see if
2604 they were changed during update_range_test and if yes, create new
2605 stmts. */
2606
2607 static tree
2608 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2609 unsigned int *pidx, struct loop *loop)
2610 {
2611 gimple stmt = SSA_NAME_DEF_STMT (var);
2612 tree rhs[4];
2613 int i;
2614
2615 if (!is_reassociable_op (stmt, code, loop))
2616 return NULL;
2617
2618 rhs[0] = gimple_assign_rhs1 (stmt);
2619 rhs[1] = gimple_assign_rhs2 (stmt);
2620 rhs[2] = rhs[0];
2621 rhs[3] = rhs[1];
2622 for (i = 0; i < 2; i++)
2623 if (TREE_CODE (rhs[i]) == SSA_NAME)
2624 {
2625 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2626 if (rhs[2 + i] == NULL_TREE)
2627 {
2628 if (has_single_use (rhs[i]))
2629 rhs[2 + i] = ops[(*pidx)++]->op;
2630 else
2631 rhs[2 + i] = rhs[i];
2632 }
2633 }
2634 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2635 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2636 {
2637 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2638 var = make_ssa_name (TREE_TYPE (var), NULL);
2639 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2640 var, rhs[2], rhs[3]);
2641 gimple_set_uid (g, gimple_uid (stmt));
2642 gimple_set_visited (g, true);
2643 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2644 }
2645 return var;
2646 }
2647
2648 /* Structure to track the initial value passed to get_ops and
2649 the range in the ops vector for each basic block. */
2650
2651 struct inter_bb_range_test_entry
2652 {
2653 tree op;
2654 unsigned int first_idx, last_idx;
2655 };
2656
2657 /* Inter-bb range test optimization. */
2658
2659 static void
2660 maybe_optimize_range_tests (gimple stmt)
2661 {
2662 basic_block first_bb = gimple_bb (stmt);
2663 basic_block last_bb = first_bb;
2664 basic_block other_bb = NULL;
2665 basic_block bb;
2666 edge_iterator ei;
2667 edge e;
2668 auto_vec<operand_entry_t> ops;
2669 auto_vec<inter_bb_range_test_entry> bbinfo;
2670 bool any_changes = false;
2671
2672 /* Consider only basic blocks that end with GIMPLE_COND or
2673 a cast statement satisfying final_range_test_p. All
2674 but the last bb in the first_bb .. last_bb range
2675 should end with GIMPLE_COND. */
2676 if (gimple_code (stmt) == GIMPLE_COND)
2677 {
2678 if (EDGE_COUNT (first_bb->succs) != 2)
2679 return;
2680 }
2681 else if (final_range_test_p (stmt))
2682 other_bb = single_succ (first_bb);
2683 else
2684 return;
2685
2686 if (stmt_could_throw_p (stmt))
2687 return;
2688
2689 /* As relative ordering of post-dominator sons isn't fixed,
2690 maybe_optimize_range_tests can be called first on any
2691 bb in the range we want to optimize. So, start searching
2692 backwards, if first_bb can be set to a predecessor. */
2693 while (single_pred_p (first_bb))
2694 {
2695 basic_block pred_bb = single_pred (first_bb);
2696 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2697 break;
2698 if (!no_side_effect_bb (first_bb))
2699 break;
2700 first_bb = pred_bb;
2701 }
2702 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2703 Before starting forward search in last_bb successors, find
2704 out the other_bb. */
2705 if (first_bb == last_bb)
2706 {
2707 other_bb = NULL;
2708 /* As a non-GIMPLE_COND last stmt always terminates the range,
2709 the forward search below could not discover anything, so just give up. */
2710 if (gimple_code (stmt) != GIMPLE_COND)
2711 return;
2712 /* Look at both successors. Either it ends with a GIMPLE_COND
2713 and satisfies suitable_cond_bb, or ends with a cast and
2714 other_bb is that cast's successor. */
2715 FOR_EACH_EDGE (e, ei, first_bb->succs)
2716 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2717 || e->dest == first_bb)
2718 return;
2719 else if (single_pred_p (e->dest))
2720 {
2721 stmt = last_stmt (e->dest);
2722 if (stmt
2723 && gimple_code (stmt) == GIMPLE_COND
2724 && EDGE_COUNT (e->dest->succs) == 2)
2725 {
2726 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2727 break;
2728 else
2729 other_bb = NULL;
2730 }
2731 else if (stmt
2732 && final_range_test_p (stmt)
2733 && find_edge (first_bb, single_succ (e->dest)))
2734 {
2735 other_bb = single_succ (e->dest);
2736 if (other_bb == first_bb)
2737 other_bb = NULL;
2738 }
2739 }
2740 if (other_bb == NULL)
2741 return;
2742 }
2743 /* Now do the forward search, moving last_bb to successor bbs
2744 that aren't other_bb. */
2745 while (EDGE_COUNT (last_bb->succs) == 2)
2746 {
2747 FOR_EACH_EDGE (e, ei, last_bb->succs)
2748 if (e->dest != other_bb)
2749 break;
2750 if (e == NULL)
2751 break;
2752 if (!single_pred_p (e->dest))
2753 break;
2754 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2755 break;
2756 if (!no_side_effect_bb (e->dest))
2757 break;
2758 last_bb = e->dest;
2759 }
2760 if (first_bb == last_bb)
2761 return;
2762 /* Here basic blocks first_bb through last_bb's predecessor
2763 all end with GIMPLE_COND, each of them has one edge to
2764 other_bb and another edge to another block in the range,
2765 all blocks except first_bb have no side-effects, and
2766 last_bb ends with either a GIMPLE_COND or a cast satisfying
2767 final_range_test_p. */
2768 for (bb = last_bb; ; bb = single_pred (bb))
2769 {
2770 enum tree_code code;
2771 tree lhs, rhs;
2772 inter_bb_range_test_entry bb_ent;
2773
2774 bb_ent.op = NULL_TREE;
2775 bb_ent.first_idx = ops.length ();
2776 bb_ent.last_idx = bb_ent.first_idx;
2777 e = find_edge (bb, other_bb);
2778 stmt = last_stmt (bb);
2779 gimple_set_visited (stmt, true);
2780 if (gimple_code (stmt) != GIMPLE_COND)
2781 {
2782 use_operand_p use_p;
2783 gimple phi;
2784 edge e2;
2785 unsigned int d;
2786
2787 lhs = gimple_assign_lhs (stmt);
2788 rhs = gimple_assign_rhs1 (stmt);
2789 gcc_assert (bb == last_bb);
2790
2791 /* stmt is
2792 _123 = (int) _234;
2793
2794 followed by:
2795 <bb M>:
2796 # _345 = PHI <_123(N), 1(...), 1(...)>
2797
2798 or 0 instead of 1. If it is 0, the _234
2799 range test is anded together with all the
2800 other range tests, if it is 1, it is ored with
2801 them. */
2802 single_imm_use (lhs, &use_p, &phi);
2803 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2804 e2 = find_edge (first_bb, other_bb);
2805 d = e2->dest_idx;
2806 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2807 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2808 code = BIT_AND_EXPR;
2809 else
2810 {
2811 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2812 code = BIT_IOR_EXPR;
2813 }
2814
2815 /* If the SSA_NAME_DEF_STMT of _234 is
2816 _234 = _567 | _789;
2817 (or &, corresponding to 1/0 in the phi arguments),
2818 push into ops the individual range test arguments
2819 of the bitwise or resp. and, recursively. */
2820 if (!get_ops (rhs, code, &ops,
2821 loop_containing_stmt (stmt))
2822 && has_single_use (rhs))
2823 {
2824 /* Otherwise, push the _234 range test itself. */
2825 operand_entry_t oe
2826 = (operand_entry_t) pool_alloc (operand_entry_pool);
2827
2828 oe->op = rhs;
2829 oe->rank = code;
2830 oe->id = 0;
2831 oe->count = 1;
2832 ops.safe_push (oe);
2833 bb_ent.last_idx++;
2834 }
2835 else
2836 bb_ent.last_idx = ops.length ();
2837 bb_ent.op = rhs;
2838 bbinfo.safe_push (bb_ent);
2839 continue;
2840 }
2841 /* Otherwise stmt is GIMPLE_COND. */
2842 code = gimple_cond_code (stmt);
2843 lhs = gimple_cond_lhs (stmt);
2844 rhs = gimple_cond_rhs (stmt);
2845 if (TREE_CODE (lhs) == SSA_NAME
2846 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2847 && ((code != EQ_EXPR && code != NE_EXPR)
2848 || rhs != boolean_false_node
2849 /* Either push into ops the individual bitwise
2850 or resp. and operands, depending on which
2851 edge is other_bb. */
2852 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2853 ^ (code == EQ_EXPR))
2854 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2855 loop_containing_stmt (stmt))))
2856 {
2857 /* Or push the GIMPLE_COND stmt itself. */
2858 operand_entry_t oe
2859 = (operand_entry_t) pool_alloc (operand_entry_pool);
2860
2861 oe->op = NULL;
2862 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2863 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2864 /* oe->op = NULL signals that there is no SSA_NAME
2865 for the range test, and oe->id instead is the
2866 number of the basic block at whose end the
2867 GIMPLE_COND is. */
2868 oe->id = bb->index;
2869 oe->count = 1;
2870 ops.safe_push (oe);
2871 bb_ent.op = NULL;
2872 bb_ent.last_idx++;
2873 }
2874 else if (ops.length () > bb_ent.first_idx)
2875 {
2876 bb_ent.op = lhs;
2877 bb_ent.last_idx = ops.length ();
2878 }
2879 bbinfo.safe_push (bb_ent);
2880 if (bb == first_bb)
2881 break;
2882 }
2883 if (ops.length () > 1)
2884 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2885 if (any_changes)
2886 {
2887 unsigned int idx;
2888 /* update_ops relies on the has_single_use predicates returning the
2889 same values as they did during get_ops earlier. Additionally it
2890 never removes statements, only adds new ones, and it should walk
2891 from the single imm use and check the predicate already before
2892 making those changes.
2893 On the other hand, handling GIMPLE_COND directly can turn
2894 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
2895 it needs to be done in a separate loop afterwards. */
2896 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2897 {
2898 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2899 && bbinfo[idx].op != NULL_TREE)
2900 {
2901 tree new_op;
2902
2903 stmt = last_stmt (bb);
2904 new_op = update_ops (bbinfo[idx].op,
2905 (enum tree_code)
2906 ops[bbinfo[idx].first_idx]->rank,
2907 ops, &bbinfo[idx].first_idx,
2908 loop_containing_stmt (stmt));
2909 if (new_op == NULL_TREE)
2910 {
2911 gcc_assert (bb == last_bb);
2912 new_op = ops[bbinfo[idx].first_idx++]->op;
2913 }
2914 if (bbinfo[idx].op != new_op)
2915 {
2916 imm_use_iterator iter;
2917 use_operand_p use_p;
2918 gimple use_stmt, cast_stmt = NULL;
2919
2920 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2921 if (is_gimple_debug (use_stmt))
2922 continue;
2923 else if (gimple_code (use_stmt) == GIMPLE_COND
2924 || gimple_code (use_stmt) == GIMPLE_PHI)
2925 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2926 SET_USE (use_p, new_op);
2927 else if (gimple_assign_cast_p (use_stmt))
2928 cast_stmt = use_stmt;
2929 else
2930 gcc_unreachable ();
2931 if (cast_stmt)
2932 {
2933 gcc_assert (bb == last_bb);
2934 tree lhs = gimple_assign_lhs (cast_stmt);
2935 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2936 enum tree_code rhs_code
2937 = gimple_assign_rhs_code (cast_stmt);
2938 gimple g;
2939 if (is_gimple_min_invariant (new_op))
2940 {
2941 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2942 g = gimple_build_assign (new_lhs, new_op);
2943 }
2944 else
2945 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2946 new_op, NULL_TREE);
2947 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2948 gimple_set_uid (g, gimple_uid (cast_stmt));
2949 gimple_set_visited (g, true);
2950 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2951 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2952 if (is_gimple_debug (use_stmt))
2953 continue;
2954 else if (gimple_code (use_stmt) == GIMPLE_COND
2955 || gimple_code (use_stmt) == GIMPLE_PHI)
2956 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2957 SET_USE (use_p, new_lhs);
2958 else
2959 gcc_unreachable ();
2960 }
2961 }
2962 }
2963 if (bb == first_bb)
2964 break;
2965 }
2966 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2967 {
2968 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2969 && bbinfo[idx].op == NULL_TREE
2970 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2971 {
2972 stmt = last_stmt (bb);
2973 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2974 gimple_cond_make_false (stmt);
2975 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2976 gimple_cond_make_true (stmt);
2977 else
2978 {
2979 gimple_cond_set_code (stmt, NE_EXPR);
2980 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2981 gimple_cond_set_rhs (stmt, boolean_false_node);
2982 }
2983 update_stmt (stmt);
2984 }
2985 if (bb == first_bb)
2986 break;
2987 }
2988 }
2989 }
2990
2991 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2992 of STMT in its operands. This is also known as a "destructive
2993 update" operation. */
2994
2995 static bool
2996 is_phi_for_stmt (gimple stmt, tree operand)
2997 {
2998 gimple def_stmt;
2999 tree lhs;
3000 use_operand_p arg_p;
3001 ssa_op_iter i;
3002
3003 if (TREE_CODE (operand) != SSA_NAME)
3004 return false;
3005
3006 lhs = gimple_assign_lhs (stmt);
3007
3008 def_stmt = SSA_NAME_DEF_STMT (operand);
3009 if (gimple_code (def_stmt) != GIMPLE_PHI)
3010 return false;
3011
3012 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3013 if (lhs == USE_FROM_PTR (arg_p))
3014 return true;
3015 return false;
3016 }
3017
3018 /* Remove def stmt of VAR if VAR has zero uses and recurse
3019 on rhs1 operand if so. */
3020
3021 static void
3022 remove_visited_stmt_chain (tree var)
3023 {
3024 gimple stmt;
3025 gimple_stmt_iterator gsi;
3026
3027 while (1)
3028 {
3029 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3030 return;
3031 stmt = SSA_NAME_DEF_STMT (var);
3032 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3033 {
3034 var = gimple_assign_rhs1 (stmt);
3035 gsi = gsi_for_stmt (stmt);
3036 gsi_remove (&gsi, true);
3037 release_defs (stmt);
3038 }
3039 else
3040 return;
3041 }
3042 }
3043
3044 /* This function checks three consecutive operands in
3045 the passed operands vector OPS starting from OPINDEX and
3046 swaps two operands if it is profitable for the binary operation
3047 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3048
3049 We pair ops with the same rank if possible.
3050
3051 The alternative we try is to see if STMT is a destructive
3052 update style statement, which is like:
3053 b = phi (a, ...)
3054 a = c + b;
3055 In that case, we want to use the destructive update form to
3056 expose the possible vectorizer sum reduction opportunity.
3057 In that case, the third operand will be the phi node. This
3058 check is not performed if STMT is null.
3059
3060 We could, of course, try to be better as noted above, and do a
3061 lot of work to try to find these opportunities in >3 operand
3062 cases, but it is unlikely to be worth it. */
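/* Illustrative sketch: with operand ranks {2, 2, 1} at OPINDEX, the
   first and third entries are swapped to give {1, 2, 2}, so that the
   statement consuming the OPINDEX + 1 and OPINDEX + 2 operands sees
   two operands of equal rank. */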
3063
3064 static void
3065 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3066 unsigned int opindex, gimple stmt)
3067 {
3068 operand_entry_t oe1, oe2, oe3;
3069
3070 oe1 = ops[opindex];
3071 oe2 = ops[opindex + 1];
3072 oe3 = ops[opindex + 2];
3073
3074 if ((oe1->rank == oe2->rank
3075 && oe2->rank != oe3->rank)
3076 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3077 && !is_phi_for_stmt (stmt, oe1->op)
3078 && !is_phi_for_stmt (stmt, oe2->op)))
3079 {
3080 struct operand_entry temp = *oe3;
3081 oe3->op = oe1->op;
3082 oe3->rank = oe1->rank;
3083 oe1->op = temp.op;
3084 oe1->rank= temp.rank;
3085 }
3086 else if ((oe1->rank == oe3->rank
3087 && oe2->rank != oe3->rank)
3088 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3089 && !is_phi_for_stmt (stmt, oe1->op)
3090 && !is_phi_for_stmt (stmt, oe3->op)))
3091 {
3092 struct operand_entry temp = *oe2;
3093 oe2->op = oe1->op;
3094 oe2->rank = oe1->rank;
3095 oe1->op = temp.op;
3096 oe1->rank = temp.rank;
3097 }
3098 }
3099
3100 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3101 two definitions, otherwise return STMT. */
3102
3103 static inline gimple
3104 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3105 {
3106 if (TREE_CODE (rhs1) == SSA_NAME
3107 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3108 stmt = SSA_NAME_DEF_STMT (rhs1);
3109 if (TREE_CODE (rhs2) == SSA_NAME
3110 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3111 stmt = SSA_NAME_DEF_STMT (rhs2);
3112 return stmt;
3113 }
3114
3115 /* Recursively rewrite our linearized statements so that the operators
3116 match those in OPS[OPINDEX], putting the computation in rank
3117 order. Return new lhs. */
3118
3119 static tree
3120 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3121 vec<operand_entry_t> ops, bool changed)
3122 {
3123 tree rhs1 = gimple_assign_rhs1 (stmt);
3124 tree rhs2 = gimple_assign_rhs2 (stmt);
3125 tree lhs = gimple_assign_lhs (stmt);
3126 operand_entry_t oe;
3127
3128 /* The final recursion case for this function is that you have
3129 exactly two operations left.
3130 If we had exactly one op in the entire list to start with, we
3131 would have never called this function, and the tail recursion
3132 rewrites them one at a time. */
3133 if (opindex + 2 == ops.length ())
3134 {
3135 operand_entry_t oe1, oe2;
3136
3137 oe1 = ops[opindex];
3138 oe2 = ops[opindex + 1];
3139
3140 if (rhs1 != oe1->op || rhs2 != oe2->op)
3141 {
3142 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3143 unsigned int uid = gimple_uid (stmt);
3144
3145 if (dump_file && (dump_flags & TDF_DETAILS))
3146 {
3147 fprintf (dump_file, "Transforming ");
3148 print_gimple_stmt (dump_file, stmt, 0, 0);
3149 }
3150
3151 if (changed)
3152 {
3153 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3154 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3155 stmt
3156 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3157 lhs, oe1->op, oe2->op);
3158 gimple_set_uid (stmt, uid);
3159 gimple_set_visited (stmt, true);
3160 if (insert_point == gsi_stmt (gsi))
3161 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3162 else
3163 insert_stmt_after (stmt, insert_point);
3164 }
3165 else
3166 {
3167 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3168 == stmt);
3169 gimple_assign_set_rhs1 (stmt, oe1->op);
3170 gimple_assign_set_rhs2 (stmt, oe2->op);
3171 update_stmt (stmt);
3172 }
3173
3174 if (rhs1 != oe1->op && rhs1 != oe2->op)
3175 remove_visited_stmt_chain (rhs1);
3176
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3178 {
3179 fprintf (dump_file, " into ");
3180 print_gimple_stmt (dump_file, stmt, 0, 0);
3181 }
3182 }
3183 return lhs;
3184 }
3185
3186 /* If we hit here, we should have 3 or more ops left. */
3187 gcc_assert (opindex + 2 < ops.length ());
3188
3189 /* Rewrite the next operator. */
3190 oe = ops[opindex];
3191
3192 /* Recurse on the LHS of the binary operator, which is guaranteed to
3193 be the non-leaf side. */
3194 tree new_rhs1
3195 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3196 changed || oe->op != rhs2);
3197
3198 if (oe->op != rhs2 || new_rhs1 != rhs1)
3199 {
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3201 {
3202 fprintf (dump_file, "Transforming ");
3203 print_gimple_stmt (dump_file, stmt, 0, 0);
3204 }
3205
3206 /* If changed is false, this is either opindex == 0
3207 or all outer rhs2's were equal to corresponding oe->op,
3208 and powi_result is NULL.
3209 That means lhs is equivalent before and after reassociation.
3210 Otherwise ensure the old lhs SSA_NAME is not reused and
3211 create a new stmt as well, so that any debug stmts will be
3212 properly adjusted. */
3213 if (changed)
3214 {
3215 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3216 unsigned int uid = gimple_uid (stmt);
3217 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3218
3219 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3220 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3221 lhs, new_rhs1, oe->op);
3222 gimple_set_uid (stmt, uid);
3223 gimple_set_visited (stmt, true);
3224 if (insert_point == gsi_stmt (gsi))
3225 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3226 else
3227 insert_stmt_after (stmt, insert_point);
3228 }
3229 else
3230 {
3231 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3232 == stmt);
3233 gimple_assign_set_rhs1 (stmt, new_rhs1);
3234 gimple_assign_set_rhs2 (stmt, oe->op);
3235 update_stmt (stmt);
3236 }
3237
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 {
3240 fprintf (dump_file, " into ");
3241 print_gimple_stmt (dump_file, stmt, 0, 0);
3242 }
3243 }
3244 return lhs;
3245 }
3246
3247 /* Find out how many cycles we need to compute a chain of statements.
3248 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3249 maximum number of independent statements we may execute per cycle. */
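/* Illustrative trace of the computation below: for OPS_NUM = 10 and
   CPU_WIDTH = 2 we get res = 10 / 4 = 2, rest = 10 - 2 * 2 = 6, which
   is not a power of two, so res += floor_log2 (6) + 1 = 3, giving
   5 cycles in total. */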
3250
3251 static int
3252 get_required_cycles (int ops_num, int cpu_width)
3253 {
3254 int res;
3255 int elog;
3256 unsigned int rest;
3257
3258 /* While we have more than 2 * cpu_width operands
3259 we may reduce the number of operands by cpu_width
3260 per cycle. */
3261 res = ops_num / (2 * cpu_width);
3262
3263 /* The count of the remaining operands may be halved
3264 each cycle until we have only one operand left. */
3265 rest = (unsigned)(ops_num - res * cpu_width);
3266 elog = exact_log2 (rest);
3267 if (elog >= 0)
3268 res += elog;
3269 else
3270 res += floor_log2 (rest) + 1;
3271
3272 return res;
3273 }
3274
3275 /* Returns an optimal number of registers to use for computation of
3276 given statements. */
3277
3278 static int
3279 get_reassociation_width (int ops_num, enum tree_code opc,
3280 enum machine_mode mode)
3281 {
3282 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3283 int width;
3284 int width_min;
3285 int cycles_best;
3286
3287 if (param_width > 0)
3288 width = param_width;
3289 else
3290 width = targetm.sched.reassociation_width (opc, mode);
3291
3292 if (width == 1)
3293 return width;
3294
3295 /* Get the minimal time required for sequence computation. */
3296 cycles_best = get_required_cycles (ops_num, width);
3297
3298 /* Check if we may use less width and still compute the sequence in
3299 the same time. This allows us to reduce register usage.
3300 get_required_cycles is monotonically increasing with lower width
3301 so we can perform a binary search for the minimal width that still
3302 results in the optimal cycle count. */
3303 width_min = 1;
3304 while (width > width_min)
3305 {
3306 int width_mid = (width + width_min) / 2;
3307
3308 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3309 width = width_mid;
3310 else if (width_min < width_mid)
3311 width_min = width_mid;
3312 else
3313 break;
3314 }
3315
3316 return width;
3317 }
3318
3319 /* Recursively rewrite our linearized statements so that the operators
3320 match those in OPS[OPINDEX], putting the computation in rank
3321 order and trying to allow operations to be executed in
3322 parallel. */
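/* Illustrative sketch with hypothetical SSA names: for
   lhs = a + b + c + d and width 2 the loop below may emit
     _t1 = c + d;
     _t2 = a + b;
     lhs = _t1 + _t2;
   so that the two partial sums can be computed in parallel. */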
3323
3324 static void
3325 rewrite_expr_tree_parallel (gimple stmt, int width,
3326 vec<operand_entry_t> ops)
3327 {
3328 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3329 int op_num = ops.length ();
3330 int stmt_num = op_num - 1;
3331 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3332 int op_index = op_num - 1;
3333 int stmt_index = 0;
3334 int ready_stmts_end = 0;
3335 int i = 0;
3336 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3337
3338 /* We start expression rewriting from the top statements.
3339 So, in this loop we create a full list of statements
3340 we will work with. */
3341 stmts[stmt_num - 1] = stmt;
3342 for (i = stmt_num - 2; i >= 0; i--)
3343 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3344
3345 for (i = 0; i < stmt_num; i++)
3346 {
3347 tree op1, op2;
3348
3349 /* Determine whether we should use results of
3350 already handled statements or not. */
3351 if (ready_stmts_end == 0
3352 && (i - stmt_index >= width || op_index < 1))
3353 ready_stmts_end = i;
3354
3355 /* Now we choose operands for the next statement. A nonzero
3356 value in ready_stmts_end means that we should use
3357 the result of already generated statements as the new operand. */
3358 if (ready_stmts_end > 0)
3359 {
3360 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3361 if (ready_stmts_end > stmt_index)
3362 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3363 else if (op_index >= 0)
3364 op2 = ops[op_index--]->op;
3365 else
3366 {
3367 gcc_assert (stmt_index < i);
3368 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3369 }
3370
3371 if (stmt_index >= ready_stmts_end)
3372 ready_stmts_end = 0;
3373 }
3374 else
3375 {
3376 if (op_index > 1)
3377 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3378 op2 = ops[op_index--]->op;
3379 op1 = ops[op_index--]->op;
3380 }
3381
3382 /* If we emit the last statement then we should put
3383 operands into the last statement. It will also
3384 break the loop. */
3385 if (op_index < 0 && stmt_index == i)
3386 i = stmt_num - 1;
3387
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 {
3390 fprintf (dump_file, "Transforming ");
3391 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3392 }
3393
3394 /* We keep the original statement only for the last one. All
3395 others are recreated. */
3396 if (i == stmt_num - 1)
3397 {
3398 gimple_assign_set_rhs1 (stmts[i], op1);
3399 gimple_assign_set_rhs2 (stmts[i], op2);
3400 update_stmt (stmts[i]);
3401 }
3402 else
3403 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3404
3405 if (dump_file && (dump_flags & TDF_DETAILS))
3406 {
3407 fprintf (dump_file, " into ");
3408 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3409 }
3410 }
3411
3412 remove_visited_stmt_chain (last_rhs1);
3413 }
3414
3415 /* Transform STMT, which is really (A + B) + (C + D), into the left
3416 linear form, ((A+B)+C)+D.
3417 Recurse on D if necessary. */
3418
3419 static void
3420 linearize_expr (gimple stmt)
3421 {
3422 gimple_stmt_iterator gsi;
3423 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3424 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3425 gimple oldbinrhs = binrhs;
3426 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3427 gimple newbinrhs = NULL;
3428 struct loop *loop = loop_containing_stmt (stmt);
3429 tree lhs = gimple_assign_lhs (stmt);
3430
3431 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3432 && is_reassociable_op (binrhs, rhscode, loop));
3433
3434 gsi = gsi_for_stmt (stmt);
3435
3436 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3437 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3438 make_ssa_name (TREE_TYPE (lhs), NULL),
3439 gimple_assign_lhs (binlhs),
3440 gimple_assign_rhs2 (binrhs));
3441 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3442 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3443 gimple_set_uid (binrhs, gimple_uid (stmt));
3444
3445 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3446 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3447
3448 if (dump_file && (dump_flags & TDF_DETAILS))
3449 {
3450 fprintf (dump_file, "Linearized: ");
3451 print_gimple_stmt (dump_file, stmt, 0, 0);
3452 }
3453
3454 reassociate_stats.linearized++;
3455 update_stmt (stmt);
3456
3457 gsi = gsi_for_stmt (oldbinrhs);
3458 gsi_remove (&gsi, true);
3459 release_defs (oldbinrhs);
3460
3461 gimple_set_visited (stmt, true);
3462 gimple_set_visited (binlhs, true);
3463 gimple_set_visited (binrhs, true);
3464
3465 /* Tail recurse on the new rhs if it still needs reassociation. */
3466 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3467 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3468 want to change the algorithm while converting to tuples. */
3469 linearize_expr (stmt);
3470 }
3471
3472 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3473 it. Otherwise, return NULL. */
3474
3475 static gimple
3476 get_single_immediate_use (tree lhs)
3477 {
3478 use_operand_p immuse;
3479 gimple immusestmt;
3480
3481 if (TREE_CODE (lhs) == SSA_NAME
3482 && single_imm_use (lhs, &immuse, &immusestmt)
3483 && is_gimple_assign (immusestmt))
3484 return immusestmt;
3485
3486 return NULL;
3487 }
3488
3489 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3490 representing the negated value. Insertions of any necessary
3491 instructions go before GSI.
3492 This function is recursive in that, if you hand it "a_5" as the
3493 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3494 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3495
3496 static tree
3497 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3498 {
3499 gimple negatedefstmt = NULL;
3500 tree resultofnegate;
3501 gimple_stmt_iterator gsi;
3502 unsigned int uid;
3503
3504 /* If we are trying to negate a name, defined by an add, negate the
3505 add operands instead. */
3506 if (TREE_CODE (tonegate) == SSA_NAME)
3507 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3508 if (TREE_CODE (tonegate) == SSA_NAME
3509 && is_gimple_assign (negatedefstmt)
3510 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3511 && has_single_use (gimple_assign_lhs (negatedefstmt))
3512 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3513 {
3514 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3515 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3516 tree lhs = gimple_assign_lhs (negatedefstmt);
3517 gimple g;
3518
3519 gsi = gsi_for_stmt (negatedefstmt);
3520 rhs1 = negate_value (rhs1, &gsi);
3521
3522 gsi = gsi_for_stmt (negatedefstmt);
3523 rhs2 = negate_value (rhs2, &gsi);
3524
3525 gsi = gsi_for_stmt (negatedefstmt);
3526 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3527 gimple_set_visited (negatedefstmt, true);
3528 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3529 gimple_set_uid (g, gimple_uid (negatedefstmt));
3530 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3531 return lhs;
3532 }
3533
3534 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3535 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3536 NULL_TREE, true, GSI_SAME_STMT);
3537 gsi = *gsip;
3538 uid = gimple_uid (gsi_stmt (gsi));
3539 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3540 {
3541 gimple stmt = gsi_stmt (gsi);
3542 if (gimple_uid (stmt) != 0)
3543 break;
3544 gimple_set_uid (stmt, uid);
3545 }
3546 return resultofnegate;
3547 }
3548
3549 /* Return true if we should break up the subtract in STMT into an add
3550 with negate. This is true when the subtract operands are really
3551 adds, or the subtract itself is used in an add expression. In
3552 either case, breaking up the subtract into an add with negate
3553 exposes the adds to reassociation. */
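/* Illustrative sketch: for
     x = a - b;
     y = x + c;
   the subtract feeds an add, so it is worth breaking it up into
   x = a + (-b), exposing the chain a + (-b) + c to reassociation. */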
3554
3555 static bool
3556 should_break_up_subtract (gimple stmt)
3557 {
3558 tree lhs = gimple_assign_lhs (stmt);
3559 tree binlhs = gimple_assign_rhs1 (stmt);
3560 tree binrhs = gimple_assign_rhs2 (stmt);
3561 gimple immusestmt;
3562 struct loop *loop = loop_containing_stmt (stmt);
3563
3564 if (TREE_CODE (binlhs) == SSA_NAME
3565 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3566 return true;
3567
3568 if (TREE_CODE (binrhs) == SSA_NAME
3569 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3570 return true;
3571
3572 if (TREE_CODE (lhs) == SSA_NAME
3573 && (immusestmt = get_single_immediate_use (lhs))
3574 && is_gimple_assign (immusestmt)
3575 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3576 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3577 return true;
3578 return false;
3579 }
3580
3581 /* Transform STMT from A - B into A + -B. */
3582
3583 static void
3584 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3585 {
3586 tree rhs1 = gimple_assign_rhs1 (stmt);
3587 tree rhs2 = gimple_assign_rhs2 (stmt);
3588
3589 if (dump_file && (dump_flags & TDF_DETAILS))
3590 {
3591 fprintf (dump_file, "Breaking up subtract ");
3592 print_gimple_stmt (dump_file, stmt, 0, 0);
3593 }
3594
3595 rhs2 = negate_value (rhs2, gsip);
3596 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3597 update_stmt (stmt);
3598 }
3599
3600 /* Determine whether STMT is a builtin call that raises an SSA name
3601 to an integer power and has only one use. If so, and this is early
3602 reassociation and unsafe math optimizations are permitted, place
3603 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3604 If any of these conditions does not hold, return FALSE. */
3605
3606 static bool
3607 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3608 {
3609 tree fndecl, arg1;
3610 REAL_VALUE_TYPE c, cint;
3611
3612 if (!first_pass_instance
3613 || !flag_unsafe_math_optimizations
3614 || !is_gimple_call (stmt)
3615 || !has_single_use (gimple_call_lhs (stmt)))
3616 return false;
3617
3618 fndecl = gimple_call_fndecl (stmt);
3619
3620 if (!fndecl
3621 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3622 return false;
3623
3624 switch (DECL_FUNCTION_CODE (fndecl))
3625 {
3626 CASE_FLT_FN (BUILT_IN_POW):
3627 *base = gimple_call_arg (stmt, 0);
3628 arg1 = gimple_call_arg (stmt, 1);
3629
3630 if (TREE_CODE (arg1) != REAL_CST)
3631 return false;
3632
3633 c = TREE_REAL_CST (arg1);
3634
3635 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3636 return false;
3637
3638 *exponent = real_to_integer (&c);
3639 real_from_integer (&cint, VOIDmode, *exponent,
3640 *exponent < 0 ? -1 : 0, 0);
3641 if (!real_identical (&c, &cint))
3642 return false;
3643
3644 break;
3645
3646 CASE_FLT_FN (BUILT_IN_POWI):
3647 *base = gimple_call_arg (stmt, 0);
3648 arg1 = gimple_call_arg (stmt, 1);
3649
3650 if (!tree_fits_shwi_p (arg1))
3651 return false;
3652
3653 *exponent = tree_to_shwi (arg1);
3654 break;
3655
3656 default:
3657 return false;
3658 }
3659
3660 /* Expanding negative exponents is generally unproductive, so we don't
3661 complicate matters with those. Exponents of zero and one should
3662 have been handled by expression folding. */
3663 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3664 return false;
3665
3666 return true;
3667 }
3668
3669 /* Recursively linearize a binary expression that is the RHS of STMT.
3670 Place the operands of the expression tree in the vector named OPS. */
3671
3672 static void
3673 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3674 bool is_associative, bool set_visited)
3675 {
3676 tree binlhs = gimple_assign_rhs1 (stmt);
3677 tree binrhs = gimple_assign_rhs2 (stmt);
3678 gimple binlhsdef = NULL, binrhsdef = NULL;
3679 bool binlhsisreassoc = false;
3680 bool binrhsisreassoc = false;
3681 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3682 struct loop *loop = loop_containing_stmt (stmt);
3683 tree base = NULL_TREE;
3684 HOST_WIDE_INT exponent = 0;
3685
3686 if (set_visited)
3687 gimple_set_visited (stmt, true);
3688
3689 if (TREE_CODE (binlhs) == SSA_NAME)
3690 {
3691 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3692 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3693 && !stmt_could_throw_p (binlhsdef));
3694 }
3695
3696 if (TREE_CODE (binrhs) == SSA_NAME)
3697 {
3698 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3699 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3700 && !stmt_could_throw_p (binrhsdef));
3701 }
3702
3703 /* If the LHS is not reassociable, but the RHS is, we need to swap
3704 them. If neither is reassociable, there is nothing we can do, so
3705 just put them in the ops vector. If the LHS is reassociable,
3706 linearize it. If both are reassociable, then linearize the RHS
3707 and the LHS. */
3708
3709 if (!binlhsisreassoc)
3710 {
3711 tree temp;
3712
3713 /* If this is not an associative operation, like division, give up. */
3714 if (!is_associative)
3715 {
3716 add_to_ops_vec (ops, binrhs);
3717 return;
3718 }
3719
3720 if (!binrhsisreassoc)
3721 {
3722 if (rhscode == MULT_EXPR
3723 && TREE_CODE (binrhs) == SSA_NAME
3724 && acceptable_pow_call (binrhsdef, &base, &exponent))
3725 {
3726 add_repeat_to_ops_vec (ops, base, exponent);
3727 gimple_set_visited (binrhsdef, true);
3728 }
3729 else
3730 add_to_ops_vec (ops, binrhs);
3731
3732 if (rhscode == MULT_EXPR
3733 && TREE_CODE (binlhs) == SSA_NAME
3734 && acceptable_pow_call (binlhsdef, &base, &exponent))
3735 {
3736 add_repeat_to_ops_vec (ops, base, exponent);
3737 gimple_set_visited (binlhsdef, true);
3738 }
3739 else
3740 add_to_ops_vec (ops, binlhs);
3741
3742 return;
3743 }
3744
3745 if (dump_file && (dump_flags & TDF_DETAILS))
3746 {
3747 fprintf (dump_file, "swapping operands of ");
3748 print_gimple_stmt (dump_file, stmt, 0, 0);
3749 }
3750
3751 swap_ssa_operands (stmt,
3752 gimple_assign_rhs1_ptr (stmt),
3753 gimple_assign_rhs2_ptr (stmt));
3754 update_stmt (stmt);
3755
3756 if (dump_file && (dump_flags & TDF_DETAILS))
3757 {
3758 fprintf (dump_file, " is now ");
3759 print_gimple_stmt (dump_file, stmt, 0, 0);
3760 }
3761
3762 /* We want to make it so the lhs is always the reassociative op,
3763 so swap. */
3764 temp = binlhs;
3765 binlhs = binrhs;
3766 binrhs = temp;
3767 }
3768 else if (binrhsisreassoc)
3769 {
3770 linearize_expr (stmt);
3771 binlhs = gimple_assign_rhs1 (stmt);
3772 binrhs = gimple_assign_rhs2 (stmt);
3773 }
3774
3775 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3776 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3777 rhscode, loop));
3778 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3779 is_associative, set_visited);
3780
3781 if (rhscode == MULT_EXPR
3782 && TREE_CODE (binrhs) == SSA_NAME
3783 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3784 {
3785 add_repeat_to_ops_vec (ops, base, exponent);
3786 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3787 }
3788 else
3789 add_to_ops_vec (ops, binrhs);
3790 }
3791
3792 /* Repropagate the negates back into subtracts, since no other pass
3793 currently does it. */
3794
3795 static void
3796 repropagate_negates (void)
3797 {
3798 unsigned int i = 0;
3799 tree negate;
3800
3801 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3802 {
3803 gimple user = get_single_immediate_use (negate);
3804
3805 if (!user || !is_gimple_assign (user))
3806 continue;
3807
3808 /* The negate operand can be either operand of a PLUS_EXPR
3809 (it can be the LHS if the RHS is a constant for example).
3810
3811 Force the negate operand to the RHS of the PLUS_EXPR, then
3812 transform the PLUS_EXPR into a MINUS_EXPR. */
3813 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3814 {
3815 /* If the negated operand appears on the LHS of the
3816 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3817 to force the negated operand to the RHS of the PLUS_EXPR. */
3818 if (gimple_assign_rhs1 (user) == negate)
3819 {
3820 swap_ssa_operands (user,
3821 gimple_assign_rhs1_ptr (user),
3822 gimple_assign_rhs2_ptr (user));
3823 }
3824
3825 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3826 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3827 if (gimple_assign_rhs2 (user) == negate)
3828 {
3829 tree rhs1 = gimple_assign_rhs1 (user);
3830 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3831 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3832 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3833 update_stmt (user);
3834 }
3835 }
3836 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3837 {
3838 if (gimple_assign_rhs1 (user) == negate)
3839 {
3840 /* We have
3841 x = -a
3842 y = x - b
3843 which we transform into
3844 x = a + b
3845 y = -x .
3846 This pushes down the negate which we possibly can merge
3847 into some other operation, hence insert it into the
3848 plus_negates vector. */
3849 gimple feed = SSA_NAME_DEF_STMT (negate);
3850 tree a = gimple_assign_rhs1 (feed);
3851 tree b = gimple_assign_rhs2 (user);
3852 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3853 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3854 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3855 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3856 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3857 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3858 user = gsi_stmt (gsi2);
3859 update_stmt (user);
3860 gsi_remove (&gsi, true);
3861 release_defs (feed);
3862 plus_negates.safe_push (gimple_assign_lhs (user));
3863 }
3864 else
3865 {
3866 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3867 rid of one operation. */
3868 gimple feed = SSA_NAME_DEF_STMT (negate);
3869 tree a = gimple_assign_rhs1 (feed);
3870 tree rhs1 = gimple_assign_rhs1 (user);
3871 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3872 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3873 update_stmt (gsi_stmt (gsi));
3874 }
3875 }
3876 }
3877 }
3878
3879 /* Returns true if OP is of a type for which we can do reassociation.
3880 That is for wrapping integral types, non-saturating fixed-point
3881 types, and floating-point types when -fassociative-math is enabled. */
3882
3883 static bool
3884 can_reassociate_p (tree op)
3885 {
3886 tree type = TREE_TYPE (op);
3887 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3888 || NON_SAT_FIXED_POINT_TYPE_P (type)
3889 || (flag_associative_math && FLOAT_TYPE_P (type)))
3890 return true;
3891 return false;
3892 }
3893
3894 /* Break up subtract operations in block BB.
3895
3896 We do this top down because we don't know whether the subtract is
3897 part of a possible chain of reassociation except at the top.
3898
3899 IE given
3900 d = f + g
3901 c = a + e
3902 b = c - d
3903 q = b - r
3904 k = t - q
3905
3906 we want to break up k = t - q, but we won't until we've transformed q
3907 = b - r, which won't be broken up until we transform b = c - d.
3908
3909 En passant, clear the GIMPLE visited flag on every statement
3910 and set UIDs within each basic block. */
3911
3912 static void
3913 break_up_subtract_bb (basic_block bb)
3914 {
3915 gimple_stmt_iterator gsi;
3916 basic_block son;
3917 unsigned int uid = 1;
3918
3919 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3920 {
3921 gimple stmt = gsi_stmt (gsi);
3922 gimple_set_visited (stmt, false);
3923 gimple_set_uid (stmt, uid++);
3924
3925 if (!is_gimple_assign (stmt)
3926 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3927 continue;
3928
3929 /* Look for simple gimple subtract operations. */
3930 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3931 {
3932 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3933 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3934 continue;
3935
3936 /* Check for a subtract used only in an addition. If this
3937 is the case, transform it into an add of a negate for better
3938 reassociation. IE transform C = A-B into C = A + -B if C
3939 is only used in an addition. */
3940 if (should_break_up_subtract (stmt))
3941 break_up_subtract (stmt, &gsi);
3942 }
3943 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3944 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3945 plus_negates.safe_push (gimple_assign_lhs (stmt));
3946 }
3947 for (son = first_dom_son (CDI_DOMINATORS, bb);
3948 son;
3949 son = next_dom_son (CDI_DOMINATORS, son))
3950 break_up_subtract_bb (son);
3951 }
3952
3953 /* Used for repeated factor analysis. */
3954 struct repeat_factor_d
3955 {
3956 /* An SSA name that occurs in a multiply chain. */
3957 tree factor;
3958
3959 /* Cached rank of the factor. */
3960 unsigned rank;
3961
3962 /* Number of occurrences of the factor in the chain. */
3963 HOST_WIDE_INT count;
3964
3965 /* An SSA name representing the product of this factor and
3966 all factors appearing later in the repeated factor vector. */
3967 tree repr;
3968 };
3969
3970 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3971 typedef const struct repeat_factor_d *const_repeat_factor_t;
3972
3973
3974 static vec<repeat_factor> repeat_factor_vec;
3975
3976 /* Used for sorting the repeat factor vector. Sort primarily by
3977 ascending occurrence count, secondarily by descending rank. */
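/* For example, given factors {x: count 4, rank 5}, {y: count 2, rank 7}
   and {z: count 2, rank 3}, the sorted order is y, z, x: occurrence
   counts ascend, and among equal counts the higher-ranked factor comes
   first.  */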
3978
3979 static int
3980 compare_repeat_factors (const void *x1, const void *x2)
3981 {
3982 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3983 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3984
3985 if (rf1->count != rf2->count)
3986 return rf1->count - rf2->count;
3987
3988 return rf2->rank - rf1->rank;
3989 }
3990
3991 /* Look for repeated operands in OPS in the multiply tree rooted at
3992 STMT. Replace them with an optimal sequence of multiplies and powi
3993 builtin calls, and remove the used operands from OPS. Return an
3994 SSA name representing the value of the replacement sequence. */
3995
3996 static tree
3997 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3998 {
3999 unsigned i, j, vec_len;
4000 int ii;
4001 operand_entry_t oe;
4002 repeat_factor_t rf1, rf2;
4003 repeat_factor rfnew;
4004 tree result = NULL_TREE;
4005 tree target_ssa, iter_result;
4006 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4007 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4009 gimple mul_stmt, pow_stmt;
4010
4011 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4012 target. */
4013 if (!powi_fndecl)
4014 return NULL_TREE;
4015
4016 /* Allocate the repeated factor vector. */
4017 repeat_factor_vec.create (10);
4018
4019 /* Scan the OPS vector for all SSA names in the product and build
4020 up a vector of occurrence counts for each factor. */
4021 FOR_EACH_VEC_ELT (*ops, i, oe)
4022 {
4023 if (TREE_CODE (oe->op) == SSA_NAME)
4024 {
4025 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4026 {
4027 if (rf1->factor == oe->op)
4028 {
4029 rf1->count += oe->count;
4030 break;
4031 }
4032 }
4033
4034 if (j >= repeat_factor_vec.length ())
4035 {
4036 rfnew.factor = oe->op;
4037 rfnew.rank = oe->rank;
4038 rfnew.count = oe->count;
4039 rfnew.repr = NULL_TREE;
4040 repeat_factor_vec.safe_push (rfnew);
4041 }
4042 }
4043 }
4044
4045 /* Sort the repeated factor vector by (a) increasing occurrence count,
4046 and (b) decreasing rank. */
4047 repeat_factor_vec.qsort (compare_repeat_factors);
4048
4049 /* It is generally best to combine as many base factors as possible
4050 into a product before applying __builtin_powi to the result.
4051 However, the sort order chosen for the repeated factor vector
4052 allows us to cache partial results for the product of the base
4053 factors for subsequent use. When we already have a cached partial
4054 result from a previous iteration, it is best to make use of it
4055 before looking for another __builtin_powi opportunity.
4056
4057 As an example, consider x * x * y * y * y * z * z * z * z.
4058 We want to first compose the product x * y * z, raise it to the
4059 second power, then multiply this by y * z, and finally multiply
4060 by z. This can be done in 5 multiplies provided we cache y * z
4061 for use in both expressions:
4062
4063 t1 = y * z
4064 t2 = t1 * x
4065 t3 = t2 * t2
4066 t4 = t1 * t3
4067 result = t4 * z
4068
4069 If we instead ignored the cached y * z and first multiplied by
4070 the __builtin_powi opportunity z * z, we would get the inferior:
4071
4072 t1 = y * z
4073 t2 = t1 * x
4074 t3 = t2 * t2
4075 t4 = z * z
4076 t5 = t3 * t4
4077 result = t5 * y */
4078
4079 vec_len = repeat_factor_vec.length ();
4080
4081 /* Repeatedly look for opportunities to create a builtin_powi call. */
4082 while (true)
4083 {
4084 HOST_WIDE_INT power;
4085
4086 /* First look for the largest cached product of factors from
4087 preceding iterations. If found, create a builtin_powi for
4088 it if the minimum occurrence count for its factors is at
4089 least 2, or just use this cached product as our next
4090 multiplicand if the minimum occurrence count is 1. */
4091 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4092 {
4093 if (rf1->repr && rf1->count > 0)
4094 break;
4095 }
4096
4097 if (j < vec_len)
4098 {
4099 power = rf1->count;
4100
4101 if (power == 1)
4102 {
4103 iter_result = rf1->repr;
4104
4105 if (dump_file && (dump_flags & TDF_DETAILS))
4106 {
4107 unsigned elt;
4108 repeat_factor_t rf;
4109 fputs ("Multiplying by cached product ", dump_file);
4110 for (elt = j; elt < vec_len; elt++)
4111 {
4112 rf = &repeat_factor_vec[elt];
4113 print_generic_expr (dump_file, rf->factor, 0);
4114 if (elt < vec_len - 1)
4115 fputs (" * ", dump_file);
4116 }
4117 fputs ("\n", dump_file);
4118 }
4119 }
4120 else
4121 {
4122 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4123 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4124 build_int_cst (integer_type_node,
4125 power));
4126 gimple_call_set_lhs (pow_stmt, iter_result);
4127 gimple_set_location (pow_stmt, gimple_location (stmt));
4128 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4129
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4131 {
4132 unsigned elt;
4133 repeat_factor_t rf;
4134 fputs ("Building __builtin_pow call for cached product (",
4135 dump_file);
4136 for (elt = j; elt < vec_len; elt++)
4137 {
4138 rf = &repeat_factor_vec[elt];
4139 print_generic_expr (dump_file, rf->factor, 0);
4140 if (elt < vec_len - 1)
4141 fputs (" * ", dump_file);
4142 }
4143 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4144 power);
4145 }
4146 }
4147 }
4148 else
4149 {
4150 /* Otherwise, find the first factor in the repeated factor
4151 vector whose occurrence count is at least 2. If no such
4152 factor exists, there are no builtin_powi opportunities
4153 remaining. */
4154 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4155 {
4156 if (rf1->count >= 2)
4157 break;
4158 }
4159
4160 if (j >= vec_len)
4161 break;
4162
4163 power = rf1->count;
4164
4165 if (dump_file && (dump_flags & TDF_DETAILS))
4166 {
4167 unsigned elt;
4168 repeat_factor_t rf;
4169 fputs ("Building __builtin_pow call for (", dump_file);
4170 for (elt = j; elt < vec_len; elt++)
4171 {
4172 rf = &repeat_factor_vec[elt];
4173 print_generic_expr (dump_file, rf->factor, 0);
4174 if (elt < vec_len - 1)
4175 fputs (" * ", dump_file);
4176 }
4177 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4178 }
4179
4180 reassociate_stats.pows_created++;
4181
4182 /* Visit each element of the vector in reverse order (so that
4183 high-occurrence elements are visited first, and within the
4184 same occurrence count, lower-ranked elements are visited
4185 first). Form a linear product of all elements in this order
4186 whose occurrence count is at least that of element J.
4187 Record the SSA name representing the product of each element
4188 with all subsequent elements in the vector. */
4189 if (j == vec_len - 1)
4190 rf1->repr = rf1->factor;
4191 else
4192 {
4193 for (ii = vec_len - 2; ii >= (int)j; ii--)
4194 {
4195 tree op1, op2;
4196
4197 rf1 = &repeat_factor_vec[ii];
4198 rf2 = &repeat_factor_vec[ii + 1];
4199
4200 /* Init the last factor's representative to be itself. */
4201 if (!rf2->repr)
4202 rf2->repr = rf2->factor;
4203
4204 op1 = rf1->factor;
4205 op2 = rf2->repr;
4206
4207 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4208 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4209 target_ssa,
4210 op1, op2);
4211 gimple_set_location (mul_stmt, gimple_location (stmt));
4212 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4213 rf1->repr = target_ssa;
4214
4215 /* Don't reprocess the multiply we just introduced. */
4216 gimple_set_visited (mul_stmt, true);
4217 }
4218 }
4219
4220 /* Form a call to __builtin_powi for the maximum product
4221 just formed, raised to the power obtained earlier. */
4222 rf1 = &repeat_factor_vec[j];
4223 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4224 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4225 build_int_cst (integer_type_node,
4226 power));
4227 gimple_call_set_lhs (pow_stmt, iter_result);
4228 gimple_set_location (pow_stmt, gimple_location (stmt));
4229 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4230 }
4231
4232 /* If we previously formed at least one other builtin_powi call,
4233 form the product of this one and those others. */
4234 if (result)
4235 {
4236 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4237 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4238 result, iter_result);
4239 gimple_set_location (mul_stmt, gimple_location (stmt));
4240 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4241 gimple_set_visited (mul_stmt, true);
4242 result = new_result;
4243 }
4244 else
4245 result = iter_result;
4246
4247 /* Decrement the occurrence count of each element in the product
4248 by the count found above, and remove this many copies of each
4249 factor from OPS. */
4250 for (i = j; i < vec_len; i++)
4251 {
4252 unsigned k = power;
4253 unsigned n;
4254
4255 rf1 = &repeat_factor_vec[i];
4256 rf1->count -= power;
4257
4258 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4259 {
4260 if (oe->op == rf1->factor)
4261 {
4262 if (oe->count <= k)
4263 {
4264 ops->ordered_remove (n);
4265 k -= oe->count;
4266
4267 if (k == 0)
4268 break;
4269 }
4270 else
4271 {
4272 oe->count -= k;
4273 break;
4274 }
4275 }
4276 }
4277 }
4278 }
4279
4280 /* At this point all elements in the repeated factor vector have a
4281 remaining occurrence count of 0 or 1, and those with a count of 1
4282 don't have cached representatives. Re-sort the ops vector and
4283 clean up. */
4284 ops->qsort (sort_by_operand_rank);
4285 repeat_factor_vec.release ();
4286
4287 /* Return the final product computed herein. Note that there may
4288 still be some elements with single occurrence count left in OPS;
4289 those will be handled by the normal reassociation logic. */
4290 return result;
4291 }
4292
4293 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4294
4295 static void
4296 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4297 {
4298 tree rhs1;
4299
4300 if (dump_file && (dump_flags & TDF_DETAILS))
4301 {
4302 fprintf (dump_file, "Transforming ");
4303 print_gimple_stmt (dump_file, stmt, 0, 0);
4304 }
4305
4306 rhs1 = gimple_assign_rhs1 (stmt);
4307 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4308 update_stmt (stmt);
4309 remove_visited_stmt_chain (rhs1);
4310
4311 if (dump_file && (dump_flags & TDF_DETAILS))
4312 {
4313 fprintf (dump_file, " into ");
4314 print_gimple_stmt (dump_file, stmt, 0, 0);
4315 }
4316 }
4317
4318 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4319
4320 static void
4321 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4322 tree rhs1, tree rhs2)
4323 {
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4325 {
4326 fprintf (dump_file, "Transforming ");
4327 print_gimple_stmt (dump_file, stmt, 0, 0);
4328 }
4329
4330 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4331 update_stmt (gsi_stmt (*gsi));
4332 remove_visited_stmt_chain (rhs1);
4333
4334 if (dump_file && (dump_flags & TDF_DETAILS))
4335 {
4336 fprintf (dump_file, " into ");
4337 print_gimple_stmt (dump_file, stmt, 0, 0);
4338 }
4339 }
4340
4341 /* Reassociate expressions in basic block BB and its children in the
4342 post-dominator tree. */
4343
4344 static void
4345 reassociate_bb (basic_block bb)
4346 {
4347 gimple_stmt_iterator gsi;
4348 basic_block son;
4349 gimple stmt = last_stmt (bb);
4350
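  /* First try to combine comparisons into range tests, starting from the
     last statement of this block, unless it was already processed.  */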
4351 if (stmt && !gimple_visited_p (stmt))
4352 maybe_optimize_range_tests (stmt);
4353
4354 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4355 {
4356 stmt = gsi_stmt (gsi);
4357
4358 if (is_gimple_assign (stmt)
4359 && !stmt_could_throw_p (stmt))
4360 {
4361 tree lhs, rhs1, rhs2;
4362 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4363
4364 /* If this is not a gimple binary expression, there is
4365 nothing for us to do with it. */
4366 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4367 continue;
4368
4369 /* If this was part of an already processed statement,
4370 we don't need to touch it again. */
4371 if (gimple_visited_p (stmt))
4372 {
4373 /* This statement might have become dead because of previous
4374 reassociations. */
4375 if (has_zero_uses (gimple_get_lhs (stmt)))
4376 {
4377 gsi_remove (&gsi, true);
4378 release_defs (stmt);
4379 /* We might have removed the last statement of the block
4380 above, which leaves the iterator at the end of the
4381 sequence. In that case, reset it to the last statement
4382 of the block; if the block is now empty, the iterator is
4383 still at the end of the sequence, and we need to bail
4384 out. */
4385 if (gsi_end_p (gsi))
4386 {
4387 gsi = gsi_last_bb (bb);
4388 if (gsi_end_p (gsi))
4389 break;
4390 }
4391 }
4392 continue;
4393 }
4394
4395 lhs = gimple_assign_lhs (stmt);
4396 rhs1 = gimple_assign_rhs1 (stmt);
4397 rhs2 = gimple_assign_rhs2 (stmt);
4398
4399 /* For operations other than the bitwise ones and min/max, we cannot
4400 reassociate all types.  Verify that here. */
4401 if (rhs_code != BIT_IOR_EXPR
4402 && rhs_code != BIT_AND_EXPR
4403 && rhs_code != BIT_XOR_EXPR
4404 && rhs_code != MIN_EXPR
4405 && rhs_code != MAX_EXPR
4406 && (!can_reassociate_p (lhs)
4407 || !can_reassociate_p (rhs1)
4408 || !can_reassociate_p (rhs2)))
4409 continue;
4410
4411 if (associative_tree_code (rhs_code))
4412 {
4413 auto_vec<operand_entry_t> ops;
4414 tree powi_result = NULL_TREE;
4415
4416 /* There may be no immediate uses left by the time we
4417 get here because we may have eliminated them all. */
4418 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4419 continue;
4420
4421 gimple_set_visited (stmt, true);
4422 linearize_expr_tree (&ops, stmt, true, true);
4423 ops.qsort (sort_by_operand_rank);
4424 optimize_ops_list (rhs_code, &ops);
4425 if (undistribute_ops_list (rhs_code, &ops,
4426 loop_containing_stmt (stmt)))
4427 {
4428 ops.qsort (sort_by_operand_rank);
4429 optimize_ops_list (rhs_code, &ops);
4430 }
4431
4432 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4433 optimize_range_tests (rhs_code, &ops);
4434
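      /* For multiplication chains, try to combine repeated factors into
         __builtin_powi calls; this is done only on the first instance of
         the pass and only when -funsafe-math-optimizations is in effect.  */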
4435 if (first_pass_instance
4436 && rhs_code == MULT_EXPR
4437 && flag_unsafe_math_optimizations)
4438 powi_result = attempt_builtin_powi (stmt, &ops);
4439
4440 /* If the operand vector is now empty, all operands were
4441 consumed by the __builtin_powi optimization. */
4442 if (ops.length () == 0)
4443 transform_stmt_to_copy (&gsi, stmt, powi_result);
4444 else if (ops.length () == 1)
4445 {
4446 tree last_op = ops.last ()->op;
4447
4448 if (powi_result)
4449 transform_stmt_to_multiply (&gsi, stmt, last_op,
4450 powi_result);
4451 else
4452 transform_stmt_to_copy (&gsi, stmt, last_op);
4453 }
4454 else
4455 {
4456 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4457 int ops_num = ops.length ();
4458 int width = get_reassociation_width (ops_num, rhs_code, mode);
4459 tree new_lhs = lhs;
4460
4461 if (dump_file && (dump_flags & TDF_DETAILS))
4462 fprintf (dump_file,
4463 "Width = %d was chosen for reassociation\n", width);
4464
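      /* If the target's reassociation width allows it and there are enough
         operands, rewrite the expression tree as several parallel
         dependence chains; otherwise rewrite it as a single chain.  */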
4465 if (width > 1
4466 && ops.length () > 3)
4467 rewrite_expr_tree_parallel (stmt, width, ops);
4468 else
4469 {
4470 /* When there are three operands left, we want
4471 to make sure the ones that get the double
4472 binary op are chosen wisely. */
4473 int len = ops.length ();
4474 if (len >= 3)
4475 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4476
4477 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4478 powi_result != NULL);
4479 }
4480
4481 /* If we combined some repeated factors into a
4482 __builtin_powi call, multiply that result by the
4483 reassociated operands. */
4484 if (powi_result)
4485 {
4486 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4487 tree type = TREE_TYPE (lhs);
4488 tree target_ssa = make_temp_ssa_name (type, NULL,
4489 "reassocpow");
4490 gimple_set_lhs (lhs_stmt, target_ssa);
4491 update_stmt (lhs_stmt);
4492 if (lhs != new_lhs)
4493 target_ssa = new_lhs;
4494 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4495 powi_result,
4496 target_ssa);
4497 gimple_set_location (mul_stmt, gimple_location (stmt));
4498 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4499 }
4500 }
4501 }
4502 }
4503 }
4504 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4505 son;
4506 son = next_dom_son (CDI_POST_DOMINATORS, son))
4507 reassociate_bb (son);
4508 }
4509
4510 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4511 void debug_ops_vector (vec<operand_entry_t> ops);
4512
4513 /* Dump the operand entry vector OPS to FILE. */
4514
4515 void
4516 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4517 {
4518 operand_entry_t oe;
4519 unsigned int i;
4520
4521 FOR_EACH_VEC_ELT (ops, i, oe)
4522 {
4523 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4524 print_generic_expr (file, oe->op, 0);
4525 }
4526 }
4527
4528 /* Dump the operand entry vector OPS to STDERR. */
4529
4530 DEBUG_FUNCTION void
4531 debug_ops_vector (vec<operand_entry_t> ops)
4532 {
4533 dump_ops_vector (stderr, ops);
4534 }
4535
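/* Perform reassociation over the whole function: break up subtracts while
   walking down the dominator tree from the entry block, then reassociate
   expressions while walking the post-dominator tree from the exit block.  */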
4536 static void
4537 do_reassoc (void)
4538 {
4539 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4540 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4541 }
4542
4543 /* Initialize the reassociation pass. */
4544
4545 static void
4546 init_reassoc (void)
4547 {
4548 int i;
4549 long rank = 2;
4550 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4551
4552 /* Find the loops, so that we can prevent moving calculations in
4553 them. */
4554 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4555
4556 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4557
4558 operand_entry_pool = create_alloc_pool ("operand entry pool",
4559 sizeof (struct operand_entry), 30);
4560 next_operand_entry_id = 0;
4561
4562 /* Reverse post order (RPO) will give us an ordering where
4563 deeper loops come later. */
4564 pre_and_rev_post_order_compute (NULL, bbs, false);
4565 bb_rank = XCNEWVEC (long, last_basic_block);
4566 operand_rank = pointer_map_create ();
4567
4568 /* Give each default definition a distinct rank. This includes
4569 parameters and the static chain. Walk backwards over all
4570 SSA names so that we get proper rank ordering according
4571 to tree_swap_operands_p. */
4572 for (i = num_ssa_names - 1; i > 0; --i)
4573 {
4574 tree name = ssa_name (i);
4575 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4576 insert_operand_rank (name, ++rank);
4577 }
4578
4579 /* Set up rank for each BB */
4580 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4581 bb_rank[bbs[i]] = ++rank << 16;
4582
4583 free (bbs);
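  /* reassociate_bb walks the post-dominator tree, so compute it now.  */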
4584 calculate_dominance_info (CDI_POST_DOMINATORS);
4585 plus_negates = vNULL;
4586 }
4587
4588 /* Cleanup after the reassociation pass, and print stats if
4589 requested. */
4590
4591 static void
4592 fini_reassoc (void)
4593 {
4594 statistics_counter_event (cfun, "Linearized",
4595 reassociate_stats.linearized);
4596 statistics_counter_event (cfun, "Constants eliminated",
4597 reassociate_stats.constants_eliminated);
4598 statistics_counter_event (cfun, "Ops eliminated",
4599 reassociate_stats.ops_eliminated);
4600 statistics_counter_event (cfun, "Statements rewritten",
4601 reassociate_stats.rewritten);
4602 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4603 reassociate_stats.pows_encountered);
4604 statistics_counter_event (cfun, "Built-in powi calls created",
4605 reassociate_stats.pows_created);
4606
4607 pointer_map_destroy (operand_rank);
4608 free_alloc_pool (operand_entry_pool);
4609 free (bb_rank);
4610 plus_negates.release ();
4611 free_dominance_info (CDI_POST_DOMINATORS);
4612 loop_optimizer_finalize ();
4613 }
4614
4615 /* Gate and execute functions for Reassociation. */
4616
4617 static unsigned int
4618 execute_reassoc (void)
4619 {
4620 init_reassoc ();
4621
4622 do_reassoc ();
4623 repropagate_negates ();
4624
4625 fini_reassoc ();
4626 return 0;
4627 }
4628
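/* Return true if the reassociation pass is enabled (-ftree-reassoc).  */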
4629 static bool
4630 gate_tree_ssa_reassoc (void)
4631 {
4632 return flag_tree_reassoc != 0;
4633 }
4634
4635 namespace {
4636
4637 const pass_data pass_data_reassoc =
4638 {
4639 GIMPLE_PASS, /* type */
4640 "reassoc", /* name */
4641 OPTGROUP_NONE, /* optinfo_flags */
4642 true, /* has_gate */
4643 true, /* has_execute */
4644 TV_TREE_REASSOC, /* tv_id */
4645 ( PROP_cfg | PROP_ssa ), /* properties_required */
4646 0, /* properties_provided */
4647 0, /* properties_destroyed */
4648 0, /* todo_flags_start */
4649 ( TODO_verify_ssa
4650 | TODO_update_ssa_only_virtuals
4651 | TODO_verify_flow ), /* todo_flags_finish */
4652 };
4653
4654 class pass_reassoc : public gimple_opt_pass
4655 {
4656 public:
4657 pass_reassoc (gcc::context *ctxt)
4658 : gimple_opt_pass (pass_data_reassoc, ctxt)
4659 {}
4660
4661 /* opt_pass methods: */
4662 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4663 bool gate () { return gate_tree_ssa_reassoc (); }
4664 unsigned int execute () { return execute_reassoc (); }
4665
4666 }; // class pass_reassoc
4667
4668 } // anon namespace
4669
4670 gimple_opt_pass *
4671 make_pass_reassoc (gcc::context *ctxt)
4672 {
4673 return new pass_reassoc (ctxt);
4674 }