Factor unrelated declarations out of tree.h.
[gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "gimple.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
37 #include "tree-cfg.h"
38 #include "tree-phinodes.h"
39 #include "ssa-iterators.h"
40 #include "stringpool.h"
41 #include "tree-ssanames.h"
42 #include "tree-ssa-loop-niter.h"
43 #include "tree-ssa-loop.h"
44 #include "expr.h"
45 #include "tree-dfa.h"
46 #include "tree-ssa.h"
47 #include "tree-iterator.h"
48 #include "tree-pass.h"
49 #include "alloc-pool.h"
50 #include "vec.h"
51 #include "langhooks.h"
52 #include "pointer-set.h"
53 #include "cfgloop.h"
54 #include "flags.h"
55 #include "target.h"
56 #include "params.h"
57 #include "diagnostic-core.h"
58
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
62
63 It consists of five steps:
64
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
67
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_t
72
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
75
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
79
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
82
83 5. Repropagate negates, as nothing else will clean it up ATM.
84
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
87
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
90
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
95
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
100
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
103
104 a (1), b (1), c (1), d (2), e (2)
105
106
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
109
110 You want to first merge all leaves of the same rank, as much as
111 possible.
112
113 So first build a binary op of
114
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
119
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
124
125 Then build a binary op of d + e
126 mergetmp2 = d + e
127
128 and put mergetmp2 on the merge worklist.
129
130 so merge worklist = {mergetmp, c, mergetmp2}
131
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
134
135 So we have
136
137 build binary op
138 mergetmp3 = mergetmp + c
139
140 worklist = {mergetmp2, mergetmp3}
141
142 mergetmp4 = mergetmp2 + mergetmp3
143
144 worklist = {mergetmp4}
145
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
148
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
151
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
154
155 So why don't we do this?
156
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or less ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
162
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
165
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
169
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
173
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
179
180
/* Statistics gathered over one invocation of the pass and printed
   via statistics_counter_event at the end.  */
static struct
{
  int linearized;		/* Expression trees flattened into operand lists.  */
  int constants_eliminated;	/* Constants folded away from operand lists.  */
  int ops_eliminated;		/* Operands removed via identities (a&a, a+-a, ...).  */
  int rewritten;		/* Linearized trees rewritten in rank order.  */
  int pows_encountered;		/* Repeated-factor/__builtin_pow* operands seen.  */
  int pows_created;		/* __builtin_powi calls synthesized.  */
} reassociate_stats;
191
/* Operator, rank pair.  One entry per operand of a linearized
   expression chain.  */
typedef struct operand_entry
{
  unsigned int rank;	/* Rank from get_rank; drives the sort order.  */
  int id;		/* Unique id, used only to stabilize qsort.  */
  tree op;		/* The operand itself.  */
  unsigned int count;	/* Repeat count for repeated-factor (powi) operands.  */
} *operand_entry_t;
200
/* Allocation pool for struct operand_entry; entries are freed en masse
   when the pass finishes.  */
static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  Indexed by basic block index.  */
static long *bb_rank;

/* Operand->rank hashtable.  Maps a tree operand to its cached rank
   (stored as an integer disguised in the void* slot).  */
static struct pointer_map_t *operand_rank;

/* Forward decls.  */
static long get_rank (tree);


/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
223
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  /* An argument defined inside the loop marks STMT as
	     loop-carried; bias its rank past every block rank so it
	     sorts after all in-loop computations.  */
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
277
278 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
279 loop-carried dependence of an innermost loop, return TRUE; else
280 return FALSE. */
281 static bool
282 loop_carried_phi (tree exp)
283 {
284 gimple phi_stmt;
285 long block_rank;
286
287 if (TREE_CODE (exp) != SSA_NAME
288 || SSA_NAME_IS_DEFAULT_DEF (exp))
289 return false;
290
291 phi_stmt = SSA_NAME_DEF_STMT (exp);
292
293 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
294 return false;
295
296 /* Non-loop-carried phis have block rank. Loop-carried phis have
297 an additional bias added in. If this phi doesn't have block rank,
298 it's biased and should not be propagated. */
299 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
300
301 if (phi_rank (phi_stmt) != block_rank)
302 return true;
303
304 return false;
305 }
306
307 /* Return the maximum of RANK and the rank that should be propagated
308 from expression OP. For most operands, this is just the rank of OP.
309 For loop-carried phis, the value is zero to avoid undoing the bias
310 in favor of the phi. */
311 static long
312 propagate_rank (long rank, tree op)
313 {
314 long op_rank;
315
316 if (loop_carried_phi (op))
317 return rank;
318
319 op_rank = get_rank (op);
320
321 return MAX (rank, op_rank);
322 }
323
324 /* Look up the operand rank structure for expression E. */
325
326 static inline long
327 find_operand_rank (tree e)
328 {
329 void **slot = pointer_map_contains (operand_rank, e);
330 return slot ? (long) (intptr_t) *slot : -1;
331 }
332
333 /* Insert {E,RANK} into the operand rank hashtable. */
334
335 static inline void
336 insert_operand_rank (tree e, long rank)
337 {
338 void **slot;
339 gcc_assert (rank > 0);
340 slot = pointer_map_insert (operand_rank, e);
341 gcc_assert (!*slot);
342 *slot = (void *) (intptr_t) rank;
343 }
344
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      /* Default defs (arguments, uninitialized values) have their
	 rank pre-seeded before the walk starts.  */
      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      /* Calls, asms and stores are not rankable by operands; they get
	 the rank of their containing block.  */
      if (!is_gimple_assign (stmt)
	  || gimple_vdef (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
	{
	  /* Single-rhs assignment: recurse into the operands of the
	     rhs tree (e.g. a COMPONENT_REF), if any.  */
	  tree rhs = gimple_assign_rhs1 (stmt);
	  n = TREE_OPERAND_LENGTH (rhs);
	  if (n == 0)
	    rank = propagate_rank (rank, rhs);
	  else
	    {
	      for (i = 0; i < n; i++)
		{
		  op = TREE_OPERAND (rhs, i);

		  if (op != NULL_TREE)
		    rank = propagate_rank (rank, op);
		}
	    }
	}
      else
	{
	  /* Binary/ternary assignment: operand 0 is the lhs, so start
	     at 1.  */
	  n = gimple_num_ops (stmt);
	  for (i = 1; i < n; i++)
	    {
	      op = gimple_op (stmt, i);
	      gcc_assert (op);
	      rank = propagate_rank (rank, op);
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
461
462
463 /* We want integer ones to end up last no matter what, since they are
464 the ones we can do the most with. */
465 #define INTEGER_CONST_TYPE 1 << 3
466 #define FLOAT_CONST_TYPE 1 << 2
467 #define OTHER_CONST_TYPE 1 << 1
468
469 /* Classify an invariant tree into integer, float, or other, so that
470 we can sort them to be near other constants of the same type. */
471 static inline int
472 constant_type (tree t)
473 {
474 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
475 return INTEGER_CONST_TYPE;
476 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
477 return FLOAT_CONST_TYPE;
478 else
479 return OTHER_CONST_TYPE;
480 }
481
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.
   Ties are broken by the entries' unique ids so the order is
   host-independent.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  /* General case: higher rank sorts earlier (b - a gives descending
     order).  */
  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
520
521 /* Add an operand entry to *OPS for the tree operand OP. */
522
523 static void
524 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
525 {
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
527
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = 1;
532 ops->safe_push (oe);
533 }
534
535 /* Add an operand entry to *OPS for the tree operand OP with repeat
536 count REPEAT. */
537
538 static void
539 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
540 HOST_WIDE_INT repeat)
541 {
542 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
543
544 oe->op = op;
545 oe->rank = get_rank (op);
546 oe->id = next_operand_entry_id++;
547 oe->count = repeat;
548 ops->safe_push (oe);
549
550 reassociate_stats.pows_encountered++;
551 }
552
553 /* Return true if STMT is reassociable operation containing a binary
554 operation with tree code CODE, and is inside LOOP. */
555
556 static bool
557 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
558 {
559 basic_block bb = gimple_bb (stmt);
560
561 if (gimple_bb (stmt) == NULL)
562 return false;
563
564 if (!flow_bb_inside_loop_p (loop, bb))
565 return false;
566
567 if (is_gimple_assign (stmt)
568 && gimple_assign_rhs_code (stmt) == code
569 && has_single_use (gimple_assign_lhs (stmt)))
570 return true;
571
572 return false;
573 }
574
575
576 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
577 operand of the negate operation. Otherwise, return NULL. */
578
579 static tree
580 get_unary_op (tree name, enum tree_code opcode)
581 {
582 gimple stmt = SSA_NAME_DEF_STMT (name);
583
584 if (!is_gimple_assign (stmt))
585 return NULL_TREE;
586
587 if (gimple_assign_rhs_code (stmt) == opcode)
588 return gimple_assign_rhs1 (stmt);
589 return NULL_TREE;
590 }
591
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  I is CURR's index in *OPS;
   *ALL_DONE is set when the whole expression collapsed to a single
   constant.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry_t> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  /* Idempotent ops: a OP a == a, so drop CURR.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  /* a ^ a == 0: both operands disappear.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      /* The whole chain was just this pair; replace it with a
		 single zero constant.  NOTE(review): create (0) on a
		 live vec looks like it abandons the old storage —
		 confirm against vec.h whether this leaks.  */
	      ops->create (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove both entries; after the first removal CURR's
		 old slot is i-1.  */
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
665
666 static vec<tree> plus_negates;
667
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry_t> *ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  /* See through CURR->OP's defining statement: is it -x or ~x?  */
  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{
	  /* x + -x -> 0.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  /* x + ~x -> -1 (all-ones).  */
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
750
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry_t> *ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  /* x & ~x -> 0, x | ~x -> all-ones; either way the whole
	     chain collapses to that constant.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
					  TYPE_PRECISION (TREE_TYPE (oe->op)));

	  /* The constant absorbs everything; keep only OE.  */
	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
816
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.

   Because OPS is sorted with constants last, only the final entry
   needs to be inspected.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  /* x & 0 -> 0: the zero absorbs every other operand.  */
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x & -1 -> x: the all-ones mask is an identity.  */
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  /* x | -1 -> -1: all-ones absorbs everything.  */
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x | 0 -> x: zero is the identity.  */
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 -> 0, valid for floats only when NaNs and signed
	     zeros cannot change the answer.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (TYPE_MODE (type))
		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  /* NOTE(review): this keeps one operand plus the zero
		     (truncate (1)), unlike the BIT_AND/BIT_IOR cases
		     above which truncate (0).  The product is still 0
		     so the result is correct, but confirm whether the
		     asymmetry is intentional.  */
		  ops->truncate (1);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x * 1 -> x; for floats only when sNaNs need no honoring.  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (TYPE_MODE (type))
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x ^ 0, x + 0, x - 0 -> x; for float addition only when
	     adding/subtracting the zero is an exact identity.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
945
946
947 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
948 bool, bool);
949
/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;		/* Number of chains the (oecode, op) pair occurs in.  */
  int id;		/* Unique id to stabilize qsort ties.  */
  enum tree_code oecode;	/* Operation the operand appears under.  */
  tree op;		/* The counted operand.  */
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.
   The hashtable stores indices into this vector (offset by 42), not
   pointers.  */
static vec<oecount> cvec;
961
962
/* Oecount hashtable helpers.  */

struct oecount_hasher : typed_noop_remove <void>
{
  /* Note that this hash table stores integers, not pointers.
     So, observe the casting in the member functions.  The stored
     "pointer" value is a cvec index biased by 42 — presumably to
     keep it clear of the table's reserved empty/deleted values;
     verify against hash-table.h.  */
  typedef void value_type;
  typedef void compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for oecount.  Recovers the cvec slot from the biased
   index P and hashes on the (op, oecode) pair.  */

inline hashval_t
oecount_hasher::hash (const value_type *p)
{
  const oecount *c = &cvec[(size_t)p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  Two entries match when both the
   operation code and the operand tree are identical.  */

inline bool
oecount_hasher::equal (const value_type *p1, const compare_type *p2)
{
  const oecount *c1 = &cvec[(size_t)p1 - 42];
  const oecount *c2 = &cvec[(size_t)p2 - 42];
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}
994
995 /* Comparison function for qsort sorting oecount elements by count. */
996
997 static int
998 oecount_cmp (const void *p1, const void *p2)
999 {
1000 const oecount *c1 = (const oecount *)p1;
1001 const oecount *c2 = (const oecount *)p2;
1002 if (c1->cnt != c2->cnt)
1003 return c1->cnt - c2->cnt;
1004 else
1005 /* If counts are identical, use unique IDs to stabilize qsort. */
1006 return c1->id - c2->id;
1007 }
1008
1009 /* Return TRUE iff STMT represents a builtin call that raises OP
1010 to some exponent. */
1011
1012 static bool
1013 stmt_is_power_of_op (gimple stmt, tree op)
1014 {
1015 tree fndecl;
1016
1017 if (!is_gimple_call (stmt))
1018 return false;
1019
1020 fndecl = gimple_call_fndecl (stmt);
1021
1022 if (!fndecl
1023 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1024 return false;
1025
1026 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1027 {
1028 CASE_FLT_FN (BUILT_IN_POW):
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1031
1032 default:
1033 return false;
1034 }
1035 }
1036
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      /* pow's exponent is a REAL_CST; convert, decrement, and write
	 back a new real constant.  */
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, 0, 0);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      /* powi's exponent is an integer constant; decrement directly.  */
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      /* Caller guaranteed a pow/powi call via stmt_is_power_of_op.  */
      gcc_unreachable ();
    }
}
1068
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  /* STMT may be either a call (__builtin_pow*) or an assignment.  */
  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* Constants folded into the use require re-validating the stmt's
     operands; SSA names do not.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  /* STMT is now dead: detach its virtual def and remove it.  */
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  gsi_remove (&gsi, true);
  release_defs (stmt);
}
1098
1099 /* Walks the linear chain with result *DEF searching for an operation
1100 with operand OP and code OPCODE removing that from the chain. *DEF
1101 is updated if there is only one operand but no operation left. */
1102
static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      /* In a multiplication chain OP may occur as the base of a
	 __builtin_pow* call; removing one occurrence means
	 decrementing the exponent.  If that leaves exponent 1, the
	 call degenerates to OP itself and is forwarded to its use.  */
      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  The chain is linearized, so OP
	 must eventually be found along rhs1.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
1158
1159 /* Returns true if statement S1 dominates statement S2. Like
1160 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1161
static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      /* A zero UID would make the comparison below meaningless.  */
      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      /* Equal UIDs happen for statements inserted by this pass, which
	 reuse the UID of their insertion point (see insert_stmt_after).
	 Break the tie by scanning forward from S1 through the run of
	 statements sharing its UID: S1 dominates S2 iff S2 is found.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  /* Different blocks: fall back to the dominator tree.  */
  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
1212
1213 /* Insert STMT after INSERT_POINT. */
1214
static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    /* After a PHI: insert among the real statements of its block,
       handled below.  */
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      /* Reuse INSERT_POINT's UID; reassoc_stmt_dominates_stmt_p breaks
	 such ties by scanning forward within the equal-UID run.  */
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      /* No statement after the labels: inherit the UID of the block's
	 last statement, or use 1 in a completely empty block.  */
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
1248
1249 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1250 the result. Places the statement after the definition of either
1251 OP1 or OP2. Returns the new statement. */
1252
static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type, NULL);
  sum = gimple_build_assign_with_ops (opcode, op, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      /* Both operands are constants or default definitions: place the
	 sum at the start of the function, giving it a UID consistent
	 with its neighbors.  */
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      /* Otherwise insert after whichever defining statement is
	 dominated by the other, so both operands are available.  */
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
1300
1301 /* Perform un-distribution of divisions and multiplications.
1302 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1303 to (A + B) / X for real X.
1304
1305 The algorithm is organized as follows.
1306
1307 - First we walk the addition chain *OPS looking for summands that
1308 are defined by a multiplication or a real division. This results
1309 in the candidates bitmap with relevant indices into *OPS.
1310
1311 - Second we build the chains of multiplications or divisions for
1312 these candidates, counting the number of occurrences of (operand, code)
1313 pairs in all of the candidates chains.
1314
1315 - Third we sort the (operand, code) pairs by number of occurrence and
1316 process them starting with the pair with the most uses.
1317
1318 * For each such pair we walk the candidates again to build a
1319 second candidate bitmap noting all multiplication/division chains
1320 that have at least one occurrence of (operand, code).
1321
1322 * We build an alternate addition chain only covering these
1323 candidates with one (operand, code) operation removed from their
1324 multiplication/division chain.
1325
1326 * The first candidate gets replaced by the alternate addition chain
1327 multiplied/divided by the operand.
1328
1329 * All candidate chains get disabled for further processing and
1330 processing of (operand, code) pairs continues.
1331
1332 The alternate addition chains built are re-processed by the main
1333 reassociation algorithm which allows optimizing a * x * y + b * y * x
1334 to (a + b ) * x * y in one invocation of the reassociation pass. */
1335
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  hash_table <oecount_hasher> ctable;
  bool changed = false;
  int next_oecount_id = 0;

  /* Only addition chains are handled.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      /* A summand is a candidate iff it is defined by a reassociable
	 multiplication or real division.  */
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  /* Undistribution needs at least two chains to combine.  */
  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);
  ctable.create (15);
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      /* Count the occurrences of each (operand, code) pair across all
	 candidate chains.  */
      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  void **slot;
	  size_t idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  /* Slots store the 1-based cvec index biased by 41 (decoded
	     below as *slot - 42); the bias presumably keeps values away
	     from the table's empty/deleted markers — see oecount_hasher.  */
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot ((void *)idx, INSERT);
	  if (!*slot)
	    {
	      *slot = (void *)idx;
	    }
	  else
	    {
	      /* Pair seen before: drop the new entry and bump the count
		 of the existing one.  */
	      cvec.pop ();
	      cvec[(size_t)*slot - 42].cnt++;
	    }
	}
    }
  ctable.dispose ();

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      /* cvec is sorted with the highest counts last; once a count
	 drops below 2 no further pair can be profitable.  */
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  gimple prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      /* OE2 is folded into the new chain; neutralize its entry
		 with zero so the outer addition is unchanged.  */
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  /* Releasing an already-released vec is a no-op, so subops[first]
     above is safe to release again here.  */
  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
1555
1556 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1557 expression, examine the other OPS to see if any of them are comparisons
1558 of the same values, which we may be able to combine or eliminate.
1559 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1560
static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry_t> *ops,
				unsigned int currindex,
				operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  /* Only | and & combine comparisons this way.  */
  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      /* A non-constant, non-trivial result must be a comparison whose
	 operands are valid GIMPLE values, or we cannot re-emit it as a
	 statement below — bail out to the next candidate otherwise.  */
      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op, 0);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op, 0);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t, 0);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  /* Emit the combined comparison as a new statement and let
	     CURR refer to its result.  */
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
1681
1682 /* Perform various identities and other optimizations on the list of
1683 operand entries, stored in OPS. The tree code for the binary
1684 operation between all the operands is OPCODE. */
1685
1686 static void
1687 optimize_ops_list (enum tree_code opcode,
1688 vec<operand_entry_t> *ops)
1689 {
1690 unsigned int length = ops->length ();
1691 unsigned int i;
1692 operand_entry_t oe;
1693 operand_entry_t oelast = NULL;
1694 bool iterate = false;
1695
1696 if (length == 1)
1697 return;
1698
1699 oelast = ops->last ();
1700
1701 /* If the last two are constants, pop the constants off, merge them
1702 and try the next two. */
1703 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1704 {
1705 operand_entry_t oelm1 = (*ops)[length - 2];
1706
1707 if (oelm1->rank == 0
1708 && is_gimple_min_invariant (oelm1->op)
1709 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1710 TREE_TYPE (oelast->op)))
1711 {
1712 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1713 oelm1->op, oelast->op);
1714
1715 if (folded && is_gimple_min_invariant (folded))
1716 {
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file, "Merging constants\n");
1719
1720 ops->pop ();
1721 ops->pop ();
1722
1723 add_to_ops_vec (ops, folded);
1724 reassociate_stats.constants_eliminated++;
1725
1726 optimize_ops_list (opcode, ops);
1727 return;
1728 }
1729 }
1730 }
1731
1732 eliminate_using_constants (opcode, ops);
1733 oelast = NULL;
1734
1735 for (i = 0; ops->iterate (i, &oe);)
1736 {
1737 bool done = false;
1738
1739 if (eliminate_not_pairs (opcode, ops, i, oe))
1740 return;
1741 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1742 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1743 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1744 {
1745 if (done)
1746 return;
1747 iterate = true;
1748 oelast = NULL;
1749 continue;
1750 }
1751 oelast = oe;
1752 i++;
1753 }
1754
1755 length = ops->length ();
1756 oelast = ops->last ();
1757
1758 if (iterate)
1759 optimize_ops_list (opcode, ops);
1760 }
1761
1762 /* The following functions are subroutines to optimize_range_tests and allow
1763 it to try to change a logical combination of comparisons into a range
1764 test.
1765
1766 For example, both
1767 X == 2 || X == 5 || X == 3 || X == 4
1768 and
1769 X >= 2 && X <= 5
1770 are converted to
1771 (unsigned) (X - 2) <= 3
1772
1773 For more information see comments above fold_test_range in fold-const.c,
1774 this implementation is for GIMPLE. */
1775
struct range_entry
{
  /* Expression being range-tested; NULL_TREE when the test comes from
     a GIMPLE_COND rather than an SSA_NAME (see init_range_entry).  */
  tree exp;
  /* Range bounds.  NULL_TREE low means the type minimum, NULL_TREE
     high means the type maximum (see range_entry_cmp).  */
  tree low;
  tree high;
  /* True for a test that EXP is inside [LOW, HIGH], false for a test
     that it is outside.  */
  bool in_p;
  /* Whether computing this range assumed signed overflow is undefined
     (triggers -Wstrict-overflow in update_range_test).  */
  bool strict_overflow_p;
  /* IDX indexes the corresponding operand entry in the ops vector;
     NEXT chains entries for the later range-test routines (not used
     in this part of the file).  */
  unsigned int idx, next;
};
1785
1786 /* This is similar to make_range in fold-const.c, but on top of
1787 GIMPLE instead of trees. If EXP is non-NULL, it should be
1788 an SSA_NAME and STMT argument is ignored, otherwise STMT
1789 argument should be a GIMPLE_COND. */
1790
static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  /* Initialize to "no usable range".  */
  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  /* Only integral SSA_NAMEs (or a GIMPLE_COND when EXP is NULL) are
     handled.  */
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  /* IS_BOOL tracks whether EXP is known to be boolean-valued; only
     then is the range recorded at the end.  */
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      /* Fetch the operation defining EXP, or the condition of the
	 GIMPLE_COND when EXP is NULL.  */
      if (exp != NULL_TREE)
	{
	  if (TREE_CODE (exp) != SSA_NAME)
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
	      && ((high && integer_zerop (high))
		  || (low && integer_onep (low))))
	    {
	      /* Boolean ~X flips the truth value: negate the range and
		 keep walking from the operand.  */
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* A plain copy: look through it.  */
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    goto do_default;
	  /* Track boolean-ness through the conversion, same rules as
	     for EXP itself above.  */
	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  /* Let the shared fold-const machinery refine the range by one
	     step; NEXP, if any, is the next expression to examine.  */
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  /* Record the result only when EXP is known boolean-valued.  */
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
1928
1929 /* Comparison function for qsort. Sort entries
1930 without SSA_NAME exp first, then with SSA_NAMEs sorted
1931 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1932 by increasing ->low and if ->low is the same, by increasing
1933 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1934 maximum. */
1935
1936 static int
1937 range_entry_cmp (const void *a, const void *b)
1938 {
1939 const struct range_entry *p = (const struct range_entry *) a;
1940 const struct range_entry *q = (const struct range_entry *) b;
1941
1942 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1943 {
1944 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1945 {
1946 /* Group range_entries for the same SSA_NAME together. */
1947 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1948 return -1;
1949 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1950 return 1;
1951 /* If ->low is different, NULL low goes first, then by
1952 ascending low. */
1953 if (p->low != NULL_TREE)
1954 {
1955 if (q->low != NULL_TREE)
1956 {
1957 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1958 p->low, q->low);
1959 if (tem && integer_onep (tem))
1960 return -1;
1961 tem = fold_binary (GT_EXPR, boolean_type_node,
1962 p->low, q->low);
1963 if (tem && integer_onep (tem))
1964 return 1;
1965 }
1966 else
1967 return 1;
1968 }
1969 else if (q->low != NULL_TREE)
1970 return -1;
1971 /* If ->high is different, NULL high goes last, before that by
1972 ascending high. */
1973 if (p->high != NULL_TREE)
1974 {
1975 if (q->high != NULL_TREE)
1976 {
1977 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1978 p->high, q->high);
1979 if (tem && integer_onep (tem))
1980 return -1;
1981 tem = fold_binary (GT_EXPR, boolean_type_node,
1982 p->high, q->high);
1983 if (tem && integer_onep (tem))
1984 return 1;
1985 }
1986 else
1987 return -1;
1988 }
1989 else if (p->high != NULL_TREE)
1990 return 1;
1991 /* If both ranges are the same, sort below by ascending idx. */
1992 }
1993 else
1994 return 1;
1995 }
1996 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1997 return -1;
1998
1999 if (p->idx < q->idx)
2000 return -1;
2001 else
2002 {
2003 gcc_checking_assert (p->idx > q->idx);
2004 return 1;
2005 }
2006 }
2007
2008 /* Helper routine of optimize_range_test.
2009 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2010 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2011 OPCODE and OPS are arguments of optimize_range_tests. Return
2012 true if the range merge has been successful.
2013 If OPCODE is ERROR_MARK, this is called from within
2014 maybe_optimize_range_tests and is performing inter-bb range optimization.
2015 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2016 oe->rank. */
2017
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry_t> *ops, tree exp, bool in_p,
		   tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  /* A NULL op means the entry stands for a GIMPLE_COND; oe->id is then
     its basic block index.  */
  gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, exp, in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;

  /* Fail if build_range_check could not express the merged range.  */
  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (r = otherrange; r < otherrange + count; r++)
	{
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  /* For | (also the inter-bb | case flagged via oe->rank) the built
     check is inverted.  NOTE(review): this presumably undoes a
     canonicalization done by the caller — confirm against
     optimize_range_tests.  */
  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  gsi = gsi_for_stmt (stmt);
  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				  GSI_SAME_STMT);
  /* The statements just emitted have UID 0; walk back over them and
     give them STMT's UID so UID-based dominance checks stay valid.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));

  /* RANGE now describes the merged test.  */
  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (range = otherrange; range < otherrange + count; range++)
    {
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  /* Inter-bb case: replace with the neutral element of the
	     containing operation (0 for |, 1 for &).  */
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	/* Single-stmt case: mark the entry; presumably the caller
	   drops error_mark_node ops from the list.  */
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
2105
2106 /* Optimize X == CST1 || X == CST2
2107 if popcount (CST1 ^ CST2) == 1 into
2108 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2109 Similarly for ranges. E.g.
2110 X != 2 && X != 3 && X != 10 && X != 11
2111 will be transformed by the previous optimization into
2112 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2113 and this loop can transform that into
2114 !(((X & ~8) - 2U) <= 1U). */
2115
2116 static bool
2117 optimize_range_tests_xor (enum tree_code opcode, tree type,
2118 tree lowi, tree lowj, tree highi, tree highj,
2119 vec<operand_entry_t> *ops,
2120 struct range_entry *rangei,
2121 struct range_entry *rangej)
2122 {
2123 tree lowxor, highxor, tem, exp;
2124 /* Check highi ^ lowi == highj ^ lowj and
2125 popcount (highi ^ lowi) == 1. */
2126 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2127 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2128 return false;
2129 if (tree_log2 (lowxor) < 0)
2130 return false;
2131 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2132 if (!tree_int_cst_equal (lowxor, highxor))
2133 return false;
2134
2135 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2136 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2137 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2138 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2139 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2140 rangei->in_p, lowj, highj,
2141 rangei->strict_overflow_p
2142 || rangej->strict_overflow_p))
2143 return true;
2144 return false;
2145 }
2146
2147 /* Optimize X == CST1 || X == CST2
2148 if popcount (CST2 - CST1) == 1 into
2149 ((X - CST1) & ~(CST2 - CST1)) == 0.
2150 Similarly for ranges. E.g.
2151 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2152 || X == 75 || X == 45
2153 will be transformed by the previous optimization into
2154 (X - 43U) <= 3U || (X - 75U) <= 3U
2155 and this loop can transform that into
2156 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2157 static bool
2158 optimize_range_tests_diff (enum tree_code opcode, tree type,
2159 tree lowi, tree lowj, tree highi, tree highj,
2160 vec<operand_entry_t> *ops,
2161 struct range_entry *rangei,
2162 struct range_entry *rangej)
2163 {
2164 tree tem1, tem2, mask;
2165 /* Check highi - lowi == highj - lowj. */
2166 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2167 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2168 return false;
2169 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2170 if (!tree_int_cst_equal (tem1, tem2))
2171 return false;
2172 /* Check popcount (lowj - lowi) == 1. */
2173 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2174 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2175 return false;
2176 if (tree_log2 (tem1) < 0)
2177 return false;
2178
2179 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2180 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2181 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2182 lowj = build_int_cst (type, 0);
2183 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2184 rangei->in_p, lowj, tem2,
2185 rangei->strict_overflow_p
2186 || rangej->strict_overflow_p))
2187 return true;
2188 return false;
2189 }
2190
/* It does some common checks for function optimize_range_tests_xor and
   optimize_range_tests_diff: walks pairs of non-overlapping integral
   ranges in RANGES[FIRST..LENGTH-1] and tries to merge each pair.
   If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
   Else it calls optimize_range_tests_diff.
   Returns true if any pair of range tests has been merged.  */

static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry_t> *ops,
			struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      /* Skip entries already merged away (exp == NULL_TREE) and
	 in_p ranges; only integral types are handled.  */
      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      /* Only look at a bounded window of following entries to avoid
	 quadratic behavior on long range chains.  */
      for (j = i + 1; j < length && j < i + 64; j++)
	{
	  bool changes;
	  /* Candidate ranges must test the same expression and also
	     not be in_p.  */
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi, i.e. the two ranges don't overlap.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	    continue;
	  if (optimize_xor)
	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
						highi, highj, ops,
						ranges + i, ranges + j);
	  else
	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
						 highi, highj, ops,
						 ranges + i, ranges + j);
	  if (changes)
	    {
	      any_changes = true;
	      break;
	    }
	}
    }
  return any_changes;
}
2251
/* Optimize range tests, similarly how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   Returns true if any of the tests in *OPS were changed.  */

static bool
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  /* A single operand can't be combined with anything.  */
  if (length == 1)
    return false;

  /* Build one range_entry per operand.  For oe->op == NULL (inter-bb
     case) the range is derived from the bb's final GIMPLE_COND.  */
  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
			oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;
    }

  /* Sort the ranges and find the first entry whose tested expression
     is an SSA_NAME; only those are considered for merging below.  */
  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      /* Greedily fold as many consecutive same-expression ranges as
	 merge_ranges accepts into [low, high].  */
      for (j = i + 1; j < length; j++)
	{
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	    break;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	}

      /* Nothing merged with range i.  */
      if (j == i + 1)
	continue;

      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
			     ops, ranges[i].exp, in_p, low, high,
			     strict_overflow_p))
	{
	  i = j - 1;
	  any_changes = true;
	}
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	i = j - 1;
      else
	++update_fail_count;
    }

  /* Second pass: the bitwise single-bit tricks, xor first and, if
     branches are sufficiently expensive, the difference variant too.  */
  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);

  /* update_range_test marks merged-away operands with error_mark_node;
     compact them out of *OPS.  In the ERROR_MARK (inter-bb) case the
     caller consumes *OPS by index, so leave them in place.  */
  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
2353
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */

static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple use_stmt;

  /* Must be a conversion in a bb with exactly one, non-complex,
     outgoing edge.  */
  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  /* The cast must widen a boolean into some integral type.  */
  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
2406
/* Return true if BB is suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple stmt;
  gimple_stmt_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  if (test_bb == bb)
    return false;
  /* Check last stmt first.  It must be a GIMPLE_COND (or, when searching
     forward, a final_range_test_p cast), not yet visited by this pass,
     and must not be able to throw.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
	return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    return false;
	  if (e->dest == test_bb)
	    {
	      if (backward)
		continue;
	      else
		return false;
	    }
	  if (e->dest == bb)
	    return false;
	  if (*other_bb == NULL)
	    {
	      /* Pick as *OTHER_BB a successor BB shares with TEST_BB,
		 provided all of TEST_BB's outgoing edges are plain
		 true/false edges.  */
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		  return false;
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
		return false;
	    }
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
	  else if (backward)
	    return false;
	}
      if (*other_bb == NULL || !other_edge_seen)
	return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	{
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	  if (!is_cond)
	    {
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
		continue;
	    }
	  else
	    {
	      gimple test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
		continue;
	    }

	  return false;
	}
    }
  return true;
}
2519
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;
      if (gimple_has_side_effects (stmt))
	return false;
      /* Reaching the final stmt of the bb (the range test itself) means
	 everything before it passed the checks — success.  */
      if (stmt == last)
	return true;
      /* All other stmts must be side-effect-free SSA assignments that
	 cannot trap.  */
      if (!is_gimple_assign (stmt))
	return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
	return false;
      /* Each lhs must be consumed only within this bb (debug uses
	 excepted).  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (gimple_bb (use_stmt) != bb)
	    return false;
	}
    }
  /* Only reached if the bb has no non-debug stmt matching LAST
     (e.g. LAST is NULL); treat that as unsuitable.  */
  return false;
}
2564
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  Every pushed operand is a
   leaf: either not defined by a reassociable CODE stmt, or not single
   use.  Visited defining stmts are marked so they can be removed later
   if they become dead.  */

static bool
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
	 struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
      {
	/* rhs[i] is a leaf of the CODE chain; record it as one
	   operand entry.  */
	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

	oe->op = rhs[i];
	oe->rank = code;
	oe->id = 0;
	oe->count = 1;
	ops->safe_push (oe);
      }
  return true;
}
2597
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  *PIDX is the cursor into OPS, advanced for each leaf consumed;
   it must walk OPS in the same order get_ops pushed entries.  Returns
   the (possibly new) SSA_NAME computing the updated expression, or
   NULL if VAR's def stmt isn't part of the reassociable chain.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
	    unsigned int *pidx, struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  /* rhs[0..1] are the original operands, rhs[2..3] their (possibly
     updated) replacements.  */
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	  {
	    /* A leaf: consume the next entry from OPS if it was the one
	       pushed for this operand (single use), else keep it.  */
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	    else
	      rhs[2 + i] = rhs[i];
	  }
      }
  /* If an operand actually changed (modulo commutativity), build a new
     stmt rather than modifying the old one in place.  */
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var), NULL);
      gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
					       var, rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}
2642
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  /* SSA_NAME the bb's range test is based on, or NULL_TREE when the
     bb's GIMPLE_COND was pushed directly (oe->op == NULL).  */
  tree op;
  /* Indexes of the first and one-past-the-last entries this bb
     contributed to the ops vector.  */
  unsigned int first_idx, last_idx;
};
2651
/* Inter-bb range test optimization.  Starting from STMT (the last stmt
   of some bb), discover the maximal chain of basic blocks first_bb ..
   last_bb whose conditions together form one big && or || of range
   tests branching to a common other_bb, collect the individual tests
   into an operand vector, hand it to optimize_range_tests, and rewrite
   the affected stmts if anything was merged.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  vec<operand_entry_t> ops = vNULL;
  vec<inter_bb_range_test_entry> bbinfo = vNULL;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
	return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
	break;
      if (!no_side_effect_bb (first_bb))
	break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return;
	else if (single_pred_p (e->dest))
	  {
	    stmt = last_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
	      {
		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
		  break;
		else
		  other_bb = NULL;
	      }
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
	      {
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
		  other_bb = NULL;
	      }
	  }
      if (other_bb == NULL)
	return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
	  break;
      if (e == NULL)
	break;
      if (!single_pred_p (e->dest))
	break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
	break;
      if (!no_side_effect_bb (e->dest))
	break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	{
	  use_operand_p use_p;
	  gimple phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is
	     _123 = (int) _234;

	     followed by:
	     <bb M>:
	     # _345 = PHI <_123(N), 1(...), 1(...)>

	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	    {
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;
	    }

	  /* If _234 SSA_NAME_DEF_STMT is
	     _234 = _567 | _789;
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (!get_ops (rhs, code, &ops,
			loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	    {
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry_t oe
		= (operand_entry_t) pool_alloc (operand_entry_pool);

	      oe->op = rhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	    }
	  else
	    bb_ent.last_idx = ops.length ();
	  bb_ent.op = rhs;
	  bbinfo.safe_push (bb_ent);
	  continue;
	}
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	{
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry_t oe
	    = (operand_entry_t) pool_alloc (operand_entry_pool);

	  oe->op = NULL;
	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at which's end the GIMPLE_COND
	     is.  */
	  oe->id = bb->index;
	  oe->count = 1;
	  ops.safe_push (oe);
	  bb_ent.op = NULL;
	  bb_ent.last_idx++;
	}
      else if (ops.length () > bb_ent.first_idx)
	{
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
	}
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
	break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  /* First loop: bbs whose test was based on an SSA_NAME
	     (bbinfo[idx].op != NULL_TREE).  */
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	    {
	      tree new_op;

	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		{
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
		}
	      if (bbinfo[idx].op != new_op)
		{
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple use_stmt, cast_stmt = NULL;

		  /* Redirect COND/PHI uses of the old op to the new one;
		     a cast use (only possible in last_bb) is remembered
		     and handled below.  */
		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		      continue;
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_stmt = use_stmt;
		    else
		      gcc_unreachable ();
		  if (cast_stmt)
		    {
		      /* Build a fresh cast of new_op and redirect the
			 original cast's COND/PHI uses to it.  */
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
		      enum tree_code rhs_code
			= gimple_assign_rhs_code (cast_stmt);
		      gimple g
			= gimple_build_assign_with_ops (rhs_code, new_lhs,
							new_op, NULL_TREE);
		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
		      gimple_set_uid (g, gimple_uid (cast_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			  continue;
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
			else
			  gcc_unreachable ();
		    }
		}
	    }
	  if (bb == first_bb)
	    break;
	}
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  /* Second loop: bbs whose GIMPLE_COND was pushed directly
	     (bbinfo[idx].op == NULL_TREE); rewrite the condition from
	     the merged result.  */
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	    {
	      stmt = last_stmt (bb);
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_false (stmt);
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_true (stmt);
	      else
		{
		  gimple_cond_set_code (stmt, NE_EXPR);
		  gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (stmt, boolean_false_node);
		}
	      update_stmt (stmt);
	    }
	  if (bb == first_bb)
	    break;
	}
    }
  bbinfo.release ();
  ops.release ();
}
2981
2982 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2983 of STMT in it's operands. This is also known as a "destructive
2984 update" operation. */
2985
2986 static bool
2987 is_phi_for_stmt (gimple stmt, tree operand)
2988 {
2989 gimple def_stmt;
2990 tree lhs;
2991 use_operand_p arg_p;
2992 ssa_op_iter i;
2993
2994 if (TREE_CODE (operand) != SSA_NAME)
2995 return false;
2996
2997 lhs = gimple_assign_lhs (stmt);
2998
2999 def_stmt = SSA_NAME_DEF_STMT (operand);
3000 if (gimple_code (def_stmt) != GIMPLE_PHI)
3001 return false;
3002
3003 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3004 if (lhs == USE_FROM_PTR (arg_p))
3005 return true;
3006 return false;
3007 }
3008
3009 /* Remove def stmt of VAR if VAR has zero uses and recurse
3010 on rhs1 operand if so. */
3011
3012 static void
3013 remove_visited_stmt_chain (tree var)
3014 {
3015 gimple stmt;
3016 gimple_stmt_iterator gsi;
3017
3018 while (1)
3019 {
3020 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3021 return;
3022 stmt = SSA_NAME_DEF_STMT (var);
3023 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3024 {
3025 var = gimple_assign_rhs1 (stmt);
3026 gsi = gsi_for_stmt (stmt);
3027 gsi_remove (&gsi, true);
3028 release_defs (stmt);
3029 }
3030 else
3031 return;
3032 }
3033 }
3034
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */

static void
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
			  unsigned int opindex, gimple stmt)
{
  operand_entry_t oe1, oe2, oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  /* Move oe3 first if oe1 and oe2 share a rank oe3 doesn't, or if
     oe3 (alone) is a destructive-update PHI operand.  */
  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
    {
      /* Swap the op/rank contents of oe1 and oe3.  */
      struct operand_entry temp = *oe3;
      oe3->op = oe1->op;
      oe3->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank= temp.rank;
    }
  /* Likewise for oe2 when it is the odd one out.  */
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
    {
      /* Swap the op/rank contents of oe1 and oe2.  */
      struct operand_entry temp = *oe2;
      oe2->op = oe1->op;
      oe2->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
}
3090
3091 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3092 two definitions, otherwise return STMT. */
3093
3094 static inline gimple
3095 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3096 {
3097 if (TREE_CODE (rhs1) == SSA_NAME
3098 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3099 stmt = SSA_NAME_DEF_STMT (rhs1);
3100 if (TREE_CODE (rhs2) == SSA_NAME
3101 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3102 stmt = SSA_NAME_DEF_STMT (rhs2);
3103 return stmt;
3104 }
3105
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  CHANGED is true if any stmt further out in
   the chain was rewritten, in which case a fresh SSA_NAME and stmt are
   created (instead of updating STMT in place) so debug stmts stay
   consistent.  */

static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry_t oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  if (changed)
	    {
	      /* Build a new stmt with a fresh lhs, inserted after the
		 later of its operands' definitions.  */
	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
	      stmt
		= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
						lhs, oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  /* The old rhs1 chain may now be dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
	  stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
					       lhs, new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  return lhs;
}
3237
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is
   the maximum number of independent statements we may execute per
   cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int cycles;
  int lg2;
  unsigned int remaining;

  /* While more than 2 * cpu_width operands remain, each cycle can
     retire cpu_width of them.  */
  cycles = ops_num / (2 * cpu_width);

  /* The leftover operands are then halved once per cycle until a
     single operand remains; round up when the count is not an exact
     power of two.  */
  remaining = (unsigned) (ops_num - cycles * cpu_width);
  lg2 = exact_log2 (remaining);
  cycles += lg2 >= 0 ? lg2 : floor_log2 (remaining) + 1;

  return cycles;
}
3265
3266 /* Returns an optimal number of registers to use for computation of
3267 given statements. */
3268
3269 static int
3270 get_reassociation_width (int ops_num, enum tree_code opc,
3271 enum machine_mode mode)
3272 {
3273 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3274 int width;
3275 int width_min;
3276 int cycles_best;
3277
3278 if (param_width > 0)
3279 width = param_width;
3280 else
3281 width = targetm.sched.reassociation_width (opc, mode);
3282
3283 if (width == 1)
3284 return width;
3285
3286 /* Get the minimal time required for sequence computation. */
3287 cycles_best = get_required_cycles (ops_num, width);
3288
3289 /* Check if we may use less width and still compute sequence for
3290 the same time. It will allow us to reduce registers usage.
3291 get_required_cycles is monotonically increasing with lower width
3292 so we can perform a binary search for the minimal width that still
3293 results in the optimal cycle count. */
3294 width_min = 1;
3295 while (width > width_min)
3296 {
3297 int width_mid = (width + width_min) / 2;
3298
3299 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3300 width = width_mid;
3301 else if (width_min < width_mid)
3302 width_min = width_mid;
3303 else
3304 break;
3305 }
3306
3307 return width;
3308 }
3309
/* Rewrite our linearized statements so that the operators match
   those in OPS, putting the computation in rank order and trying to
   allow operations to be executed in parallel.

   STMT is the root of the linearized statement chain, WIDTH is the
   number of independent statements we may issue per cycle (see
   get_reassociation_width), and OPS holds the ranked operands to be
   distributed over the chain.  */

static void
rewrite_expr_tree_parallel (gimple stmt, int width,
			    vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  /* A chain combining op_num operands needs op_num - 1 binary
     statements.  */
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  stmts[0] is the bottom of the chain and
     stmts[stmt_num - 1] is STMT, the root.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    op2 = ops[op_index--]->op;
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  /* Both operands come from OPS; possibly reorder the next
	     two entries for a better schedule first.  */
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  op2 = ops[op_index--]->op;
	  op1 = ops[op_index--]->op;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 break the loop.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}
    }

  /* The statements feeding the original rhs1 chain are dead now.  */
  remove_visited_stmt_chain (last_rhs1);
}
3405
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* Callers guarantee both operands are defined by reassociable ops
     of the same code within this loop.  */
  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* Rewrite STMT = (A+B) + (C+D) as STMT = ((A+B)+C) + D:
     STMT's rhs2 becomes D, and a fresh statement computing (A+B)+C
     is inserted before STMT as the new rhs1.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
					 make_ssa_name (TREE_TYPE (lhs), NULL),
					 gimple_assign_lhs (binlhs),
					 gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The old (C+D) statement is now unused; delete it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  gsi_remove (&gsi, true);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
3462
3463 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3464 it. Otherwise, return NULL. */
3465
3466 static gimple
3467 get_single_immediate_use (tree lhs)
3468 {
3469 use_operand_p immuse;
3470 gimple immusestmt;
3471
3472 if (TREE_CODE (lhs) == SSA_NAME
3473 && single_imm_use (lhs, &immuse, &immusestmt)
3474 && is_gimple_assign (immusestmt))
3475 return immusestmt;
3476
3477 return NULL;
3478 }
3479
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  Only done for a single-use PLUS_EXPR so we
     don't change the value of other uses.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple g;

      /* The iterator is refreshed after each recursive call because
	 negate_value may insert statements before NEGATEDEFSTMT.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      /* Replace the old add with a fresh SSA name and statement so
	 debug info machinery sees the change.  */
      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  /* Base case: emit an explicit negation before *GSIP.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  /* Statements created by force_gimple_operand_gsi start with uid 0;
     walk backwards over them and give them the insertion point's uid
     so uid-based ordering within the block stays consistent.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
3539
3540 /* Return true if we should break up the subtract in STMT into an add
3541 with negate. This is true when we the subtract operands are really
3542 adds, or the subtract itself is used in an add expression. In
3543 either case, breaking up the subtract into an add with negate
3544 exposes the adds to reassociation. */
3545
3546 static bool
3547 should_break_up_subtract (gimple stmt)
3548 {
3549 tree lhs = gimple_assign_lhs (stmt);
3550 tree binlhs = gimple_assign_rhs1 (stmt);
3551 tree binrhs = gimple_assign_rhs2 (stmt);
3552 gimple immusestmt;
3553 struct loop *loop = loop_containing_stmt (stmt);
3554
3555 if (TREE_CODE (binlhs) == SSA_NAME
3556 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3557 return true;
3558
3559 if (TREE_CODE (binrhs) == SSA_NAME
3560 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3561 return true;
3562
3563 if (TREE_CODE (lhs) == SSA_NAME
3564 && (immusestmt = get_single_immediate_use (lhs))
3565 && is_gimple_assign (immusestmt)
3566 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3567 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3568 return true;
3569 return false;
3570 }
3571
3572 /* Transform STMT from A - B into A + -B. */
3573
3574 static void
3575 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3576 {
3577 tree rhs1 = gimple_assign_rhs1 (stmt);
3578 tree rhs2 = gimple_assign_rhs2 (stmt);
3579
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 {
3582 fprintf (dump_file, "Breaking up subtract ");
3583 print_gimple_stmt (dump_file, stmt, 0, 0);
3584 }
3585
3586 rhs2 = negate_value (rhs2, gsip);
3587 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3588 update_stmt (stmt);
3589 }
3590
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  /* Only during the first reassociation pass, only with
     -funsafe-math-optimizations, and only for single-use call
     results.  */
  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      /* pow (base, c): accept only when c is a real constant that
	 represents an integer value exactly.  */
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      /* Reject exponents too large to fit in a HOST_WIDE_INT.  */
      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      /* Round-trip the constant through an integer and back; if the
	 value changes, the exponent was not integral.  */
      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent,
			 *exponent < 0 ? -1 : 0, 0);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      /* powi (base, n): accept only when n fits a signed
	 HOST_WIDE_INT.  */
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = TREE_INT_CST_LOW (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
3659
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.
   IS_ASSOCIATIVE is false for non-associative codes such as division;
   SET_VISITED controls whether visited flags are marked as we go.  */

static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  /* Determine whether each operand's defining statement can itself be
     folded into this reassociation chain (and cannot throw).  */
  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  /* Neither side extends the chain: record both operands,
	     recognizing pow/powi calls as repeated factors for
	     MULT_EXPR chains.  */
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      /* Both sides are reassociable: flatten the RHS chain into the
	 LHS side first.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  /* After the above, the chain continues only through BINLHS.  */
  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
3782
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  Works over the global plus_negates vector of
   NEGATE_EXPR results collected earlier.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      /* Only handle negates whose sole use is a GIMPLE assignment.  */
      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		 x = -a
		 y = x - b
		 which we transform into
		 x = a + b
		 y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
	      gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
	      /* USER may have been replaced; refetch it before
		 updating and recording its lhs.  */
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      gsi_remove (&gsi, true);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
3869
3870 /* Returns true if OP is of a type for which we can do reassociation.
3871 That is for integral or non-saturating fixed-point types, and for
3872 floating point type when associative-math is enabled. */
3873
3874 static bool
3875 can_reassociate_p (tree op)
3876 {
3877 tree type = TREE_TYPE (op);
3878 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3879 || NON_SAT_FIXED_POINT_TYPE_P (type)
3880 || (flag_associative_math && FLOAT_TYPE_P (type)))
3881 return true;
3882 return false;
3883 }
3884
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      /* Reset per-statement state used later by the pass: visited
	 flags and in-block ordering uids.  */
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	/* Remember plain negates for repropagate_negates.  */
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  /* Recurse into dominated blocks so dominating subtracts are
     transformed first.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
3943
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

/* Value, pointer and const-pointer shorthands for repeat_factor_d.  */
typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


/* The repeated-factor worklist; created, sorted and released per
   multiply chain by attempt_builtin_powi.  */
static vec<repeat_factor> repeat_factor_vec;
3966
3967 /* Used for sorting the repeat factor vector. Sort primarily by
3968 ascending occurrence count, secondarily by descending rank. */
3969
3970 static int
3971 compare_repeat_factors (const void *x1, const void *x2)
3972 {
3973 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3974 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3975
3976 if (rf1->count != rf2->count)
3977 return rf1->count - rf2->count;
3978
3979 return rf2->rank - rf1->rank;
3980 }
3981
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence, or
   NULL_TREE when no powi opportunity exists.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  Entries may
     already carry a count > 1 from earlier pow/powi detection.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      /* Emit iter_result = __builtin_powi (repr, power)
		 before STMT.  */
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
							   target_ssa,
							   op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
						   result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
4283
4284 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4285
4286 static void
4287 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4288 {
4289 tree rhs1;
4290
4291 if (dump_file && (dump_flags & TDF_DETAILS))
4292 {
4293 fprintf (dump_file, "Transforming ");
4294 print_gimple_stmt (dump_file, stmt, 0, 0);
4295 }
4296
4297 rhs1 = gimple_assign_rhs1 (stmt);
4298 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4299 update_stmt (stmt);
4300 remove_visited_stmt_chain (rhs1);
4301
4302 if (dump_file && (dump_flags & TDF_DETAILS))
4303 {
4304 fprintf (dump_file, " into ");
4305 print_gimple_stmt (dump_file, stmt, 0, 0);
4306 }
4307 }
4308
4309 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4310
4311 static void
4312 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4313 tree rhs1, tree rhs2)
4314 {
4315 if (dump_file && (dump_flags & TDF_DETAILS))
4316 {
4317 fprintf (dump_file, "Transforming ");
4318 print_gimple_stmt (dump_file, stmt, 0, 0);
4319 }
4320
4321 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4322 update_stmt (gsi_stmt (*gsi));
4323 remove_visited_stmt_chain (rhs1);
4324
4325 if (dump_file && (dump_flags & TDF_DETAILS))
4326 {
4327 fprintf (dump_file, " into ");
4328 print_gimple_stmt (dump_file, stmt, 0, 0);
4329 }
4330 }
4331
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  Statements are walked bottom-up so that each use is seen
   before the statements that feed it.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  /* A not-yet-visited statement ending the block may terminate a
     sequence of range tests; try to optimize those first.  */
  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  /* Walk the block backwards so defs are processed after their uses.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      /* Only non-throwing assignments can be reassociated freely.  */
      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      vec<operand_entry_t> ops = vNULL;
	      tree powi_result = NULL_TREE;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      /* Flatten the whole chain of same-code operations feeding
		 STMT into OPS, sort by rank, and simplify the list.  */
	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      /* Undistribution may have changed the operand list; if so,
		 sort and simplify it once more.  */
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		optimize_range_tests (rhs_code, &ops);

	      /* Repeated-factor extraction into __builtin_powi is only
		 done on the first pass instance and requires
		 -funsafe-math-optimizations.  */
	      if (first_pass_instance
		  && rhs_code == MULT_EXPR
		  && flag_unsafe_math_optimizations)
		powi_result = attempt_builtin_powi (stmt, &ops);

	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  /* Two or more operands remain: rewrite the expression
		     tree, possibly as a parallel-friendly shape when the
		     target has enough issue width.  */
		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);
		  tree new_lhs = lhs;

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (stmt, width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      /* Redirect the reassociated result to a fresh name
			 so LHS can hold the final product.  */
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			target_ssa = new_lhs;
		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
							       powi_result,
							       target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      ops.release ();
	    }
	}
    }
  /* Recurse into BB's children in the post-dominator tree.  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
4502
4503 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4504 void debug_ops_vector (vec<operand_entry_t> ops);
4505
4506 /* Dump the operand entry vector OPS to FILE. */
4507
4508 void
4509 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4510 {
4511 operand_entry_t oe;
4512 unsigned int i;
4513
4514 FOR_EACH_VEC_ELT (ops, i, oe)
4515 {
4516 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4517 print_generic_expr (file, oe->op, 0);
4518 }
4519 }
4520
/* Dump the operand entry vector OPS to STDERR.  Convenience entry
   point for use from the debugger.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
4528
/* Run the two reassociation phases over the whole function:
   break_up_subtract_bb starting from the entry block, then
   reassociate_bb starting from the exit block (the latter recurses
   over post-dominator children, see above).  */

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}
4535
/* Initialize the reassociation pass: set up loop information,
   statistics, the operand-entry allocation pool, and the SSA-name
   and basic-block rank tables.  */

static void
init_reassoc (void)
{
  int i;
  /* Rank numbering starts at 2; values below that are left unused.  */
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block);
  operand_rank = pointer_map_create ();

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  The shift by 16 leaves the low bits
     free for ranks assigned within a block.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  /* reassociate_bb walks the post-dominator tree.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
4580
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  /* Report the counters accumulated in reassociate_stats during
     this invocation of the pass.  */
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  /* Release everything allocated by init_reassoc.  */
  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
4607
/* Gate and execute functions for Reassociation.  */

/* Main entry point of the pass: initialize, reassociate the whole
   function, clean up negate pairs exposed by the rewrite, and tear
   down the pass-local state.  Always returns 0 (no extra TODOs).  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
4621
4622 static bool
4623 gate_tree_ssa_reassoc (void)
4624 {
4625 return flag_tree_reassoc != 0;
4626 }
4627
/* Pass descriptor and opt_pass wrapper for the reassociation pass,
   instantiated through make_pass_reassoc below.  */

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_verify_ssa
    | TODO_update_ssa_only_virtuals
    | TODO_verify_flow ), /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is cloneable, so it can be scheduled more than once.  */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  bool gate () { return gate_tree_ssa_reassoc (); }
  unsigned int execute () { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace
4662
/* Create an instance of the reassociation pass for the pass manager
   owned by CTXT.  The caller takes ownership of the returned pass.  */

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}