gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Daniel Berlin <dan@dberlin.org>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "gimple-pretty-print.h"
  29 #include "tree-inline.h"
  30 #include "tree-flow.h"
  31 #include "gimple.h"
  32 #include "tree-iterator.h"
  33 #include "tree-pass.h"
  34 #include "alloc-pool.h"
  35 #include "vec.h"
  36 #include "langhooks.h"
  37 #include "pointer-set.h"
  38 #include "cfgloop.h"
  39 #include "flags.h"
  40 #include "target.h"
  41 #include "params.h"
  42 #include "diagnostic-core.h"
  43
  44 /*  This is a simple global reassociation pass.  It is, in part, based
  45     on the LLVM pass of the same name (They do some things more/less
  46     than we do, in different orders, etc).
  47
  48     It consists of five steps:
  49
  50     1. Breaking up subtract operations into addition + negate, where
  51     it would promote the reassociation of adds.
  52
  53     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  54     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  55     During linearization, we place the operands of the binary
  56     expressions into a vector of operand_entry_t
  57
  58     3. Optimization of the operand lists, eliminating things like a +
  59     -a, a & a, etc.
  60
  61     3a. Combine repeated factors with the same occurrence counts
  62     into a __builtin_powi call that will later be optimized into
  63     an optimal number of multiplies.
  64
  65     4. Rewrite the expression trees we linearized and optimized so
  66     they are in proper rank order.
  67
  68     5. Repropagate negates, as nothing else will clean it up ATM.
  69
  70     A bit of theory on #4, since nobody seems to write anything down
  71     about why it makes sense to do it the way they do it:
  72
  73     We could do this much nicer theoretically, but don't (for reasons
  74     explained after how to do it theoretically nice :P).
  75
  76     In order to promote the most redundancy elimination, you want
  77     binary expressions whose operands are the same rank (or
  78     preferably, the same value) exposed to the redundancy eliminator,
  79     for possible elimination.
  80
  81     So the way to do this if we really cared, is to build the new op
  82     tree from the leaves to the roots, merging as you go, and putting the
  83     new op on the end of the worklist, until you are left with one
  84     thing on the worklist.
  85
  86     IE if you have to rewrite the following set of operands (listed with
  87     rank in parentheses), with opcode PLUS_EXPR:
  88
  89     a (1),  b (1),  c (1),  d (2), e (2)
  90
  91
  92     We start with our merge worklist empty, and the ops list with all of
  93     those on it.
  94
  95     You want to first merge all leaves of the same rank, as much as
  96     possible.
  97
  98     So first build a binary op of
  99
 100     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 101
 102     Because there is no three operand form of PLUS_EXPR, c is not going to
 103     be exposed to redundancy elimination as a rank 1 operand.
 104
 105     So you might as well throw it on the merge worklist (you could also
 106     consider it to now be a rank two operand, and merge it with d and e,
 107     but in this case, you then have evicted e from a binary op. So at
 108     least in this situation, you can't win.)
 109
 110     Then build a binary op of d + e
 111     mergetmp2 = d + e
 112
 113     and put mergetmp2 on the merge worklist.
 114
 115     so merge worklist = {mergetmp, c, mergetmp2}
 116
 117     Continue building binary ops of these operations until you have only
 118     one operation left on the worklist.
 119
 120     So we have
 121
 122     build binary op
 123     mergetmp3 = mergetmp + c
 124
 125     worklist = {mergetmp2, mergetmp3}
 126
 127     mergetmp4 = mergetmp2 + mergetmp3
 128
 129     worklist = {mergetmp4}
 130
 131     because we have one operation left, we can now just set the original
 132     statement equal to the result of that operation.
 133
 134     This will at least expose a + b  and d + e to redundancy elimination
 135     as binary operations.
 136
 137     For extra points, you can reuse the old statements to build the
 138     mergetmps, since you shouldn't run out.
 139
 140     So why don't we do this?
 141
 142     Because it's expensive, and rarely will help.  Most trees we are
 143     reassociating have 3 or less ops.  If they have 2 ops, they already
 144     will be written into a nice single binary op.  If you have 3 ops, a
 145     single simple check suffices to tell you whether the first two are of the
 146     same rank.  If so, you know to order it
 147
 148     mergetmp = op1 + op2
 149     newstmt = mergetmp + op3
 150
 151     instead of
 152     mergetmp = op2 + op3
 153     newstmt = mergetmp + op1
 154
 155     If all three are of the same rank, you can't expose them all in a
 156     single binary operator anyway, so the above is *still* the best you
 157     can do.
 158
 159     Thus, this is what we do.  When we have three ops left, we check to see
 160     what order to put them in, and call it a day.  As a nod to vector sum
 161     reduction, we check if any of the ops are really a phi node that is a
 162     destructive update for the associating op, and keep the destructive
 163     update together for vector sum reduction recognition.  */
 164
 165
 166 /* Statistics */
 167 static struct
 168 {
 169   int linearized;
 170   int constants_eliminated;
 171   int ops_eliminated;
 172   int rewritten;
 173   int pows_encountered;
 174   int pows_created;
 175 } reassociate_stats;
 176
 177 /* Operator, rank pair.  */
 178 typedef struct operand_entry
 179 {
 180   unsigned int rank;
 181   int id;
 182   tree op;
 183   unsigned int count;
 184 } *operand_entry_t;
 185
 186 static alloc_pool operand_entry_pool;
 187
 188 /* This is used to assign a unique ID to each struct operand_entry
 189    so that qsort results are identical on different hosts.  */
 190 static int next_operand_entry_id;
 191
 192 /* Starting rank number for a given basic block, so that we can rank
 193    operations using unmovable instructions in that BB based on the bb
 194    depth.  */
 195 static long *bb_rank;
 196
 197 /* Operand->rank hashtable.  */
 198 static struct pointer_map_t *operand_rank;
 199
 200 /* Forward decls.  */
 201 static long get_rank (tree);
 202
 203
 204 /* Bias amount for loop-carried phis.  We want this to be larger than
 205    the depth of any reassociation tree we can see, but not larger than
 206    the rank difference between two blocks.  */
 207 #define PHI_LOOP_BIAS (1 << 15)
 208
 209 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 210    an innermost loop, and the phi has only a single use which is inside
 211    the loop, then the rank is the block rank of the loop latch plus an
 212    extra bias for the loop-carried dependence.  This causes expressions
 213    calculated into an accumulator variable to be independent for each
 214    iteration of the loop.  If STMT is some other phi, the rank is the
 215    block rank of its containing block.  */
 216 static long
 217 phi_rank (gimple stmt)
 218 {
 219   basic_block bb = gimple_bb (stmt);
 220   struct loop *father = bb->loop_father;
 221   tree res;
 222   unsigned i;
 223   use_operand_p use;
 224   gimple use_stmt;
 225
 226   /* We only care about real loops (those with a latch).  */
 227   if (!father->latch)
 228     return bb_rank[bb->index];
 229
 230   /* Interesting phis must be in headers of innermost loops.  */
 231   if (bb != father->header
 232       || father->inner)
 233     return bb_rank[bb->index];
 234
 235   /* Ignore virtual SSA_NAMEs.  */
 236   res = gimple_phi_result (stmt);
 237   if (virtual_operand_p (res))
 238     return bb_rank[bb->index];
 239
 240   /* The phi definition must have a single use, and that use must be
 241      within the loop.  Otherwise this isn't an accumulator pattern.  */
 242   if (!single_imm_use (res, &use, &use_stmt)
 243       || gimple_bb (use_stmt)->loop_father != father)
 244     return bb_rank[bb->index];
 245
 246   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 247   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 248     {
 249       tree arg = gimple_phi_arg_def (stmt, i);
 250       if (TREE_CODE (arg) == SSA_NAME
 251           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 252         {
 253           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 254           if (gimple_bb (def_stmt)->loop_father == father)
 255             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 256         }
 257     }
 258
 259   /* Must be an uninteresting phi.  */
 260   return bb_rank[bb->index];
 261 }
 262
 263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 264    loop-carried dependence of an innermost loop, return TRUE; else
 265    return FALSE.  */
 266 static bool
 267 loop_carried_phi (tree exp)
 268 {
 269   gimple phi_stmt;
 270   long block_rank;
 271
 272   if (TREE_CODE (exp) != SSA_NAME
 273       || SSA_NAME_IS_DEFAULT_DEF (exp))
 274     return false;
 275
 276   phi_stmt = SSA_NAME_DEF_STMT (exp);
 277
 278   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 279     return false;
 280
 281   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 282      an additional bias added in.  If this phi doesn't have block rank,
 283      it's biased and should not be propagated.  */
 284   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 285
 286   if (phi_rank (phi_stmt) != block_rank)
 287     return true;
 288
 289   return false;
 290 }
 291
 292 /* Return the maximum of RANK and the rank that should be propagated
 293    from expression OP.  For most operands, this is just the rank of OP.
 294    For loop-carried phis, the value is zero to avoid undoing the bias
 295    in favor of the phi.  */
 296 static long
 297 propagate_rank (long rank, tree op)
 298 {
 299   long op_rank;
 300
 301   if (loop_carried_phi (op))
 302     return rank;
 303
 304   op_rank = get_rank (op);
 305
 306   return MAX (rank, op_rank);
 307 }
 308
 309 /* Look up the operand rank structure for expression E.  */
 310
 311 static inline long
 312 find_operand_rank (tree e)
 313 {
 314   void **slot = pointer_map_contains (operand_rank, e);
 315   return slot ? (long) (intptr_t) *slot : -1;
 316 }
 317
 318 /* Insert {E,RANK} into the operand rank hashtable.  */
 319
 320 static inline void
 321 insert_operand_rank (tree e, long rank)
 322 {
 323   void **slot;
 324   gcc_assert (rank > 0);
 325   slot = pointer_map_insert (operand_rank, e);
 326   gcc_assert (!*slot);
 327   *slot = (void *) (intptr_t) rank;
 328 }
 329
 330 /* Given an expression E, return the rank of the expression.  */
 331
 332 static long
 333 get_rank (tree e)
 334 {
 335   /* Constants have rank 0.  */
 336   if (is_gimple_min_invariant (e))
 337     return 0;
 338
 339   /* SSA_NAME's have the rank of the expression they are the result
 340      of.
 341      For globals and uninitialized values, the rank is 0.
 342      For function arguments, use the pre-setup rank.
 343      For PHI nodes, stores, asm statements, etc, we use the rank of
 344      the BB.
 345      For simple operations, the rank is the maximum rank of any of
 346      its operands, or the bb_rank, whichever is less.
 347      I make no claims that this is optimal, however, it gives good
 348      results.  */
 349
 350   /* We make an exception to the normal ranking system to break
 351      dependences of accumulator variables in loops.  Suppose we
 352      have a simple one-block loop containing:
 353
 354        x_1 = phi(x_0, x_2)
 355        b = a + x_1
 356        c = b + d
 357        x_2 = c + e
 358
 359      As shown, each iteration of the calculation into x is fully
 360      dependent upon the iteration before it.  We would prefer to
 361      see this in the form:
 362
 363        x_1 = phi(x_0, x_2)
 364        b = a + d
 365        c = b + e
 366        x_2 = c + x_1
 367
 368      If the loop is unrolled, the calculations of b and c from
 369      different iterations can be interleaved.
 370
 371      To obtain this result during reassociation, we bias the rank
 372      of the phi definition x_1 upward, when it is recognized as an
 373      accumulator pattern.  The artificial rank causes it to be
 374      added last, providing the desired independence.  */
 375
 376   if (TREE_CODE (e) == SSA_NAME)
 377     {
 378       gimple stmt;
 379       long rank;
 380       int i, n;
 381       tree op;
 382
 383       if (SSA_NAME_IS_DEFAULT_DEF (e))
 384         return find_operand_rank (e);
 385
 386       stmt = SSA_NAME_DEF_STMT (e);
 387       if (gimple_code (stmt) == GIMPLE_PHI)
 388         return phi_rank (stmt);
 389
 390       if (!is_gimple_assign (stmt)
 391           || gimple_vdef (stmt))
 392         return bb_rank[gimple_bb (stmt)->index];
 393
 394       /* If we already have a rank for this expression, use that.  */
 395       rank = find_operand_rank (e);
 396       if (rank != -1)
 397         return rank;
 398
 399       /* Otherwise, find the maximum rank for the operands.  As an
 400          exception, remove the bias from loop-carried phis when propagating
 401          the rank so that dependent operations are not also biased.  */
 402       rank = 0;
 403       if (gimple_assign_single_p (stmt))
 404         {
 405           tree rhs = gimple_assign_rhs1 (stmt);
 406           n = TREE_OPERAND_LENGTH (rhs);
 407           if (n == 0)
 408             rank = propagate_rank (rank, rhs);
 409           else
 410             {
 411               for (i = 0; i < n; i++)
 412                 {
 413                   op = TREE_OPERAND (rhs, i);
 414
 415                   if (op != NULL_TREE)
 416                     rank = propagate_rank (rank, op);
 417                 }
 418             }
 419         }
 420       else
 421         {
 422           n = gimple_num_ops (stmt);
 423           for (i = 1; i < n; i++)
 424             {
 425               op = gimple_op (stmt, i);
 426               gcc_assert (op);
 427               rank = propagate_rank (rank, op);
 428             }
 429         }
 430
 431       if (dump_file && (dump_flags & TDF_DETAILS))
 432         {
 433           fprintf (dump_file, "Rank for ");
 434           print_generic_expr (dump_file, e, 0);
 435           fprintf (dump_file, " is %ld\n", (rank + 1));
 436         }
 437
 438       /* Note the rank in the hashtable so we don't recompute it.  */
 439       insert_operand_rank (e, (rank + 1));
 440       return (rank + 1);
 441     }
 442
 443   /* Globals, etc,  are rank 0 */
 444   return 0;
 445 }
 446
 447 DEF_VEC_P(operand_entry_t);
 448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
 449
 450 /* We want integer ones to end up last no matter what, since they are
 451    the ones we can do the most with.  */
 452 #define INTEGER_CONST_TYPE 1 << 3
 453 #define FLOAT_CONST_TYPE 1 << 2
 454 #define OTHER_CONST_TYPE 1 << 1
 455
 456 /* Classify an invariant tree into integer, float, or other, so that
 457    we can sort them to be near other constants of the same type.  */
 458 static inline int
 459 constant_type (tree t)
 460 {
 461   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 462     return INTEGER_CONST_TYPE;
 463   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 464     return FLOAT_CONST_TYPE;
 465   else
 466     return OTHER_CONST_TYPE;
 467 }
 468
 469 /* qsort comparison function to sort operand entries PA and PB by rank
 470    so that the sorted array is ordered by rank in decreasing order.  */
 471 static int
 472 sort_by_operand_rank (const void *pa, const void *pb)
 473 {
 474   const operand_entry_t oea = *(const operand_entry_t *)pa;
 475   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 476
 477   /* It's nicer for optimize_expression if constants that are likely
 478      to fold when added/multiplied//whatever are put next to each
 479      other.  Since all constants have rank 0, order them by type.  */
 480   if (oeb->rank == 0 && oea->rank == 0)
 481     {
 482       if (constant_type (oeb->op) != constant_type (oea->op))
 483         return constant_type (oeb->op) - constant_type (oea->op);
 484       else
 485         /* To make sorting result stable, we use unique IDs to determine
 486            order.  */
 487         return oeb->id - oea->id;
 488     }
 489
 490   /* Lastly, make sure the versions that are the same go next to each
 491      other.  We use SSA_NAME_VERSION because it's stable.  */
 492   if ((oeb->rank - oea->rank == 0)
 493       && TREE_CODE (oea->op) == SSA_NAME
 494       && TREE_CODE (oeb->op) == SSA_NAME)
 495     {
 496       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 497         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 498       else
 499         return oeb->id - oea->id;
 500     }
 501
 502   if (oeb->rank != oea->rank)
 503     return oeb->rank - oea->rank;
 504   else
 505     return oeb->id - oea->id;
 506 }
 507
 508 /* Add an operand entry to *OPS for the tree operand OP.  */
 509
 510 static void
 511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
 512 {
 513   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 514
 515   oe->op = op;
 516   oe->rank = get_rank (op);
 517   oe->id = next_operand_entry_id++;
 518   oe->count = 1;
 519   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 520 }
 521
 522 /* Add an operand entry to *OPS for the tree operand OP with repeat
 523    count REPEAT.  */
 524
 525 static void
 526 add_repeat_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op,
 527                        HOST_WIDE_INT repeat)
 528 {
 529   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 530
 531   oe->op = op;
 532   oe->rank = get_rank (op);
 533   oe->id = next_operand_entry_id++;
 534   oe->count = repeat;
 535   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 536
 537   reassociate_stats.pows_encountered++;
 538 }
 539
 540 /* Return true if STMT is reassociable operation containing a binary
 541    operation with tree code CODE, and is inside LOOP.  */
 542
 543 static bool
 544 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 545 {
 546   basic_block bb = gimple_bb (stmt);
 547
 548   if (gimple_bb (stmt) == NULL)
 549     return false;
 550
 551   if (!flow_bb_inside_loop_p (loop, bb))
 552     return false;
 553
 554   if (is_gimple_assign (stmt)
 555       && gimple_assign_rhs_code (stmt) == code
 556       && has_single_use (gimple_assign_lhs (stmt)))
 557     return true;
 558
 559   return false;
 560 }
 561
 562
 563 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 564    operand of the negate operation.  Otherwise, return NULL.  */
 565
 566 static tree
 567 get_unary_op (tree name, enum tree_code opcode)
 568 {
 569   gimple stmt = SSA_NAME_DEF_STMT (name);
 570
 571   if (!is_gimple_assign (stmt))
 572     return NULL_TREE;
 573
 574   if (gimple_assign_rhs_code (stmt) == opcode)
 575     return gimple_assign_rhs1 (stmt);
 576   return NULL_TREE;
 577 }
 578
 579 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 580    eliminate through equivalences, do so, remove them from OPS, and
 581    return true.  Otherwise, return false.  */
 582
 583 static bool
 584 eliminate_duplicate_pair (enum tree_code opcode,
 585                           VEC (operand_entry_t, heap) **ops,
 586                           bool *all_done,
 587                           unsigned int i,
 588                           operand_entry_t curr,
 589                           operand_entry_t last)
 590 {
 591
 592   /* If we have two of the same op, and the opcode is & |, min, or max,
 593      we can eliminate one of them.
 594      If we have two of the same op, and the opcode is ^, we can
 595      eliminate both of them.  */
 596
 597   if (last && last->op == curr->op)
 598     {
 599       switch (opcode)
 600         {
 601         case MAX_EXPR:
 602         case MIN_EXPR:
 603         case BIT_IOR_EXPR:
 604         case BIT_AND_EXPR:
 605           if (dump_file && (dump_flags & TDF_DETAILS))
 606             {
 607               fprintf (dump_file, "Equivalence: ");
 608               print_generic_expr (dump_file, curr->op, 0);
 609               fprintf (dump_file, " [&|minmax] ");
 610               print_generic_expr (dump_file, last->op, 0);
 611               fprintf (dump_file, " -> ");
 612               print_generic_stmt (dump_file, last->op, 0);
 613             }
 614
 615           VEC_ordered_remove (operand_entry_t, *ops, i);
 616           reassociate_stats.ops_eliminated ++;
 617
 618           return true;
 619
 620         case BIT_XOR_EXPR:
 621           if (dump_file && (dump_flags & TDF_DETAILS))
 622             {
 623               fprintf (dump_file, "Equivalence: ");
 624               print_generic_expr (dump_file, curr->op, 0);
 625               fprintf (dump_file, " ^ ");
 626               print_generic_expr (dump_file, last->op, 0);
 627               fprintf (dump_file, " -> nothing\n");
 628             }
 629
 630           reassociate_stats.ops_eliminated += 2;
 631
 632           if (VEC_length (operand_entry_t, *ops) == 2)
 633             {
 634               VEC_free (operand_entry_t, heap, *ops);
 635               *ops = NULL;
 636               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 637               *all_done = true;
 638             }
 639           else
 640             {
 641               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 642               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 643             }
 644
 645           return true;
 646
 647         default:
 648           break;
 649         }
 650     }
 651   return false;
 652 }
 653
 654 static VEC(tree, heap) *plus_negates;
 655
 656 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 657    expression, look in OPS for a corresponding positive operation to cancel
 658    it out.  If we find one, remove the other from OPS, replace
 659    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 660    return false. */
 661
 662 static bool
 663 eliminate_plus_minus_pair (enum tree_code opcode,
 664                            VEC (operand_entry_t, heap) **ops,
 665                            unsigned int currindex,
 666                            operand_entry_t curr)
 667 {
 668   tree negateop;
 669   tree notop;
 670   unsigned int i;
 671   operand_entry_t oe;
 672
 673   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 674     return false;
 675
 676   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 677   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 678   if (negateop == NULL_TREE && notop == NULL_TREE)
 679     return false;
 680
 681   /* Any non-negated version will have a rank that is one less than
 682      the current rank.  So once we hit those ranks, if we don't find
 683      one, we can stop.  */
 684
 685   for (i = currindex + 1;
 686        VEC_iterate (operand_entry_t, *ops, i, oe)
 687        && oe->rank >= curr->rank - 1 ;
 688        i++)
 689     {
 690       if (oe->op == negateop)
 691         {
 692
 693           if (dump_file && (dump_flags & TDF_DETAILS))
 694             {
 695               fprintf (dump_file, "Equivalence: ");
 696               print_generic_expr (dump_file, negateop, 0);
 697               fprintf (dump_file, " + -");
 698               print_generic_expr (dump_file, oe->op, 0);
 699               fprintf (dump_file, " -> 0\n");
 700             }
 701
 702           VEC_ordered_remove (operand_entry_t, *ops, i);
 703           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 704           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 705           reassociate_stats.ops_eliminated ++;
 706
 707           return true;
 708         }
 709       else if (oe->op == notop)
 710         {
 711           tree op_type = TREE_TYPE (oe->op);
 712
 713           if (dump_file && (dump_flags & TDF_DETAILS))
 714             {
 715               fprintf (dump_file, "Equivalence: ");
 716               print_generic_expr (dump_file, notop, 0);
 717               fprintf (dump_file, " + ~");
 718               print_generic_expr (dump_file, oe->op, 0);
 719               fprintf (dump_file, " -> -1\n");
 720             }
 721
 722           VEC_ordered_remove (operand_entry_t, *ops, i);
 723           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 724           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 725           reassociate_stats.ops_eliminated ++;
 726
 727           return true;
 728         }
 729     }
 730
 731   /* CURR->OP is a negate expr in a plus expr: save it for later
 732      inspection in repropagate_negates().  */
 733   if (negateop != NULL_TREE)
 734     VEC_safe_push (tree, heap, plus_negates, curr->op);
 735
 736   return false;
 737 }
 738
 739 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 740    bitwise not expression, look in OPS for a corresponding operand to
 741    cancel it out.  If we find one, remove the other from OPS, replace
 742    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 743    false. */
 744
 745 static bool
 746 eliminate_not_pairs (enum tree_code opcode,
 747                      VEC (operand_entry_t, heap) **ops,
 748                      unsigned int currindex,
 749                      operand_entry_t curr)
 750 {
 751   tree notop;
 752   unsigned int i;
 753   operand_entry_t oe;
 754
 755   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 756       || TREE_CODE (curr->op) != SSA_NAME)
 757     return false;
 758
 759   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 760   if (notop == NULL_TREE)
 761     return false;
 762
 763   /* Any non-not version will have a rank that is one less than
 764      the current rank.  So once we hit those ranks, if we don't find
 765      one, we can stop.  */
 766
 767   for (i = currindex + 1;
 768        VEC_iterate (operand_entry_t, *ops, i, oe)
 769        && oe->rank >= curr->rank - 1;
 770        i++)
 771     {
 772       if (oe->op == notop)
 773         {
 774           if (dump_file && (dump_flags & TDF_DETAILS))
 775             {
 776               fprintf (dump_file, "Equivalence: ");
 777               print_generic_expr (dump_file, notop, 0);
 778               if (opcode == BIT_AND_EXPR)
 779                 fprintf (dump_file, " & ~");
 780               else if (opcode == BIT_IOR_EXPR)
 781                 fprintf (dump_file, " | ~");
 782               print_generic_expr (dump_file, oe->op, 0);
 783               if (opcode == BIT_AND_EXPR)
 784                 fprintf (dump_file, " -> 0\n");
 785               else if (opcode == BIT_IOR_EXPR)
 786                 fprintf (dump_file, " -> -1\n");
 787             }
 788
 789           if (opcode == BIT_AND_EXPR)
 790             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 791           else if (opcode == BIT_IOR_EXPR)
 792             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 793                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 794
 795           reassociate_stats.ops_eliminated
 796             += VEC_length (operand_entry_t, *ops) - 1;
 797           VEC_free (operand_entry_t, heap, *ops);
 798           *ops = NULL;
 799           VEC_safe_push (operand_entry_t, heap, *ops, oe);
 800           return true;
 801         }
 802     }
 803
 804   return false;
 805 }
 806
 807 /* Use constant value that may be present in OPS to try to eliminate
 808    operands.  Note that this function is only really used when we've
 809    eliminated ops for other reasons, or merged constants.  Across
 810    single statements, fold already does all of this, plus more.  There
 811    is little point in duplicating logic, so I've only included the
 812    identities that I could ever construct testcases to trigger.  */
 813
 814 static void
 815 eliminate_using_constants (enum tree_code opcode,
 816                            VEC(operand_entry_t, heap) **ops)
 817 {
 818   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
 819   tree type = TREE_TYPE (oelast->op);
 820
 821   if (oelast->rank == 0
 822       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 823     {
 824       switch (opcode)
 825         {
 826         case BIT_AND_EXPR:
 827           if (integer_zerop (oelast->op))
 828             {
 829               if (VEC_length (operand_entry_t, *ops) != 1)
 830                 {
 831                   if (dump_file && (dump_flags & TDF_DETAILS))
 832                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 833
 834                   reassociate_stats.ops_eliminated
 835                     += VEC_length (operand_entry_t, *ops) - 1;
 836
 837                   VEC_free (operand_entry_t, heap, *ops);
 838                   *ops = NULL;
 839                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 840                   return;
 841                 }
 842             }
 843           else if (integer_all_onesp (oelast->op))
 844             {
 845               if (VEC_length (operand_entry_t, *ops) != 1)
 846                 {
 847                   if (dump_file && (dump_flags & TDF_DETAILS))
 848                     fprintf (dump_file, "Found & -1, removing\n");
 849                   VEC_pop (operand_entry_t, *ops);
 850                   reassociate_stats.ops_eliminated++;
 851                 }
 852             }
 853           break;
 854         case BIT_IOR_EXPR:
 855           if (integer_all_onesp (oelast->op))
 856             {
 857               if (VEC_length (operand_entry_t, *ops) != 1)
 858                 {
 859                   if (dump_file && (dump_flags & TDF_DETAILS))
 860                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 861
 862                   reassociate_stats.ops_eliminated
 863                     += VEC_length (operand_entry_t, *ops) - 1;
 864
 865                   VEC_free (operand_entry_t, heap, *ops);
 866                   *ops = NULL;
 867                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 868                   return;
 869                 }
 870             }
 871           else if (integer_zerop (oelast->op))
 872             {
 873               if (VEC_length (operand_entry_t, *ops) != 1)
 874                 {
 875                   if (dump_file && (dump_flags & TDF_DETAILS))
 876                     fprintf (dump_file, "Found | 0, removing\n");
 877                   VEC_pop (operand_entry_t, *ops);
 878                   reassociate_stats.ops_eliminated++;
 879                 }
 880             }
 881           break;
 882         case MULT_EXPR:
 883           if (integer_zerop (oelast->op)
 884               || (FLOAT_TYPE_P (type)
 885                   && !HONOR_NANS (TYPE_MODE (type))
 886                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 887                   && real_zerop (oelast->op)))
 888             {
 889               if (VEC_length (operand_entry_t, *ops) != 1)
 890                 {
 891                   if (dump_file && (dump_flags & TDF_DETAILS))
 892                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 893
 894                   reassociate_stats.ops_eliminated
 895                     += VEC_length (operand_entry_t, *ops) - 1;
 896                   VEC_free (operand_entry_t, heap, *ops);
 897                   *ops = NULL;
 898                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 899                   return;
 900                 }
 901             }
 902           else if (integer_onep (oelast->op)
 903                    || (FLOAT_TYPE_P (type)
 904                        && !HONOR_SNANS (TYPE_MODE (type))
 905                        && real_onep (oelast->op)))
 906             {
 907               if (VEC_length (operand_entry_t, *ops) != 1)
 908                 {
 909                   if (dump_file && (dump_flags & TDF_DETAILS))
 910                     fprintf (dump_file, "Found * 1, removing\n");
 911                   VEC_pop (operand_entry_t, *ops);
 912                   reassociate_stats.ops_eliminated++;
 913                   return;
 914                 }
 915             }
 916           break;
 917         case BIT_XOR_EXPR:
 918         case PLUS_EXPR:
 919         case MINUS_EXPR:
 920           if (integer_zerop (oelast->op)
 921               || (FLOAT_TYPE_P (type)
 922                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 923                   && fold_real_zero_addition_p (type, oelast->op,
 924                                                 opcode == MINUS_EXPR)))
 925             {
 926               if (VEC_length (operand_entry_t, *ops) != 1)
 927                 {
 928                   if (dump_file && (dump_flags & TDF_DETAILS))
 929                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 930                   VEC_pop (operand_entry_t, *ops);
 931                   reassociate_stats.ops_eliminated++;
 932                   return;
 933                 }
 934             }
 935           break;
 936         default:
 937           break;
 938         }
 939     }
 940 }
 941
 942
 943 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
 944                                  bool, bool);
 945
 946 /* Structure for tracking and counting operands.  */
 947 typedef struct oecount_s {
 948   int cnt;
 949   int id;
 950   enum tree_code oecode;
 951   tree op;
 952 } oecount;
 953
 954 DEF_VEC_O(oecount);
 955 DEF_VEC_ALLOC_O(oecount,heap);
 956
 957 /* The heap for the oecount hashtable and the sorted list of operands.  */
 958 static VEC (oecount, heap) *cvec;
 959
 960 /* Hash function for oecount.  */
 961
 962 static hashval_t
 963 oecount_hash (const void *p)
 964 {
 965   const oecount *c = &VEC_index (oecount, cvec, (size_t)p - 42);
 966   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 967 }
 968
 969 /* Comparison function for oecount.  */
 970
 971 static int
 972 oecount_eq (const void *p1, const void *p2)
 973 {
 974   const oecount *c1 = &VEC_index (oecount, cvec, (size_t)p1 - 42);
 975   const oecount *c2 = &VEC_index (oecount, cvec, (size_t)p2 - 42);
 976   return (c1->oecode == c2->oecode
 977           && c1->op == c2->op);
 978 }
 979
 980 /* Comparison function for qsort sorting oecount elements by count.  */
 981
 982 static int
 983 oecount_cmp (const void *p1, const void *p2)
 984 {
 985   const oecount *c1 = (const oecount *)p1;
 986   const oecount *c2 = (const oecount *)p2;
 987   if (c1->cnt != c2->cnt)
 988     return c1->cnt - c2->cnt;
 989   else
 990     /* If counts are identical, use unique IDs to stabilize qsort.  */
 991     return c1->id - c2->id;
 992 }
 993
 994 /* Return TRUE iff STMT represents a builtin call that raises OP
 995    to some exponent.  */
 996
 997 static bool
 998 stmt_is_power_of_op (gimple stmt, tree op)
 999 {
1000   tree fndecl;
1001
1002   if (!is_gimple_call (stmt))
1003     return false;
1004
1005   fndecl = gimple_call_fndecl (stmt);
1006
1007   if (!fndecl
1008       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009     return false;
1010
1011   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1012     {
1013     CASE_FLT_FN (BUILT_IN_POW):
1014     CASE_FLT_FN (BUILT_IN_POWI):
1015       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1016
1017     default:
1018       return false;
1019     }
1020 }
1021
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023    in place and return the result.  Assumes that stmt_is_power_of_op
1024    was previously called for STMT and returned TRUE.  */
1025
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1028 {
1029   REAL_VALUE_TYPE c, cint;
1030   HOST_WIDE_INT power;
1031   tree arg1;
1032
1033   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1034     {
1035     CASE_FLT_FN (BUILT_IN_POW):
1036       arg1 = gimple_call_arg (stmt, 1);
1037       c = TREE_REAL_CST (arg1);
1038       power = real_to_integer (&c) - 1;
1039       real_from_integer (&cint, VOIDmode, power, 0, 0);
1040       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041       return power;
1042
1043     CASE_FLT_FN (BUILT_IN_POWI):
1044       arg1 = gimple_call_arg (stmt, 1);
1045       power = TREE_INT_CST_LOW (arg1) - 1;
1046       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047       return power;
1048
1049     default:
1050       gcc_unreachable ();
1051     }
1052 }
1053
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1056    replace *DEF with OP as well.  */
1057
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1060 {
1061   tree lhs;
1062   gimple use_stmt;
1063   use_operand_p use;
1064   gimple_stmt_iterator gsi;
1065
1066   if (is_gimple_call (stmt))
1067     lhs = gimple_call_lhs (stmt);
1068   else
1069     lhs = gimple_assign_lhs (stmt);
1070
1071   gcc_assert (has_single_use (lhs));
1072   single_imm_use (lhs, &use, &use_stmt);
1073   if (lhs == *def)
1074     *def = op;
1075   SET_USE (use, op);
1076   if (TREE_CODE (op) != SSA_NAME)
1077     update_stmt (use_stmt);
1078   gsi = gsi_for_stmt (stmt);
1079   gsi_remove (&gsi, true);
1080   release_defs (stmt);
1081
1082   if (is_gimple_call (stmt))
1083     unlink_stmt_vdef (stmt);
1084 }
1085
1086 /* Walks the linear chain with result *DEF searching for an operation
1087    with operand OP and code OPCODE removing that from the chain.  *DEF
1088    is updated if there is only one operand but no operation left.  */
1089
1090 static void
1091 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1092 {
1093   gimple stmt = SSA_NAME_DEF_STMT (*def);
1094
1095   do
1096     {
1097       tree name;
1098
1099       if (opcode == MULT_EXPR
1100           && stmt_is_power_of_op (stmt, op))
1101         {
1102           if (decrement_power (stmt) == 1)
1103             propagate_op_to_single_use (op, stmt, def);
1104           return;
1105         }
1106
1107       name = gimple_assign_rhs1 (stmt);
1108
1109       /* If this is the operation we look for and one of the operands
1110          is ours simply propagate the other operand into the stmts
1111          single use.  */
1112       if (gimple_assign_rhs_code (stmt) == opcode
1113           && (name == op
1114               || gimple_assign_rhs2 (stmt) == op))
1115         {
1116           if (name == op)
1117             name = gimple_assign_rhs2 (stmt);
1118           propagate_op_to_single_use (name, stmt, def);
1119           return;
1120         }
1121
1122       /* We might have a multiply of two __builtin_pow* calls, and
1123          the operand might be hiding in the rightmost one.  */
1124       if (opcode == MULT_EXPR
1125           && gimple_assign_rhs_code (stmt) == opcode
1126           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1127         {
1128           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1129           if (stmt_is_power_of_op (stmt2, op))
1130             {
1131               if (decrement_power (stmt2) == 1)
1132                 propagate_op_to_single_use (op, stmt2, def);
1133               return;
1134             }
1135         }
1136
1137       /* Continue walking the chain.  */
1138       gcc_assert (name != op
1139                   && TREE_CODE (name) == SSA_NAME);
1140       stmt = SSA_NAME_DEF_STMT (name);
1141     }
1142   while (1);
1143 }
1144
1145 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1146    the result.  Places the statement after the definition of either
1147    OP1 or OP2.  Returns the new statement.  */
1148
1149 static gimple
1150 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1151 {
1152   gimple op1def = NULL, op2def = NULL;
1153   gimple_stmt_iterator gsi;
1154   tree op;
1155   gimple sum;
1156
1157   /* Create the addition statement.  */
1158   op = make_ssa_name (type, NULL);
1159   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1160
1161   /* Find an insertion place and insert.  */
1162   if (TREE_CODE (op1) == SSA_NAME)
1163     op1def = SSA_NAME_DEF_STMT (op1);
1164   if (TREE_CODE (op2) == SSA_NAME)
1165     op2def = SSA_NAME_DEF_STMT (op2);
1166   if ((!op1def || gimple_nop_p (op1def))
1167       && (!op2def || gimple_nop_p (op2def)))
1168     {
1169       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1170       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1171     }
1172   else if ((!op1def || gimple_nop_p (op1def))
1173            || (op2def && !gimple_nop_p (op2def)
1174                && stmt_dominates_stmt_p (op1def, op2def)))
1175     {
1176       if (gimple_code (op2def) == GIMPLE_PHI)
1177         {
1178           gsi = gsi_after_labels (gimple_bb (op2def));
1179           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1180         }
1181       else
1182         {
1183           if (!stmt_ends_bb_p (op2def))
1184             {
1185               gsi = gsi_for_stmt (op2def);
1186               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1187             }
1188           else
1189             {
1190               edge e;
1191               edge_iterator ei;
1192
1193               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1194                 if (e->flags & EDGE_FALLTHRU)
1195                   gsi_insert_on_edge_immediate (e, sum);
1196             }
1197         }
1198     }
1199   else
1200     {
1201       if (gimple_code (op1def) == GIMPLE_PHI)
1202         {
1203           gsi = gsi_after_labels (gimple_bb (op1def));
1204           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1205         }
1206       else
1207         {
1208           if (!stmt_ends_bb_p (op1def))
1209             {
1210               gsi = gsi_for_stmt (op1def);
1211               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1212             }
1213           else
1214             {
1215               edge e;
1216               edge_iterator ei;
1217
1218               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1219                 if (e->flags & EDGE_FALLTHRU)
1220                   gsi_insert_on_edge_immediate (e, sum);
1221             }
1222         }
1223     }
1224   update_stmt (sum);
1225
1226   return sum;
1227 }
1228
1229 /* Perform un-distribution of divisions and multiplications.
1230    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1231    to (A + B) / X for real X.
1232
1233    The algorithm is organized as follows.
1234
1235     - First we walk the addition chain *OPS looking for summands that
1236       are defined by a multiplication or a real division.  This results
1237       in the candidates bitmap with relevant indices into *OPS.
1238
1239     - Second we build the chains of multiplications or divisions for
1240       these candidates, counting the number of occurrences of (operand, code)
1241       pairs in all of the candidates chains.
1242
1243     - Third we sort the (operand, code) pairs by number of occurrence and
1244       process them starting with the pair with the most uses.
1245
1246       * For each such pair we walk the candidates again to build a
1247         second candidate bitmap noting all multiplication/division chains
1248         that have at least one occurrence of (operand, code).
1249
1250       * We build an alternate addition chain only covering these
1251         candidates with one (operand, code) operation removed from their
1252         multiplication/division chain.
1253
1254       * The first candidate gets replaced by the alternate addition chain
1255         multiplied/divided by the operand.
1256
1257       * All candidate chains get disabled for further processing and
1258         processing of (operand, code) pairs continues.
1259
1260   The alternate addition chains built are re-processed by the main
1261   reassociation algorithm which allows optimizing a * x * y + b * y * x
1262   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1263
1264 static bool
1265 undistribute_ops_list (enum tree_code opcode,
1266                        VEC (operand_entry_t, heap) **ops, struct loop *loop)
1267 {
1268   unsigned int length = VEC_length (operand_entry_t, *ops);
1269   operand_entry_t oe1;
1270   unsigned i, j;
1271   sbitmap candidates, candidates2;
1272   unsigned nr_candidates, nr_candidates2;
1273   sbitmap_iterator sbi0;
1274   VEC (operand_entry_t, heap) **subops;
1275   htab_t ctable;
1276   bool changed = false;
1277   int next_oecount_id = 0;
1278
1279   if (length <= 1
1280       || opcode != PLUS_EXPR)
1281     return false;
1282
1283   /* Build a list of candidates to process.  */
1284   candidates = sbitmap_alloc (length);
1285   bitmap_clear (candidates);
1286   nr_candidates = 0;
1287   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1288     {
1289       enum tree_code dcode;
1290       gimple oe1def;
1291
1292       if (TREE_CODE (oe1->op) != SSA_NAME)
1293         continue;
1294       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1295       if (!is_gimple_assign (oe1def))
1296         continue;
1297       dcode = gimple_assign_rhs_code (oe1def);
1298       if ((dcode != MULT_EXPR
1299            && dcode != RDIV_EXPR)
1300           || !is_reassociable_op (oe1def, dcode, loop))
1301         continue;
1302
1303       SET_BIT (candidates, i);
1304       nr_candidates++;
1305     }
1306
1307   if (nr_candidates < 2)
1308     {
1309       sbitmap_free (candidates);
1310       return false;
1311     }
1312
1313   if (dump_file && (dump_flags & TDF_DETAILS))
1314     {
1315       fprintf (dump_file, "searching for un-distribute opportunities ");
1316       print_generic_expr (dump_file,
1317         VEC_index (operand_entry_t, *ops,
1318                    bitmap_first_set_bit (candidates))->op, 0);
1319       fprintf (dump_file, " %d\n", nr_candidates);
1320     }
1321
1322   /* Build linearized sub-operand lists and the counting table.  */
1323   cvec = NULL;
1324   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1325   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1326                      VEC_length (operand_entry_t, *ops));
1327   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1328     {
1329       gimple oedef;
1330       enum tree_code oecode;
1331       unsigned j;
1332
1333       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1334       oecode = gimple_assign_rhs_code (oedef);
1335       linearize_expr_tree (&subops[i], oedef,
1336                            associative_tree_code (oecode), false);
1337
1338       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1339         {
1340           oecount c;
1341           void **slot;
1342           size_t idx;
1343           c.oecode = oecode;
1344           c.cnt = 1;
1345           c.id = next_oecount_id++;
1346           c.op = oe1->op;
1347           VEC_safe_push (oecount, heap, cvec, c);
1348           idx = VEC_length (oecount, cvec) + 41;
1349           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1350           if (!*slot)
1351             {
1352               *slot = (void *)idx;
1353             }
1354           else
1355             {
1356               VEC_pop (oecount, cvec);
1357               VEC_index (oecount, cvec, (size_t)*slot - 42).cnt++;
1358             }
1359         }
1360     }
1361   htab_delete (ctable);
1362
1363   /* Sort the counting table.  */
1364   VEC_qsort (oecount, cvec, oecount_cmp);
1365
1366   if (dump_file && (dump_flags & TDF_DETAILS))
1367     {
1368       oecount *c;
1369       fprintf (dump_file, "Candidates:\n");
1370       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1371         {
1372           fprintf (dump_file, "  %u %s: ", c->cnt,
1373                    c->oecode == MULT_EXPR
1374                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1375           print_generic_expr (dump_file, c->op, 0);
1376           fprintf (dump_file, "\n");
1377         }
1378     }
1379
1380   /* Process the (operand, code) pairs in order of most occurence.  */
1381   candidates2 = sbitmap_alloc (length);
1382   while (!VEC_empty (oecount, cvec))
1383     {
1384       oecount *c = &VEC_last (oecount, cvec);
1385       if (c->cnt < 2)
1386         break;
1387
1388       /* Now collect the operands in the outer chain that contain
1389          the common operand in their inner chain.  */
1390       bitmap_clear (candidates2);
1391       nr_candidates2 = 0;
1392       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1393         {
1394           gimple oedef;
1395           enum tree_code oecode;
1396           unsigned j;
1397           tree op = VEC_index (operand_entry_t, *ops, i)->op;
1398
1399           /* If we undistributed in this chain already this may be
1400              a constant.  */
1401           if (TREE_CODE (op) != SSA_NAME)
1402             continue;
1403
1404           oedef = SSA_NAME_DEF_STMT (op);
1405           oecode = gimple_assign_rhs_code (oedef);
1406           if (oecode != c->oecode)
1407             continue;
1408
1409           FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1410             {
1411               if (oe1->op == c->op)
1412                 {
1413                   SET_BIT (candidates2, i);
1414                   ++nr_candidates2;
1415                   break;
1416                 }
1417             }
1418         }
1419
1420       if (nr_candidates2 >= 2)
1421         {
1422           operand_entry_t oe1, oe2;
1423           gimple prod;
1424           int first = bitmap_first_set_bit (candidates2);
1425
1426           /* Build the new addition chain.  */
1427           oe1 = VEC_index (operand_entry_t, *ops, first);
1428           if (dump_file && (dump_flags & TDF_DETAILS))
1429             {
1430               fprintf (dump_file, "Building (");
1431               print_generic_expr (dump_file, oe1->op, 0);
1432             }
1433           zero_one_operation (&oe1->op, c->oecode, c->op);
1434           EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1435             {
1436               gimple sum;
1437               oe2 = VEC_index (operand_entry_t, *ops, i);
1438               if (dump_file && (dump_flags & TDF_DETAILS))
1439                 {
1440                   fprintf (dump_file, " + ");
1441                   print_generic_expr (dump_file, oe2->op, 0);
1442                 }
1443               zero_one_operation (&oe2->op, c->oecode, c->op);
1444               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1445                                        oe1->op, oe2->op, opcode);
1446               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1447               oe2->rank = 0;
1448               oe1->op = gimple_get_lhs (sum);
1449             }
1450
1451           /* Apply the multiplication/division.  */
1452           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1453                                     oe1->op, c->op, c->oecode);
1454           if (dump_file && (dump_flags & TDF_DETAILS))
1455             {
1456               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1457               print_generic_expr (dump_file, c->op, 0);
1458               fprintf (dump_file, "\n");
1459             }
1460
1461           /* Record it in the addition chain and disable further
1462              undistribution with this op.  */
1463           oe1->op = gimple_assign_lhs (prod);
1464           oe1->rank = get_rank (oe1->op);
1465           VEC_free (operand_entry_t, heap, subops[first]);
1466
1467           changed = true;
1468         }
1469
1470       VEC_pop (oecount, cvec);
1471     }
1472
1473   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1474     VEC_free (operand_entry_t, heap, subops[i]);
1475   free (subops);
1476   VEC_free (oecount, heap, cvec);
1477   sbitmap_free (candidates);
1478   sbitmap_free (candidates2);
1479
1480   return changed;
1481 }
1482
1483 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1484    expression, examine the other OPS to see if any of them are comparisons
1485    of the same values, which we may be able to combine or eliminate.
1486    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1487
1488 static bool
1489 eliminate_redundant_comparison (enum tree_code opcode,
1490                                 VEC (operand_entry_t, heap) **ops,
1491                                 unsigned int currindex,
1492                                 operand_entry_t curr)
1493 {
1494   tree op1, op2;
1495   enum tree_code lcode, rcode;
1496   gimple def1, def2;
1497   int i;
1498   operand_entry_t oe;
1499
1500   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1501     return false;
1502
1503   /* Check that CURR is a comparison.  */
1504   if (TREE_CODE (curr->op) != SSA_NAME)
1505     return false;
1506   def1 = SSA_NAME_DEF_STMT (curr->op);
1507   if (!is_gimple_assign (def1))
1508     return false;
1509   lcode = gimple_assign_rhs_code (def1);
1510   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1511     return false;
1512   op1 = gimple_assign_rhs1 (def1);
1513   op2 = gimple_assign_rhs2 (def1);
1514
1515   /* Now look for a similar comparison in the remaining OPS.  */
1516   for (i = currindex + 1;
1517        VEC_iterate (operand_entry_t, *ops, i, oe);
1518        i++)
1519     {
1520       tree t;
1521
1522       if (TREE_CODE (oe->op) != SSA_NAME)
1523         continue;
1524       def2 = SSA_NAME_DEF_STMT (oe->op);
1525       if (!is_gimple_assign (def2))
1526         continue;
1527       rcode = gimple_assign_rhs_code (def2);
1528       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1529         continue;
1530
1531       /* If we got here, we have a match.  See if we can combine the
1532          two comparisons.  */
1533       if (opcode == BIT_IOR_EXPR)
1534         t = maybe_fold_or_comparisons (lcode, op1, op2,
1535                                        rcode, gimple_assign_rhs1 (def2),
1536                                        gimple_assign_rhs2 (def2));
1537       else
1538         t = maybe_fold_and_comparisons (lcode, op1, op2,
1539                                         rcode, gimple_assign_rhs1 (def2),
1540                                         gimple_assign_rhs2 (def2));
1541       if (!t)
1542         continue;
1543
1544       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1545          always give us a boolean_type_node value back.  If the original
1546          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1547          we need to convert.  */
1548       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1549         t = fold_convert (TREE_TYPE (curr->op), t);
1550
1551       if (TREE_CODE (t) != INTEGER_CST
1552           && !operand_equal_p (t, curr->op, 0))
1553         {
1554           enum tree_code subcode;
1555           tree newop1, newop2;
1556           if (!COMPARISON_CLASS_P (t))
1557             continue;
1558           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1559           STRIP_USELESS_TYPE_CONVERSION (newop1);
1560           STRIP_USELESS_TYPE_CONVERSION (newop2);
1561           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1562             continue;
1563         }
1564
1565       if (dump_file && (dump_flags & TDF_DETAILS))
1566         {
1567           fprintf (dump_file, "Equivalence: ");
1568           print_generic_expr (dump_file, curr->op, 0);
1569           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1570           print_generic_expr (dump_file, oe->op, 0);
1571           fprintf (dump_file, " -> ");
1572           print_generic_expr (dump_file, t, 0);
1573           fprintf (dump_file, "\n");
1574         }
1575
1576       /* Now we can delete oe, as it has been subsumed by the new combined
1577          expression t.  */
1578       VEC_ordered_remove (operand_entry_t, *ops, i);
1579       reassociate_stats.ops_eliminated ++;
1580
1581       /* If t is the same as curr->op, we're done.  Otherwise we must
1582          replace curr->op with t.  Special case is if we got a constant
1583          back, in which case we add it to the end instead of in place of
1584          the current entry.  */
1585       if (TREE_CODE (t) == INTEGER_CST)
1586         {
1587           VEC_ordered_remove (operand_entry_t, *ops, currindex);
1588           add_to_ops_vec (ops, t);
1589         }
1590       else if (!operand_equal_p (t, curr->op, 0))
1591         {
1592           gimple sum;
1593           enum tree_code subcode;
1594           tree newop1;
1595           tree newop2;
1596           gcc_assert (COMPARISON_CLASS_P (t));
1597           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1598           STRIP_USELESS_TYPE_CONVERSION (newop1);
1599           STRIP_USELESS_TYPE_CONVERSION (newop2);
1600           gcc_checking_assert (is_gimple_val (newop1)
1601                                && is_gimple_val (newop2));
1602           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1603           curr->op = gimple_get_lhs (sum);
1604         }
1605       return true;
1606     }
1607
1608   return false;
1609 }
1610
1611 /* Perform various identities and other optimizations on the list of
1612    operand entries, stored in OPS.  The tree code for the binary
1613    operation between all the operands is OPCODE.  */
1614
1615 static void
1616 optimize_ops_list (enum tree_code opcode,
1617                    VEC (operand_entry_t, heap) **ops)
1618 {
1619   unsigned int length = VEC_length (operand_entry_t, *ops);
1620   unsigned int i;
1621   operand_entry_t oe;
1622   operand_entry_t oelast = NULL;
1623   bool iterate = false;
1624
1625   if (length == 1)
1626     return;
1627
1628   oelast = VEC_last (operand_entry_t, *ops);
1629
1630   /* If the last two are constants, pop the constants off, merge them
1631      and try the next two.  */
1632   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1633     {
1634       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1635
1636       if (oelm1->rank == 0
1637           && is_gimple_min_invariant (oelm1->op)
1638           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1639                                        TREE_TYPE (oelast->op)))
1640         {
1641           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1642                                      oelm1->op, oelast->op);
1643
1644           if (folded && is_gimple_min_invariant (folded))
1645             {
1646               if (dump_file && (dump_flags & TDF_DETAILS))
1647                 fprintf (dump_file, "Merging constants\n");
1648
1649               VEC_pop (operand_entry_t, *ops);
1650               VEC_pop (operand_entry_t, *ops);
1651
1652               add_to_ops_vec (ops, folded);
1653               reassociate_stats.constants_eliminated++;
1654
1655               optimize_ops_list (opcode, ops);
1656               return;
1657             }
1658         }
1659     }
1660
1661   eliminate_using_constants (opcode, ops);
1662   oelast = NULL;
1663
1664   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1665     {
1666       bool done = false;
1667
1668       if (eliminate_not_pairs (opcode, ops, i, oe))
1669         return;
1670       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1671           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1672           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1673         {
1674           if (done)
1675             return;
1676           iterate = true;
1677           oelast = NULL;
1678           continue;
1679         }
1680       oelast = oe;
1681       i++;
1682     }
1683
1684   length  = VEC_length (operand_entry_t, *ops);
1685   oelast = VEC_last (operand_entry_t, *ops);
1686
1687   if (iterate)
1688     optimize_ops_list (opcode, ops);
1689 }
1690
1691 /* The following functions are subroutines to optimize_range_tests and allow
1692    it to try to change a logical combination of comparisons into a range
1693    test.
1694
1695    For example, both
1696         X == 2 || X == 5 || X == 3 || X == 4
1697    and
1698         X >= 2 && X <= 5
1699    are converted to
1700         (unsigned) (X - 2) <= 3
1701
1702    For more information see comments above fold_test_range in fold-const.c,
1703    this implementation is for GIMPLE.  */
1704
1705 struct range_entry
1706 {
1707   tree exp;
1708   tree low;
1709   tree high;
1710   bool in_p;
1711   bool strict_overflow_p;
1712   unsigned int idx, next;
1713 };
1714
1715 /* This is similar to make_range in fold-const.c, but on top of
1716    GIMPLE instead of trees.  */
1717
1718 static void
1719 init_range_entry (struct range_entry *r, tree exp)
1720 {
1721   int in_p;
1722   tree low, high;
1723   bool is_bool, strict_overflow_p;
1724
1725   r->exp = NULL_TREE;
1726   r->in_p = false;
1727   r->strict_overflow_p = false;
1728   r->low = NULL_TREE;
1729   r->high = NULL_TREE;
1730   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1731     return;
1732
1733   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1734      and see if we can refine the range.  Some of the cases below may not
1735      happen, but it doesn't seem worth worrying about this.  We "continue"
1736      the outer loop when we've changed something; otherwise we "break"
1737      the switch, which will "break" the while.  */
1738   low = build_int_cst (TREE_TYPE (exp), 0);
1739   high = low;
1740   in_p = 0;
1741   strict_overflow_p = false;
1742   is_bool = false;
1743   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1744     {
1745       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1746         is_bool = true;
1747       else
1748         return;
1749     }
1750   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1751     is_bool = true;
1752
1753   while (1)
1754     {
1755       gimple stmt;
1756       enum tree_code code;
1757       tree arg0, arg1, exp_type;
1758       tree nexp;
1759       location_t loc;
1760
1761       if (TREE_CODE (exp) != SSA_NAME)
1762         break;
1763
1764       stmt = SSA_NAME_DEF_STMT (exp);
1765       if (!is_gimple_assign (stmt))
1766         break;
1767
1768       code = gimple_assign_rhs_code (stmt);
1769       arg0 = gimple_assign_rhs1 (stmt);
1770       if (TREE_CODE (arg0) != SSA_NAME)
1771         break;
1772       arg1 = gimple_assign_rhs2 (stmt);
1773       exp_type = TREE_TYPE (exp);
1774       loc = gimple_location (stmt);
1775       switch (code)
1776         {
1777         case BIT_NOT_EXPR:
1778           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1779             {
1780               in_p = !in_p;
1781               exp = arg0;
1782               continue;
1783             }
1784           break;
1785         case SSA_NAME:
1786           exp = arg0;
1787           continue;
1788         CASE_CONVERT:
1789           if (is_bool)
1790             goto do_default;
1791           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1792             {
1793               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1794                 is_bool = true;
1795               else
1796                 return;
1797             }
1798           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1799             is_bool = true;
1800           goto do_default;
1801         case EQ_EXPR:
1802         case NE_EXPR:
1803         case LT_EXPR:
1804         case LE_EXPR:
1805         case GE_EXPR:
1806         case GT_EXPR:
1807           is_bool = true;
1808           /* FALLTHRU */
1809         default:
1810           if (!is_bool)
1811             return;
1812         do_default:
1813           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1814                                   &low, &high, &in_p,
1815                                   &strict_overflow_p);
1816           if (nexp != NULL_TREE)
1817             {
1818               exp = nexp;
1819               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1820               continue;
1821             }
1822           break;
1823         }
1824       break;
1825     }
1826   if (is_bool)
1827     {
1828       r->exp = exp;
1829       r->in_p = in_p;
1830       r->low = low;
1831       r->high = high;
1832       r->strict_overflow_p = strict_overflow_p;
1833     }
1834 }
1835
1836 /* Comparison function for qsort.  Sort entries
1837    without SSA_NAME exp first, then with SSA_NAMEs sorted
1838    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1839    by increasing ->low and if ->low is the same, by increasing
1840    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1841    maximum.  */
1842
1843 static int
1844 range_entry_cmp (const void *a, const void *b)
1845 {
1846   const struct range_entry *p = (const struct range_entry *) a;
1847   const struct range_entry *q = (const struct range_entry *) b;
1848
1849   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1850     {
1851       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1852         {
1853           /* Group range_entries for the same SSA_NAME together.  */
1854           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1855             return -1;
1856           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1857             return 1;
1858           /* If ->low is different, NULL low goes first, then by
1859              ascending low.  */
1860           if (p->low != NULL_TREE)
1861             {
1862               if (q->low != NULL_TREE)
1863                 {
1864                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1865                                           p->low, q->low);
1866                   if (tem && integer_onep (tem))
1867                     return -1;
1868                   tem = fold_binary (GT_EXPR, boolean_type_node,
1869                                      p->low, q->low);
1870                   if (tem && integer_onep (tem))
1871                     return 1;
1872                 }
1873               else
1874                 return 1;
1875             }
1876           else if (q->low != NULL_TREE)
1877             return -1;
1878           /* If ->high is different, NULL high goes last, before that by
1879              ascending high.  */
1880           if (p->high != NULL_TREE)
1881             {
1882               if (q->high != NULL_TREE)
1883                 {
1884                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1885                                           p->high, q->high);
1886                   if (tem && integer_onep (tem))
1887                     return -1;
1888                   tem = fold_binary (GT_EXPR, boolean_type_node,
1889                                      p->high, q->high);
1890                   if (tem && integer_onep (tem))
1891                     return 1;
1892                 }
1893               else
1894                 return -1;
1895             }
1896           else if (p->high != NULL_TREE)
1897             return 1;
1898           /* If both ranges are the same, sort below by ascending idx.  */
1899         }
1900       else
1901         return 1;
1902     }
1903   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1904     return -1;
1905
1906   if (p->idx < q->idx)
1907     return -1;
1908   else
1909     {
1910       gcc_checking_assert (p->idx > q->idx);
1911       return 1;
1912     }
1913 }
1914
1915 /* Helper routine of optimize_range_test.
1916    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1917    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1918    OPCODE and OPS are arguments of optimize_range_tests.  Return
1919    true if the range merge has been successful.  */
1920
1921 static bool
1922 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1923                    unsigned int count, enum tree_code opcode,
1924                    VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1925                    tree low, tree high, bool strict_overflow_p)
1926 {
1927   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1928   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1929   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1930   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1931   gimple_stmt_iterator gsi;
1932
1933   if (tem == NULL_TREE)
1934     return false;
1935
1936   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1937     warning_at (loc, OPT_Wstrict_overflow,
1938                 "assuming signed overflow does not occur "
1939                 "when simplifying range test");
1940
1941   if (dump_file && (dump_flags & TDF_DETAILS))
1942     {
1943       struct range_entry *r;
1944       fprintf (dump_file, "Optimizing range tests ");
1945       print_generic_expr (dump_file, range->exp, 0);
1946       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1947       print_generic_expr (dump_file, range->low, 0);
1948       fprintf (dump_file, ", ");
1949       print_generic_expr (dump_file, range->high, 0);
1950       fprintf (dump_file, "]");
1951       for (r = otherrange; r < otherrange + count; r++)
1952         {
1953           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1954           print_generic_expr (dump_file, r->low, 0);
1955           fprintf (dump_file, ", ");
1956           print_generic_expr (dump_file, r->high, 0);
1957           fprintf (dump_file, "]");
1958         }
1959       fprintf (dump_file, "\n into ");
1960       print_generic_expr (dump_file, tem, 0);
1961       fprintf (dump_file, "\n");
1962     }
1963
1964   if (opcode == BIT_IOR_EXPR)
1965     tem = invert_truthvalue_loc (loc, tem);
1966
1967   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1968   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1969   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1970                                   GSI_SAME_STMT);
1971
1972   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1973   range->exp = exp;
1974   range->low = low;
1975   range->high = high;
1976   range->in_p = in_p;
1977   range->strict_overflow_p = false;
1978
1979   for (range = otherrange; range < otherrange + count; range++)
1980     {
1981       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1982       range->exp = NULL_TREE;
1983     }
1984   return true;
1985 }
1986
1987 /* Optimize range tests, similarly how fold_range_test optimizes
1988    it on trees.  The tree code for the binary
1989    operation between all the operands is OPCODE.  */
1990
1991 static void
1992 optimize_range_tests (enum tree_code opcode,
1993                       VEC (operand_entry_t, heap) **ops)
1994 {
1995   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1996   operand_entry_t oe;
1997   struct range_entry *ranges;
1998   bool any_changes = false;
1999
2000   if (length == 1)
2001     return;
2002
2003   ranges = XNEWVEC (struct range_entry, length);
2004   for (i = 0; i < length; i++)
2005     {
2006       ranges[i].idx = i;
2007       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
2008       /* For | invert it now, we will invert it again before emitting
2009          the optimized expression.  */
2010       if (opcode == BIT_IOR_EXPR)
2011         ranges[i].in_p = !ranges[i].in_p;
2012     }
2013
2014   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2015   for (i = 0; i < length; i++)
2016     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2017       break;
2018
2019   /* Try to merge ranges.  */
2020   for (first = i; i < length; i++)
2021     {
2022       tree low = ranges[i].low;
2023       tree high = ranges[i].high;
2024       int in_p = ranges[i].in_p;
2025       bool strict_overflow_p = ranges[i].strict_overflow_p;
2026       int update_fail_count = 0;
2027
2028       for (j = i + 1; j < length; j++)
2029         {
2030           if (ranges[i].exp != ranges[j].exp)
2031             break;
2032           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2033                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2034             break;
2035           strict_overflow_p |= ranges[j].strict_overflow_p;
2036         }
2037
2038       if (j == i + 1)
2039         continue;
2040
2041       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2042                              ops, ranges[i].exp, in_p, low, high,
2043                              strict_overflow_p))
2044         {
2045           i = j - 1;
2046           any_changes = true;
2047         }
2048       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2049          while update_range_test would fail.  */
2050       else if (update_fail_count == 64)
2051         i = j - 1;
2052       else
2053         ++update_fail_count;
2054     }
2055
2056   /* Optimize X == CST1 || X == CST2
2057      if popcount (CST1 ^ CST2) == 1 into
2058      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2059      Similarly for ranges.  E.g.
2060      X != 2 && X != 3 && X != 10 && X != 11
2061      will be transformed by the above loop into
2062      (X - 2U) <= 1U && (X - 10U) <= 1U
2063      and this loop can transform that into
2064      ((X & ~8) - 2U) <= 1U.  */
2065   for (i = first; i < length; i++)
2066     {
2067       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2068
2069       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2070         continue;
2071       type = TREE_TYPE (ranges[i].exp);
2072       if (!INTEGRAL_TYPE_P (type))
2073         continue;
2074       lowi = ranges[i].low;
2075       if (lowi == NULL_TREE)
2076         lowi = TYPE_MIN_VALUE (type);
2077       highi = ranges[i].high;
2078       if (highi == NULL_TREE)
2079         continue;
2080       for (j = i + 1; j < length && j < i + 64; j++)
2081         {
2082           if (ranges[j].exp == NULL_TREE)
2083             continue;
2084           if (ranges[i].exp != ranges[j].exp)
2085             break;
2086           if (ranges[j].in_p)
2087             continue;
2088           lowj = ranges[j].low;
2089           if (lowj == NULL_TREE)
2090             continue;
2091           highj = ranges[j].high;
2092           if (highj == NULL_TREE)
2093             highj = TYPE_MAX_VALUE (type);
2094           tem = fold_binary (GT_EXPR, boolean_type_node,
2095                              lowj, highi);
2096           if (tem == NULL_TREE || !integer_onep (tem))
2097             continue;
2098           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2099           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2100             continue;
2101           gcc_checking_assert (!integer_zerop (lowxor));
2102           tem = fold_binary (MINUS_EXPR, type, lowxor,
2103                              build_int_cst (type, 1));
2104           if (tem == NULL_TREE)
2105             continue;
2106           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2107           if (tem == NULL_TREE || !integer_zerop (tem))
2108             continue;
2109           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2110           if (!tree_int_cst_equal (lowxor, highxor))
2111             continue;
2112           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2113           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2114           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2115           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2116           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2117                                  ranges[i].in_p, lowj, highj,
2118                                  ranges[i].strict_overflow_p
2119                                  || ranges[j].strict_overflow_p))
2120             {
2121               any_changes = true;
2122               break;
2123             }
2124         }
2125     }
2126
2127   if (any_changes)
2128     {
2129       j = 0;
2130       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2131         {
2132           if (oe->op == error_mark_node)
2133             continue;
2134           else if (i != j)
2135             VEC_replace (operand_entry_t, *ops, j, oe);
2136           j++;
2137         }
2138       VEC_truncate (operand_entry_t, *ops, j);
2139     }
2140
2141   XDELETEVEC (ranges);
2142 }
2143
2144 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2145    of STMT in it's operands.  This is also known as a "destructive
2146    update" operation.  */
2147
2148 static bool
2149 is_phi_for_stmt (gimple stmt, tree operand)
2150 {
2151   gimple def_stmt;
2152   tree lhs;
2153   use_operand_p arg_p;
2154   ssa_op_iter i;
2155
2156   if (TREE_CODE (operand) != SSA_NAME)
2157     return false;
2158
2159   lhs = gimple_assign_lhs (stmt);
2160
2161   def_stmt = SSA_NAME_DEF_STMT (operand);
2162   if (gimple_code (def_stmt) != GIMPLE_PHI)
2163     return false;
2164
2165   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2166     if (lhs == USE_FROM_PTR (arg_p))
2167       return true;
2168   return false;
2169 }
2170
2171 /* Remove def stmt of VAR if VAR has zero uses and recurse
2172    on rhs1 operand if so.  */
2173
2174 static void
2175 remove_visited_stmt_chain (tree var)
2176 {
2177   gimple stmt;
2178   gimple_stmt_iterator gsi;
2179
2180   while (1)
2181     {
2182       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2183         return;
2184       stmt = SSA_NAME_DEF_STMT (var);
2185       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2186         {
2187           var = gimple_assign_rhs1 (stmt);
2188           gsi = gsi_for_stmt (stmt);
2189           gsi_remove (&gsi, true);
2190           release_defs (stmt);
2191         }
2192       else
2193         return;
2194     }
2195 }
2196
2197 /* This function checks three consequtive operands in
2198    passed operands vector OPS starting from OPINDEX and
2199    swaps two operands if it is profitable for binary operation
2200    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2201
2202    We pair ops with the same rank if possible.
2203
2204    The alternative we try is to see if STMT is a destructive
2205    update style statement, which is like:
2206    b = phi (a, ...)
2207    a = c + b;
2208    In that case, we want to use the destructive update form to
2209    expose the possible vectorizer sum reduction opportunity.
2210    In that case, the third operand will be the phi node. This
2211    check is not performed if STMT is null.
2212
2213    We could, of course, try to be better as noted above, and do a
2214    lot of work to try to find these opportunities in >3 operand
2215    cases, but it is unlikely to be worth it.  */
2216
2217 static void
2218 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2219                           unsigned int opindex, gimple stmt)
2220 {
2221   operand_entry_t oe1, oe2, oe3;
2222
2223   oe1 = VEC_index (operand_entry_t, ops, opindex);
2224   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2225   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2226
2227   if ((oe1->rank == oe2->rank
2228        && oe2->rank != oe3->rank)
2229       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2230           && !is_phi_for_stmt (stmt, oe1->op)
2231           && !is_phi_for_stmt (stmt, oe2->op)))
2232     {
2233       struct operand_entry temp = *oe3;
2234       oe3->op = oe1->op;
2235       oe3->rank = oe1->rank;
2236       oe1->op = temp.op;
2237       oe1->rank= temp.rank;
2238     }
2239   else if ((oe1->rank == oe3->rank
2240             && oe2->rank != oe3->rank)
2241            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2242                && !is_phi_for_stmt (stmt, oe1->op)
2243                && !is_phi_for_stmt (stmt, oe3->op)))
2244     {
2245       struct operand_entry temp = *oe2;
2246       oe2->op = oe1->op;
2247       oe2->rank = oe1->rank;
2248       oe1->op = temp.op;
2249       oe1->rank= temp.rank;
2250     }
2251 }
2252
2253 /* Recursively rewrite our linearized statements so that the operators
2254    match those in OPS[OPINDEX], putting the computation in rank
2255    order.  */
2256
2257 static void
2258 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2259                    VEC(operand_entry_t, heap) * ops, bool moved)
2260 {
2261   tree rhs1 = gimple_assign_rhs1 (stmt);
2262   tree rhs2 = gimple_assign_rhs2 (stmt);
2263   operand_entry_t oe;
2264
2265   /* If we have three operands left, then we want to make sure the ones
2266      that get the double binary op are chosen wisely.  */
2267   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2268     swap_ops_for_binary_stmt (ops, opindex, stmt);
2269
2270   /* The final recursion case for this function is that you have
2271      exactly two operations left.
2272      If we had one exactly one op in the entire list to start with, we
2273      would have never called this function, and the tail recursion
2274      rewrites them one at a time.  */
2275   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2276     {
2277       operand_entry_t oe1, oe2;
2278
2279       oe1 = VEC_index (operand_entry_t, ops, opindex);
2280       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2281
2282       if (rhs1 != oe1->op || rhs2 != oe2->op)
2283         {
2284           if (dump_file && (dump_flags & TDF_DETAILS))
2285             {
2286               fprintf (dump_file, "Transforming ");
2287               print_gimple_stmt (dump_file, stmt, 0, 0);
2288             }
2289
2290           gimple_assign_set_rhs1 (stmt, oe1->op);
2291           gimple_assign_set_rhs2 (stmt, oe2->op);
2292           update_stmt (stmt);
2293           if (rhs1 != oe1->op && rhs1 != oe2->op)
2294             remove_visited_stmt_chain (rhs1);
2295
2296           if (dump_file && (dump_flags & TDF_DETAILS))
2297             {
2298               fprintf (dump_file, " into ");
2299               print_gimple_stmt (dump_file, stmt, 0, 0);
2300             }
2301         }
2302       return;
2303     }
2304
2305   /* If we hit here, we should have 3 or more ops left.  */
2306   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2307
2308   /* Rewrite the next operator.  */
2309   oe = VEC_index (operand_entry_t, ops, opindex);
2310
2311   if (oe->op != rhs2)
2312     {
2313       if (!moved)
2314         {
2315           gimple_stmt_iterator gsinow, gsirhs1;
2316           gimple stmt1 = stmt, stmt2;
2317           unsigned int count;
2318
2319           gsinow = gsi_for_stmt (stmt);
2320           count = VEC_length (operand_entry_t, ops) - opindex - 2;
2321           while (count-- != 0)
2322             {
2323               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2324               gsirhs1 = gsi_for_stmt (stmt2);
2325               gsi_move_before (&gsirhs1, &gsinow);
2326               gsi_prev (&gsinow);
2327               stmt1 = stmt2;
2328             }
2329           moved = true;
2330         }
2331
2332       if (dump_file && (dump_flags & TDF_DETAILS))
2333         {
2334           fprintf (dump_file, "Transforming ");
2335           print_gimple_stmt (dump_file, stmt, 0, 0);
2336         }
2337
2338       gimple_assign_set_rhs2 (stmt, oe->op);
2339       update_stmt (stmt);
2340
2341       if (dump_file && (dump_flags & TDF_DETAILS))
2342         {
2343           fprintf (dump_file, " into ");
2344           print_gimple_stmt (dump_file, stmt, 0, 0);
2345         }
2346     }
2347   /* Recurse on the LHS of the binary operator, which is guaranteed to
2348      be the non-leaf side.  */
2349   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2350 }
2351
2352 /* Find out how many cycles we need to compute statements chain.
2353    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2354    maximum number of independent statements we may execute per cycle.  */
2355
2356 static int
2357 get_required_cycles (int ops_num, int cpu_width)
2358 {
2359   int res;
2360   int elog;
2361   unsigned int rest;
2362
2363   /* While we have more than 2 * cpu_width operands
2364      we may reduce number of operands by cpu_width
2365      per cycle.  */
2366   res = ops_num / (2 * cpu_width);
2367
2368   /* Remained operands count may be reduced twice per cycle
2369      until we have only one operand.  */
2370   rest = (unsigned)(ops_num - res * cpu_width);
2371   elog = exact_log2 (rest);
2372   if (elog >= 0)
2373     res += elog;
2374   else
2375     res += floor_log2 (rest) + 1;
2376
2377   return res;
2378 }
2379
2380 /* Returns an optimal number of registers to use for computation of
2381    given statements.  */
2382
2383 static int
2384 get_reassociation_width (int ops_num, enum tree_code opc,
2385                          enum machine_mode mode)
2386 {
2387   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2388   int width;
2389   int width_min;
2390   int cycles_best;
2391
2392   if (param_width > 0)
2393     width = param_width;
2394   else
2395     width = targetm.sched.reassociation_width (opc, mode);
2396
2397   if (width == 1)
2398     return width;
2399
2400   /* Get the minimal time required for sequence computation.  */
2401   cycles_best = get_required_cycles (ops_num, width);
2402
2403   /* Check if we may use less width and still compute sequence for
2404      the same time.  It will allow us to reduce registers usage.
2405      get_required_cycles is monotonically increasing with lower width
2406      so we can perform a binary search for the minimal width that still
2407      results in the optimal cycle count.  */
2408   width_min = 1;
2409   while (width > width_min)
2410     {
2411       int width_mid = (width + width_min) / 2;
2412
2413       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2414         width = width_mid;
2415       else if (width_min < width_mid)
2416         width_min = width_mid;
2417       else
2418         break;
2419     }
2420
2421   return width;
2422 }
2423
2424 /* Recursively rewrite our linearized statements so that the operators
2425    match those in OPS[OPINDEX], putting the computation in rank
2426    order and trying to allow operations to be executed in
2427    parallel.  */
2428
2429 static void
2430 rewrite_expr_tree_parallel (gimple stmt, int width,
2431                             VEC(operand_entry_t, heap) * ops)
2432 {
2433   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2434   int op_num = VEC_length (operand_entry_t, ops);
2435   int stmt_num = op_num - 1;
2436   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2437   int op_index = op_num - 1;
2438   int stmt_index = 0;
2439   int ready_stmts_end = 0;
2440   int i = 0;
2441   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2442
2443   /* We start expression rewriting from the top statements.
2444      So, in this loop we create a full list of statements
2445      we will work with.  */
2446   stmts[stmt_num - 1] = stmt;
2447   for (i = stmt_num - 2; i >= 0; i--)
2448     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2449
2450   for (i = 0; i < stmt_num; i++)
2451     {
2452       tree op1, op2;
2453
2454       /* Determine whether we should use results of
2455          already handled statements or not.  */
2456       if (ready_stmts_end == 0
2457           && (i - stmt_index >= width || op_index < 1))
2458         ready_stmts_end = i;
2459
2460       /* Now we choose operands for the next statement.  Non zero
2461          value in ready_stmts_end means here that we should use
2462          the result of already generated statements as new operand.  */
2463       if (ready_stmts_end > 0)
2464         {
2465           op1 = gimple_assign_lhs (stmts[stmt_index++]);
2466           if (ready_stmts_end > stmt_index)
2467             op2 = gimple_assign_lhs (stmts[stmt_index++]);
2468           else if (op_index >= 0)
2469             op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2470           else
2471             {
2472               gcc_assert (stmt_index < i);
2473               op2 = gimple_assign_lhs (stmts[stmt_index++]);
2474             }
2475
2476           if (stmt_index >= ready_stmts_end)
2477             ready_stmts_end = 0;
2478         }
2479       else
2480         {
2481           if (op_index > 1)
2482             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2483           op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2484           op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2485         }
2486
2487       /* If we emit the last statement then we should put
2488          operands into the last statement.  It will also
2489          break the loop.  */
2490       if (op_index < 0 && stmt_index == i)
2491         i = stmt_num - 1;
2492
2493       if (dump_file && (dump_flags & TDF_DETAILS))
2494         {
2495           fprintf (dump_file, "Transforming ");
2496           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2497         }
2498
2499       /* We keep original statement only for the last one.  All
2500          others are recreated.  */
2501       if (i == stmt_num - 1)
2502         {
2503           gimple_assign_set_rhs1 (stmts[i], op1);
2504           gimple_assign_set_rhs2 (stmts[i], op2);
2505           update_stmt (stmts[i]);
2506         }
2507       else
2508         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
2509
2510       if (dump_file && (dump_flags & TDF_DETAILS))
2511         {
2512           fprintf (dump_file, " into ");
2513           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2514         }
2515     }
2516
2517   remove_visited_stmt_chain (last_rhs1);
2518 }
2519
2520 /* Transform STMT, which is really (A +B) + (C + D) into the left
2521    linear form, ((A+B)+C)+D.
2522    Recurse on D if necessary.  */
2523
2524 static void
2525 linearize_expr (gimple stmt)
2526 {
2527   gimple_stmt_iterator gsinow, gsirhs;
2528   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2529   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2530   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2531   gimple newbinrhs = NULL;
2532   struct loop *loop = loop_containing_stmt (stmt);
2533
2534   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2535               && is_reassociable_op (binrhs, rhscode, loop));
2536
2537   gsinow = gsi_for_stmt (stmt);
2538   gsirhs = gsi_for_stmt (binrhs);
2539   gsi_move_before (&gsirhs, &gsinow);
2540
2541   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2542   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2543   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2544
2545   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2546     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2547
2548   if (dump_file && (dump_flags & TDF_DETAILS))
2549     {
2550       fprintf (dump_file, "Linearized: ");
2551       print_gimple_stmt (dump_file, stmt, 0, 0);
2552     }
2553
2554   reassociate_stats.linearized++;
2555   update_stmt (binrhs);
2556   update_stmt (binlhs);
2557   update_stmt (stmt);
2558
2559   gimple_set_visited (stmt, true);
2560   gimple_set_visited (binlhs, true);
2561   gimple_set_visited (binrhs, true);
2562
2563   /* Tail recurse on the new rhs if it still needs reassociation.  */
2564   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2565     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2566            want to change the algorithm while converting to tuples.  */
2567     linearize_expr (stmt);
2568 }
2569
2570 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2571    it.  Otherwise, return NULL.  */
2572
2573 static gimple
2574 get_single_immediate_use (tree lhs)
2575 {
2576   use_operand_p immuse;
2577   gimple immusestmt;
2578
2579   if (TREE_CODE (lhs) == SSA_NAME
2580       && single_imm_use (lhs, &immuse, &immusestmt)
2581       && is_gimple_assign (immusestmt))
2582     return immusestmt;
2583
2584   return NULL;
2585 }
2586
2587 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2588    representing the negated value.  Insertions of any necessary
2589    instructions go before GSI.
2590    This function is recursive in that, if you hand it "a_5" as the
2591    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2592    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2593
2594 static tree
2595 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2596 {
2597   gimple negatedefstmt= NULL;
2598   tree resultofnegate;
2599
2600   /* If we are trying to negate a name, defined by an add, negate the
2601      add operands instead.  */
2602   if (TREE_CODE (tonegate) == SSA_NAME)
2603     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2604   if (TREE_CODE (tonegate) == SSA_NAME
2605       && is_gimple_assign (negatedefstmt)
2606       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2607       && has_single_use (gimple_assign_lhs (negatedefstmt))
2608       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2609     {
2610       gimple_stmt_iterator gsi;
2611       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2612       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2613
2614       gsi = gsi_for_stmt (negatedefstmt);
2615       rhs1 = negate_value (rhs1, &gsi);
2616       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2617
2618       gsi = gsi_for_stmt (negatedefstmt);
2619       rhs2 = negate_value (rhs2, &gsi);
2620       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2621
2622       update_stmt (negatedefstmt);
2623       return gimple_assign_lhs (negatedefstmt);
2624     }
2625
2626   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2627   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2628                                              NULL_TREE, true, GSI_SAME_STMT);
2629   return resultofnegate;
2630 }
2631
2632 /* Return true if we should break up the subtract in STMT into an add
2633    with negate.  This is true when we the subtract operands are really
2634    adds, or the subtract itself is used in an add expression.  In
2635    either case, breaking up the subtract into an add with negate
2636    exposes the adds to reassociation.  */
2637
2638 static bool
2639 should_break_up_subtract (gimple stmt)
2640 {
2641   tree lhs = gimple_assign_lhs (stmt);
2642   tree binlhs = gimple_assign_rhs1 (stmt);
2643   tree binrhs = gimple_assign_rhs2 (stmt);
2644   gimple immusestmt;
2645   struct loop *loop = loop_containing_stmt (stmt);
2646
2647   if (TREE_CODE (binlhs) == SSA_NAME
2648       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2649     return true;
2650
2651   if (TREE_CODE (binrhs) == SSA_NAME
2652       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2653     return true;
2654
2655   if (TREE_CODE (lhs) == SSA_NAME
2656       && (immusestmt = get_single_immediate_use (lhs))
2657       && is_gimple_assign (immusestmt)
2658       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2659           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2660     return true;
2661   return false;
2662 }
2663
2664 /* Transform STMT from A - B into A + -B.  */
2665
2666 static void
2667 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2668 {
2669   tree rhs1 = gimple_assign_rhs1 (stmt);
2670   tree rhs2 = gimple_assign_rhs2 (stmt);
2671
2672   if (dump_file && (dump_flags & TDF_DETAILS))
2673     {
2674       fprintf (dump_file, "Breaking up subtract ");
2675       print_gimple_stmt (dump_file, stmt, 0, 0);
2676     }
2677
2678   rhs2 = negate_value (rhs2, gsip);
2679   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2680   update_stmt (stmt);
2681 }
2682
2683 /* Determine whether STMT is a builtin call that raises an SSA name
2684    to an integer power and has only one use.  If so, and this is early
2685    reassociation and unsafe math optimizations are permitted, place
2686    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2687    If any of these conditions does not hold, return FALSE.  */
2688
2689 static bool
2690 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
2691 {
2692   tree fndecl, arg1;
2693   REAL_VALUE_TYPE c, cint;
2694
2695   if (!first_pass_instance
2696       || !flag_unsafe_math_optimizations
2697       || !is_gimple_call (stmt)
2698       || !has_single_use (gimple_call_lhs (stmt)))
2699     return false;
2700
2701   fndecl = gimple_call_fndecl (stmt);
2702
2703   if (!fndecl
2704       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
2705     return false;
2706
2707   switch (DECL_FUNCTION_CODE (fndecl))
2708     {
2709     CASE_FLT_FN (BUILT_IN_POW):
2710       *base = gimple_call_arg (stmt, 0);
2711       arg1 = gimple_call_arg (stmt, 1);
2712
2713       if (TREE_CODE (arg1) != REAL_CST)
2714         return false;
2715
2716       c = TREE_REAL_CST (arg1);
2717
2718       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
2719         return false;
2720
2721       *exponent = real_to_integer (&c);
2722       real_from_integer (&cint, VOIDmode, *exponent,
2723                          *exponent < 0 ? -1 : 0, 0);
2724       if (!real_identical (&c, &cint))
2725         return false;
2726
2727       break;
2728
2729     CASE_FLT_FN (BUILT_IN_POWI):
2730       *base = gimple_call_arg (stmt, 0);
2731       arg1 = gimple_call_arg (stmt, 1);
2732
2733       if (!host_integerp (arg1, 0))
2734         return false;
2735
2736       *exponent = TREE_INT_CST_LOW (arg1);
2737       break;
2738
2739     default:
2740       return false;
2741     }
2742
2743   /* Expanding negative exponents is generally unproductive, so we don't
2744      complicate matters with those.  Exponents of zero and one should
2745      have been handled by expression folding.  */
2746   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
2747     return false;
2748
2749   return true;
2750 }
2751
2752 /* Recursively linearize a binary expression that is the RHS of STMT.
2753    Place the operands of the expression tree in the vector named OPS.  */
2754
2755 static void
2756 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2757                      bool is_associative, bool set_visited)
2758 {
2759   tree binlhs = gimple_assign_rhs1 (stmt);
2760   tree binrhs = gimple_assign_rhs2 (stmt);
2761   gimple binlhsdef = NULL, binrhsdef = NULL;
2762   bool binlhsisreassoc = false;
2763   bool binrhsisreassoc = false;
2764   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2765   struct loop *loop = loop_containing_stmt (stmt);
2766   tree base = NULL_TREE;
2767   HOST_WIDE_INT exponent = 0;
2768
2769   if (set_visited)
2770     gimple_set_visited (stmt, true);
2771
2772   if (TREE_CODE (binlhs) == SSA_NAME)
2773     {
2774       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2775       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2776                          && !stmt_could_throw_p (binlhsdef));
2777     }
2778
2779   if (TREE_CODE (binrhs) == SSA_NAME)
2780     {
2781       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2782       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2783                          && !stmt_could_throw_p (binrhsdef));
2784     }
2785
2786   /* If the LHS is not reassociable, but the RHS is, we need to swap
2787      them.  If neither is reassociable, there is nothing we can do, so
2788      just put them in the ops vector.  If the LHS is reassociable,
2789      linearize it.  If both are reassociable, then linearize the RHS
2790      and the LHS.  */
2791
2792   if (!binlhsisreassoc)
2793     {
2794       tree temp;
2795
2796       /* If this is not a associative operation like division, give up.  */
2797       if (!is_associative)
2798         {
2799           add_to_ops_vec (ops, binrhs);
2800           return;
2801         }
2802
2803       if (!binrhsisreassoc)
2804         {
2805           if (rhscode == MULT_EXPR
2806               && TREE_CODE (binrhs) == SSA_NAME
2807               && acceptable_pow_call (binrhsdef, &base, &exponent))
2808             {
2809               add_repeat_to_ops_vec (ops, base, exponent);
2810               gimple_set_visited (binrhsdef, true);
2811             }
2812           else
2813             add_to_ops_vec (ops, binrhs);
2814
2815           if (rhscode == MULT_EXPR
2816               && TREE_CODE (binlhs) == SSA_NAME
2817               && acceptable_pow_call (binlhsdef, &base, &exponent))
2818             {
2819               add_repeat_to_ops_vec (ops, base, exponent);
2820               gimple_set_visited (binlhsdef, true);
2821             }
2822           else
2823             add_to_ops_vec (ops, binlhs);
2824
2825           return;
2826         }
2827
2828       if (dump_file && (dump_flags & TDF_DETAILS))
2829         {
2830           fprintf (dump_file, "swapping operands of ");
2831           print_gimple_stmt (dump_file, stmt, 0, 0);
2832         }
2833
2834       swap_tree_operands (stmt,
2835                           gimple_assign_rhs1_ptr (stmt),
2836                           gimple_assign_rhs2_ptr (stmt));
2837       update_stmt (stmt);
2838
2839       if (dump_file && (dump_flags & TDF_DETAILS))
2840         {
2841           fprintf (dump_file, " is now ");
2842           print_gimple_stmt (dump_file, stmt, 0, 0);
2843         }
2844
2845       /* We want to make it so the lhs is always the reassociative op,
2846          so swap.  */
2847       temp = binlhs;
2848       binlhs = binrhs;
2849       binrhs = temp;
2850     }
2851   else if (binrhsisreassoc)
2852     {
2853       linearize_expr (stmt);
2854       binlhs = gimple_assign_rhs1 (stmt);
2855       binrhs = gimple_assign_rhs2 (stmt);
2856     }
2857
2858   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2859               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2860                                       rhscode, loop));
2861   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2862                        is_associative, set_visited);
2863
2864   if (rhscode == MULT_EXPR
2865       && TREE_CODE (binrhs) == SSA_NAME
2866       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
2867     {
2868       add_repeat_to_ops_vec (ops, base, exponent);
2869       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
2870     }
2871   else
2872     add_to_ops_vec (ops, binrhs);
2873 }
2874
2875 /* Repropagate the negates back into subtracts, since no other pass
2876    currently does it.  */
2877
2878 static void
2879 repropagate_negates (void)
2880 {
2881   unsigned int i = 0;
2882   tree negate;
2883
2884   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2885     {
2886       gimple user = get_single_immediate_use (negate);
2887
2888       if (!user || !is_gimple_assign (user))
2889         continue;
2890
2891       /* The negate operand can be either operand of a PLUS_EXPR
2892          (it can be the LHS if the RHS is a constant for example).
2893
2894          Force the negate operand to the RHS of the PLUS_EXPR, then
2895          transform the PLUS_EXPR into a MINUS_EXPR.  */
2896       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2897         {
2898           /* If the negated operand appears on the LHS of the
2899              PLUS_EXPR, exchange the operands of the PLUS_EXPR
2900              to force the negated operand to the RHS of the PLUS_EXPR.  */
2901           if (gimple_assign_rhs1 (user) == negate)
2902             {
2903               swap_tree_operands (user,
2904                                   gimple_assign_rhs1_ptr (user),
2905                                   gimple_assign_rhs2_ptr (user));
2906             }
2907
2908           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2909              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2910           if (gimple_assign_rhs2 (user) == negate)
2911             {
2912               tree rhs1 = gimple_assign_rhs1 (user);
2913               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2914               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2915               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2916               update_stmt (user);
2917             }
2918         }
2919       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2920         {
2921           if (gimple_assign_rhs1 (user) == negate)
2922             {
2923               /* We have
2924                    x = -a
2925                    y = x - b
2926                  which we transform into
2927                    x = a + b
2928                    y = -x .
2929                  This pushes down the negate which we possibly can merge
2930                  into some other operation, hence insert it into the
2931                  plus_negates vector.  */
2932               gimple feed = SSA_NAME_DEF_STMT (negate);
2933               tree a = gimple_assign_rhs1 (feed);
2934               tree rhs2 = gimple_assign_rhs2 (user);
2935               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2936               gimple_replace_lhs (feed, negate);
2937               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2938               update_stmt (gsi_stmt (gsi));
2939               gsi2 = gsi_for_stmt (user);
2940               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2941               update_stmt (gsi_stmt (gsi2));
2942               gsi_move_before (&gsi, &gsi2);
2943               VEC_safe_push (tree, heap, plus_negates,
2944                              gimple_assign_lhs (gsi_stmt (gsi2)));
2945             }
2946           else
2947             {
2948               /* Transform "x = -a; y = b - x" into "y = b + a", getting
2949                  rid of one operation.  */
2950               gimple feed = SSA_NAME_DEF_STMT (negate);
2951               tree a = gimple_assign_rhs1 (feed);
2952               tree rhs1 = gimple_assign_rhs1 (user);
2953               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2954               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2955               update_stmt (gsi_stmt (gsi));
2956             }
2957         }
2958     }
2959 }
2960
2961 /* Returns true if OP is of a type for which we can do reassociation.
2962    That is for integral or non-saturating fixed-point types, and for
2963    floating point type when associative-math is enabled.  */
2964
2965 static bool
2966 can_reassociate_p (tree op)
2967 {
2968   tree type = TREE_TYPE (op);
2969   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2970       || NON_SAT_FIXED_POINT_TYPE_P (type)
2971       || (flag_associative_math && FLOAT_TYPE_P (type)))
2972     return true;
2973   return false;
2974 }
2975
2976 /* Break up subtract operations in block BB.
2977
2978    We do this top down because we don't know whether the subtract is
2979    part of a possible chain of reassociation except at the top.
2980
2981    IE given
2982    d = f + g
2983    c = a + e
2984    b = c - d
2985    q = b - r
2986    k = t - q
2987
2988    we want to break up k = t - q, but we won't until we've transformed q
2989    = b - r, which won't be broken up until we transform b = c - d.
2990
2991    En passant, clear the GIMPLE visited flag on every statement.  */
2992
2993 static void
2994 break_up_subtract_bb (basic_block bb)
2995 {
2996   gimple_stmt_iterator gsi;
2997   basic_block son;
2998
2999   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3000     {
3001       gimple stmt = gsi_stmt (gsi);
3002       gimple_set_visited (stmt, false);
3003
3004       if (!is_gimple_assign (stmt)
3005           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3006         continue;
3007
3008       /* Look for simple gimple subtract operations.  */
3009       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3010         {
3011           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3012               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3013             continue;
3014
3015           /* Check for a subtract used only in an addition.  If this
3016              is the case, transform it into add of a negate for better
3017              reassociation.  IE transform C = A-B into C = A + -B if C
3018              is only used in an addition.  */
3019           if (should_break_up_subtract (stmt))
3020             break_up_subtract (stmt, &gsi);
3021         }
3022       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3023                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3024         VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
3025     }
3026   for (son = first_dom_son (CDI_DOMINATORS, bb);
3027        son;
3028        son = next_dom_son (CDI_DOMINATORS, son))
3029     break_up_subtract_bb (son);
3030 }
3031
3032 /* Used for repeated factor analysis.  */
3033 struct repeat_factor_d
3034 {
3035   /* An SSA name that occurs in a multiply chain.  */
3036   tree factor;
3037
3038   /* Cached rank of the factor.  */
3039   unsigned rank;
3040
3041   /* Number of occurrences of the factor in the chain.  */
3042   HOST_WIDE_INT count;
3043
3044   /* An SSA name representing the product of this factor and
3045      all factors appearing later in the repeated factor vector.  */
3046   tree repr;
3047 };
3048
3049 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3050 typedef const struct repeat_factor_d *const_repeat_factor_t;
3051
3052 DEF_VEC_O (repeat_factor);
3053 DEF_VEC_ALLOC_O (repeat_factor, heap);
3054
3055 static VEC (repeat_factor, heap) *repeat_factor_vec;
3056
3057 /* Used for sorting the repeat factor vector.  Sort primarily by
3058    ascending occurrence count, secondarily by descending rank.  */
3059
3060 static int
3061 compare_repeat_factors (const void *x1, const void *x2)
3062 {
3063   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3064   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3065
3066   if (rf1->count != rf2->count)
3067     return rf1->count - rf2->count;
3068
3069   return rf2->rank - rf1->rank;
3070 }
3071
3072 /* Look for repeated operands in OPS in the multiply tree rooted at
3073    STMT.  Replace them with an optimal sequence of multiplies and powi
3074    builtin calls, and remove the used operands from OPS.  Return an
3075    SSA name representing the value of the replacement sequence.  */
3076
3077 static tree
3078 attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops)
3079 {
3080   unsigned i, j, vec_len;
3081   int ii;
3082   operand_entry_t oe;
3083   repeat_factor_t rf1, rf2;
3084   repeat_factor rfnew;
3085   tree result = NULL_TREE;
3086   tree target_ssa, iter_result;
3087   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3088   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3089   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3090   gimple mul_stmt, pow_stmt;
3091
3092   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3093      target.  */
3094   if (!powi_fndecl)
3095     return NULL_TREE;
3096
3097   /* Allocate the repeated factor vector.  */
3098   repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
3099
3100   /* Scan the OPS vector for all SSA names in the product and build
3101      up a vector of occurrence counts for each factor.  */
3102   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
3103     {
3104       if (TREE_CODE (oe->op) == SSA_NAME)
3105         {
3106           FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3107             {
3108               if (rf1->factor == oe->op)
3109                 {
3110                   rf1->count += oe->count;
3111                   break;
3112                 }
3113             }
3114
3115           if (j >= VEC_length (repeat_factor, repeat_factor_vec))
3116             {
3117               rfnew.factor = oe->op;
3118               rfnew.rank = oe->rank;
3119               rfnew.count = oe->count;
3120               rfnew.repr = NULL_TREE;
3121               VEC_safe_push (repeat_factor, heap, repeat_factor_vec, rfnew);
3122             }
3123         }
3124     }
3125
3126   /* Sort the repeated factor vector by (a) increasing occurrence count,
3127      and (b) decreasing rank.  */
3128   VEC_qsort (repeat_factor, repeat_factor_vec, compare_repeat_factors);
3129
3130   /* It is generally best to combine as many base factors as possible
3131      into a product before applying __builtin_powi to the result.
3132      However, the sort order chosen for the repeated factor vector
3133      allows us to cache partial results for the product of the base
3134      factors for subsequent use.  When we already have a cached partial
3135      result from a previous iteration, it is best to make use of it
3136      before looking for another __builtin_pow opportunity.
3137
3138      As an example, consider x * x * y * y * y * z * z * z * z.
3139      We want to first compose the product x * y * z, raise it to the
3140      second power, then multiply this by y * z, and finally multiply
3141      by z.  This can be done in 5 multiplies provided we cache y * z
3142      for use in both expressions:
3143
3144         t1 = y * z
3145         t2 = t1 * x
3146         t3 = t2 * t2
3147         t4 = t1 * t3
3148         result = t4 * z
3149
3150      If we instead ignored the cached y * z and first multiplied by
3151      the __builtin_pow opportunity z * z, we would get the inferior:
3152
3153         t1 = y * z
3154         t2 = t1 * x
3155         t3 = t2 * t2
3156         t4 = z * z
3157         t5 = t3 * t4
3158         result = t5 * y  */
3159
3160   vec_len = VEC_length (repeat_factor, repeat_factor_vec);
3161
3162   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3163   while (true)
3164     {
3165       HOST_WIDE_INT power;
3166
3167       /* First look for the largest cached product of factors from
3168          preceding iterations.  If found, create a builtin_powi for
3169          it if the minimum occurrence count for its factors is at
3170          least 2, or just use this cached product as our next
3171          multiplicand if the minimum occurrence count is 1.  */
3172       FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3173         {
3174           if (rf1->repr && rf1->count > 0)
3175             break;
3176         }
3177
3178       if (j < vec_len)
3179         {
3180           power = rf1->count;
3181
3182           if (power == 1)
3183             {
3184               iter_result = rf1->repr;
3185
3186               if (dump_file && (dump_flags & TDF_DETAILS))
3187                 {
3188                   unsigned elt;
3189                   repeat_factor_t rf;
3190                   fputs ("Multiplying by cached product ", dump_file);
3191                   for (elt = j; elt < vec_len; elt++)
3192                     {
3193                       rf = &VEC_index (repeat_factor, repeat_factor_vec, elt);
3194                       print_generic_expr (dump_file, rf->factor, 0);
3195                       if (elt < vec_len - 1)
3196                         fputs (" * ", dump_file);
3197                     }
3198                   fputs ("\n", dump_file);
3199                 }
3200             }
3201           else
3202             {
3203               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3204               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3205                                             build_int_cst (integer_type_node,
3206                                                            power));
3207               gimple_call_set_lhs (pow_stmt, iter_result);
3208               gimple_set_location (pow_stmt, gimple_location (stmt));
3209               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3210
3211               if (dump_file && (dump_flags & TDF_DETAILS))
3212                 {
3213                   unsigned elt;
3214                   repeat_factor_t rf;
3215                   fputs ("Building __builtin_pow call for cached product (",
3216                          dump_file);
3217                   for (elt = j; elt < vec_len; elt++)
3218                     {
3219                       rf = &VEC_index (repeat_factor, repeat_factor_vec, elt);
3220                       print_generic_expr (dump_file, rf->factor, 0);
3221                       if (elt < vec_len - 1)
3222                         fputs (" * ", dump_file);
3223                     }
3224                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3225                            power);
3226                 }
3227             }
3228         }
3229       else
3230         {
3231           /* Otherwise, find the first factor in the repeated factor
3232              vector whose occurrence count is at least 2.  If no such
3233              factor exists, there are no builtin_powi opportunities
3234              remaining.  */
3235           FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3236             {
3237               if (rf1->count >= 2)
3238                 break;
3239             }
3240
3241           if (j >= vec_len)
3242             break;
3243
3244           power = rf1->count;
3245
3246           if (dump_file && (dump_flags & TDF_DETAILS))
3247             {
3248               unsigned elt;
3249               repeat_factor_t rf;
3250               fputs ("Building __builtin_pow call for (", dump_file);
3251               for (elt = j; elt < vec_len; elt++)
3252                 {
3253                   rf = &VEC_index (repeat_factor, repeat_factor_vec, elt);
3254                   print_generic_expr (dump_file, rf->factor, 0);
3255                   if (elt < vec_len - 1)
3256                     fputs (" * ", dump_file);
3257                 }
3258               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3259             }
3260
3261           reassociate_stats.pows_created++;
3262
3263           /* Visit each element of the vector in reverse order (so that
3264              high-occurrence elements are visited first, and within the
3265              same occurrence count, lower-ranked elements are visited
3266              first).  Form a linear product of all elements in this order
3267              whose occurrencce count is at least that of element J.
3268              Record the SSA name representing the product of each element
3269              with all subsequent elements in the vector.  */
3270           if (j == vec_len - 1)
3271             rf1->repr = rf1->factor;
3272           else
3273             {
3274               for (ii = vec_len - 2; ii >= (int)j; ii--)
3275                 {
3276                   tree op1, op2;
3277
3278                   rf1 = &VEC_index (repeat_factor, repeat_factor_vec, ii);
3279                   rf2 = &VEC_index (repeat_factor, repeat_factor_vec, ii + 1);
3280
3281                   /* Init the last factor's representative to be itself.  */
3282                   if (!rf2->repr)
3283                     rf2->repr = rf2->factor;
3284
3285                   op1 = rf1->factor;
3286                   op2 = rf2->repr;
3287
3288                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3289                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3290                                                            target_ssa,
3291                                                            op1, op2);
3292                   gimple_set_location (mul_stmt, gimple_location (stmt));
3293                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3294                   rf1->repr = target_ssa;
3295
3296                   /* Don't reprocess the multiply we just introduced.  */
3297                   gimple_set_visited (mul_stmt, true);
3298                 }
3299             }
3300
3301           /* Form a call to __builtin_powi for the maximum product
3302              just formed, raised to the power obtained earlier.  */
3303           rf1 = &VEC_index (repeat_factor, repeat_factor_vec, j);
3304           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3305           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3306                                         build_int_cst (integer_type_node,
3307                                                        power));
3308           gimple_call_set_lhs (pow_stmt, iter_result);
3309           gimple_set_location (pow_stmt, gimple_location (stmt));
3310           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3311         }
3312
3313       /* If we previously formed at least one other builtin_powi call,
3314          form the product of this one and those others.  */
3315       if (result)
3316         {
3317           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3318           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3319                                                    result, iter_result);
3320           gimple_set_location (mul_stmt, gimple_location (stmt));
3321           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3322           gimple_set_visited (mul_stmt, true);
3323           result = new_result;
3324         }
3325       else
3326         result = iter_result;
3327
3328       /* Decrement the occurrence count of each element in the product
3329          by the count found above, and remove this many copies of each
3330          factor from OPS.  */
3331       for (i = j; i < vec_len; i++)
3332         {
3333           unsigned k = power;
3334           unsigned n;
3335
3336           rf1 = &VEC_index (repeat_factor, repeat_factor_vec, i);
3337           rf1->count -= power;
3338
3339           FOR_EACH_VEC_ELT_REVERSE (operand_entry_t, *ops, n, oe)
3340             {
3341               if (oe->op == rf1->factor)
3342                 {
3343                   if (oe->count <= k)
3344                     {
3345                       VEC_ordered_remove (operand_entry_t, *ops, n);
3346                       k -= oe->count;
3347
3348                       if (k == 0)
3349                         break;
3350                     }
3351                   else
3352                     {
3353                       oe->count -= k;
3354                       break;
3355                     }
3356                 }
3357             }
3358         }
3359     }
3360
3361   /* At this point all elements in the repeated factor vector have a
3362      remaining occurrence count of 0 or 1, and those with a count of 1
3363      don't have cached representatives.  Re-sort the ops vector and
3364      clean up.  */
3365   VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
3366   VEC_free (repeat_factor, heap, repeat_factor_vec);
3367
3368   /* Return the final product computed herein.  Note that there may
3369      still be some elements with single occurrence count left in OPS;
3370      those will be handled by the normal reassociation logic.  */
3371   return result;
3372 }
3373
3374 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3375
3376 static void
3377 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3378 {
3379   tree rhs1;
3380
3381   if (dump_file && (dump_flags & TDF_DETAILS))
3382     {
3383       fprintf (dump_file, "Transforming ");
3384       print_gimple_stmt (dump_file, stmt, 0, 0);
3385     }
3386
3387   rhs1 = gimple_assign_rhs1 (stmt);
3388   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3389   update_stmt (stmt);
3390   remove_visited_stmt_chain (rhs1);
3391
3392   if (dump_file && (dump_flags & TDF_DETAILS))
3393     {
3394       fprintf (dump_file, " into ");
3395       print_gimple_stmt (dump_file, stmt, 0, 0);
3396     }
3397 }
3398
3399 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3400
3401 static void
3402 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3403                             tree rhs1, tree rhs2)
3404 {
3405   if (dump_file && (dump_flags & TDF_DETAILS))
3406     {
3407       fprintf (dump_file, "Transforming ");
3408       print_gimple_stmt (dump_file, stmt, 0, 0);
3409     }
3410
3411   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3412   update_stmt (gsi_stmt (*gsi));
3413   remove_visited_stmt_chain (rhs1);
3414
3415   if (dump_file && (dump_flags & TDF_DETAILS))
3416     {
3417       fprintf (dump_file, " into ");
3418       print_gimple_stmt (dump_file, stmt, 0, 0);
3419     }
3420 }
3421
3422 /* Reassociate expressions in basic block BB and its post-dominator as
3423    children.  */
3424
3425 static void
3426 reassociate_bb (basic_block bb)
3427 {
3428   gimple_stmt_iterator gsi;
3429   basic_block son;
3430
3431   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3432     {
3433       gimple stmt = gsi_stmt (gsi);
3434
3435       if (is_gimple_assign (stmt)
3436           && !stmt_could_throw_p (stmt))
3437         {
3438           tree lhs, rhs1, rhs2;
3439           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3440
3441           /* If this is not a gimple binary expression, there is
3442              nothing for us to do with it.  */
3443           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
3444             continue;
3445
3446           /* If this was part of an already processed statement,
3447              we don't need to touch it again. */
3448           if (gimple_visited_p (stmt))
3449             {
3450               /* This statement might have become dead because of previous
3451                  reassociations.  */
3452               if (has_zero_uses (gimple_get_lhs (stmt)))
3453                 {
3454                   gsi_remove (&gsi, true);
3455                   release_defs (stmt);
3456                   /* We might end up removing the last stmt above which
3457                      places the iterator to the end of the sequence.
3458                      Reset it to the last stmt in this case which might
3459                      be the end of the sequence as well if we removed
3460                      the last statement of the sequence.  In which case
3461                      we need to bail out.  */
3462                   if (gsi_end_p (gsi))
3463                     {
3464                       gsi = gsi_last_bb (bb);
3465                       if (gsi_end_p (gsi))
3466                         break;
3467                     }
3468                 }
3469               continue;
3470             }
3471
3472           lhs = gimple_assign_lhs (stmt);
3473           rhs1 = gimple_assign_rhs1 (stmt);
3474           rhs2 = gimple_assign_rhs2 (stmt);
3475
3476           /* For non-bit or min/max operations we can't associate
3477              all types.  Verify that here.  */
3478           if (rhs_code != BIT_IOR_EXPR
3479               && rhs_code != BIT_AND_EXPR
3480               && rhs_code != BIT_XOR_EXPR
3481               && rhs_code != MIN_EXPR
3482               && rhs_code != MAX_EXPR
3483               && (!can_reassociate_p (lhs)
3484                   || !can_reassociate_p (rhs1)
3485                   || !can_reassociate_p (rhs2)))
3486             continue;
3487
3488           if (associative_tree_code (rhs_code))
3489             {
3490               VEC(operand_entry_t, heap) *ops = NULL;
3491               tree powi_result = NULL_TREE;
3492
3493               /* There may be no immediate uses left by the time we
3494                  get here because we may have eliminated them all.  */
3495               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
3496                 continue;
3497
3498               gimple_set_visited (stmt, true);
3499               linearize_expr_tree (&ops, stmt, true, true);
3500               VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3501               optimize_ops_list (rhs_code, &ops);
3502               if (undistribute_ops_list (rhs_code, &ops,
3503                                          loop_containing_stmt (stmt)))
3504                 {
3505                   VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3506                   optimize_ops_list (rhs_code, &ops);
3507                 }
3508
3509               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
3510                 optimize_range_tests (rhs_code, &ops);
3511
3512               if (first_pass_instance
3513                   && rhs_code == MULT_EXPR
3514                   && flag_unsafe_math_optimizations)
3515                 powi_result = attempt_builtin_powi (stmt, &ops);
3516
3517               /* If the operand vector is now empty, all operands were
3518                  consumed by the __builtin_powi optimization.  */
3519               if (VEC_length (operand_entry_t, ops) == 0)
3520                 transform_stmt_to_copy (&gsi, stmt, powi_result);
3521               else if (VEC_length (operand_entry_t, ops) == 1)
3522                 {
3523                   tree last_op = VEC_last (operand_entry_t, ops)->op;
3524
3525                   if (powi_result)
3526                     transform_stmt_to_multiply (&gsi, stmt, last_op,
3527                                                 powi_result);
3528                   else
3529                     transform_stmt_to_copy (&gsi, stmt, last_op);
3530                 }
3531               else
3532                 {
3533                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3534                   int ops_num = VEC_length (operand_entry_t, ops);
3535                   int width = get_reassociation_width (ops_num, rhs_code, mode);
3536
3537                   if (dump_file && (dump_flags & TDF_DETAILS))
3538                     fprintf (dump_file,
3539                              "Width = %d was chosen for reassociation\n", width);
3540
3541                   if (width > 1
3542                       && VEC_length (operand_entry_t, ops) > 3)
3543                     rewrite_expr_tree_parallel (stmt, width, ops);
3544                   else
3545                     rewrite_expr_tree (stmt, 0, ops, false);
3546
3547                   /* If we combined some repeated factors into a
3548                      __builtin_powi call, multiply that result by the
3549                      reassociated operands.  */
3550                   if (powi_result)
3551                     {
3552                       gimple mul_stmt;
3553                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
3554                       tree target_ssa = make_temp_ssa_name (type, NULL,
3555                                                             "reassocpow");
3556                       gimple_set_lhs (stmt, target_ssa);
3557                       update_stmt (stmt);
3558                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
3559                                                                powi_result,
3560                                                                target_ssa);
3561                       gimple_set_location (mul_stmt, gimple_location (stmt));
3562                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
3563                     }
3564                 }
3565
3566               VEC_free (operand_entry_t, heap, ops);
3567             }
3568         }
3569     }
3570   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
3571        son;
3572        son = next_dom_son (CDI_POST_DOMINATORS, son))
3573     reassociate_bb (son);
3574 }
3575
3576 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
3577 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
3578
3579 /* Dump the operand entry vector OPS to FILE.  */
3580
3581 void
3582 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
3583 {
3584   operand_entry_t oe;
3585   unsigned int i;
3586
3587   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
3588     {
3589       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
3590       print_generic_expr (file, oe->op, 0);
3591     }
3592 }
3593
3594 /* Dump the operand entry vector OPS to STDERR.  */
3595
3596 DEBUG_FUNCTION void
3597 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
3598 {
3599   dump_ops_vector (stderr, ops);
3600 }
3601
3602 static void
3603 do_reassoc (void)
3604 {
3605   break_up_subtract_bb (ENTRY_BLOCK_PTR);
3606   reassociate_bb (EXIT_BLOCK_PTR);
3607 }
3608
3609 /* Initialize the reassociation pass.  */
3610
3611 static void
3612 init_reassoc (void)
3613 {
3614   int i;
3615   long rank = 2;
3616   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
3617
3618   /* Find the loops, so that we can prevent moving calculations in
3619      them.  */
3620   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3621
3622   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3623
3624   operand_entry_pool = create_alloc_pool ("operand entry pool",
3625                                           sizeof (struct operand_entry), 30);
3626   next_operand_entry_id = 0;
3627
3628   /* Reverse RPO (Reverse Post Order) will give us something where
3629      deeper loops come later.  */
3630   pre_and_rev_post_order_compute (NULL, bbs, false);
3631   bb_rank = XCNEWVEC (long, last_basic_block);
3632   operand_rank = pointer_map_create ();
3633
3634   /* Give each default definition a distinct rank.  This includes
3635      parameters and the static chain.  Walk backwards over all
3636      SSA names so that we get proper rank ordering according
3637      to tree_swap_operands_p.  */
3638   for (i = num_ssa_names - 1; i > 0; --i)
3639     {
3640       tree name = ssa_name (i);
3641       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
3642         insert_operand_rank (name, ++rank);
3643     }
3644
3645   /* Set up rank for each BB  */
3646   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3647     bb_rank[bbs[i]] = ++rank  << 16;
3648
3649   free (bbs);
3650   calculate_dominance_info (CDI_POST_DOMINATORS);
3651   plus_negates = NULL;
3652 }
3653
3654 /* Cleanup after the reassociation pass, and print stats if
3655    requested.  */
3656
3657 static void
3658 fini_reassoc (void)
3659 {
3660   statistics_counter_event (cfun, "Linearized",
3661                             reassociate_stats.linearized);
3662   statistics_counter_event (cfun, "Constants eliminated",
3663                             reassociate_stats.constants_eliminated);
3664   statistics_counter_event (cfun, "Ops eliminated",
3665                             reassociate_stats.ops_eliminated);
3666   statistics_counter_event (cfun, "Statements rewritten",
3667                             reassociate_stats.rewritten);
3668   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
3669                             reassociate_stats.pows_encountered);
3670   statistics_counter_event (cfun, "Built-in powi calls created",
3671                             reassociate_stats.pows_created);
3672
3673   pointer_map_destroy (operand_rank);
3674   free_alloc_pool (operand_entry_pool);
3675   free (bb_rank);
3676   VEC_free (tree, heap, plus_negates);
3677   free_dominance_info (CDI_POST_DOMINATORS);
3678   loop_optimizer_finalize ();
3679 }
3680
3681 /* Gate and execute functions for Reassociation.  */
3682
3683 static unsigned int
3684 execute_reassoc (void)
3685 {
3686   init_reassoc ();
3687
3688   do_reassoc ();
3689   repropagate_negates ();
3690
3691   fini_reassoc ();
3692   return 0;
3693 }
3694
3695 static bool
3696 gate_tree_ssa_reassoc (void)
3697 {
3698   return flag_tree_reassoc != 0;
3699 }
3700
3701 struct gimple_opt_pass pass_reassoc =
3702 {
3703  {
3704   GIMPLE_PASS,
3705   "reassoc",                            /* name */
3706   gate_tree_ssa_reassoc,                /* gate */
3707   execute_reassoc,                      /* execute */
3708   NULL,                                 /* sub */
3709   NULL,                                 /* next */
3710   0,                                    /* static_pass_number */
3711   TV_TREE_REASSOC,                      /* tv_id */
3712   PROP_cfg | PROP_ssa,                  /* properties_required */
3713   0,                                    /* properties_provided */
3714   0,                                    /* properties_destroyed */
3715   0,                                    /* todo_flags_start */
3716   TODO_verify_ssa
3717     | TODO_verify_flow
3718     | TODO_ggc_collect                  /* todo_flags_finish */
3719  }
3720 };