1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
42
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
46
47 It consists of five steps:
48
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
51
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
56
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
59
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
63
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
66
67     5. Repropagate negates, as nothing else will clean them up ATM.
68
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
71
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
74
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
79
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
84
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
87
88 a (1), b (1), c (1), d (2), e (2)
89
90
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
93
94 You want to first merge all leaves of the same rank, as much as
95 possible.
96
97 So first build a binary op of
98
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
100
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
103
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
108
109 Then build a binary op of d + e
110 mergetmp2 = d + e
111
112 and put mergetmp2 on the merge worklist.
113
114 so merge worklist = {mergetmp, c, mergetmp2}
115
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
118
119 So we have
120
121 build binary op
122 mergetmp3 = mergetmp + c
123
124 worklist = {mergetmp2, mergetmp3}
125
126 mergetmp4 = mergetmp2 + mergetmp3
127
128 worklist = {mergetmp4}
129
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
132
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
135
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
138
139 So why don't we do this?
140
141 Because it's expensive, and rarely will help. Most trees we are
142    reassociating have 3 or fewer ops. If they have 2 ops, they will
143    already be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
146
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
149
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
153
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
157
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
163
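/* A purely illustrative sketch of the steps above (not the literal output
   of any particular testcase): for the PLUS_EXPR chain

     w = a + b - a + c

   step 1 breaks up the subtract:    w = a + b + (-a) + c
   step 2 linearizes the chain:      ((a + b) + (-a)) + c
   step 3 cancels a against -a (eliminate_plus_minus_pair below replaces
          the pair with 0 and eliminate_using_constants drops that 0)
   step 4 rewrites the remainder in rank order, giving w = b + c.  */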
164
165 /* Statistics */
166 static struct
167 {
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
175
176 /* Operator, rank pair. */
177 typedef struct operand_entry
178 {
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
184
185 static alloc_pool operand_entry_pool;
186
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
190
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
195
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
198
199 /* Forward decls. */
200 static long get_rank (tree);
201
202
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
207
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
217 {
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
224
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
228
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
233
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
238
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
244
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
247 {
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
251 {
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
255 }
256 }
257
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
260 }
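
/* For example (an illustrative note, the concrete numbers depend on the
   CFG): an accumulator phi in the header of an innermost loop gets the
   rank of the loop latch block plus PHI_LOOP_BIAS (1 << 15).  That bias
   dwarfs the +1 increments ordinary statements receive in get_rank, so
   the accumulator is added last when the chain is rewritten; see the
   accumulator example in the comment inside get_rank below.  */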
261
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
267 {
268 gimple phi_stmt;
269 long block_rank;
270
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
274
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
276
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
279
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
284
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
287
288 return false;
289 }
290
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
297 {
298 long op_rank;
299
300 if (loop_carried_phi (op))
301 return rank;
302
303 op_rank = get_rank (op);
304
305 return MAX (rank, op_rank);
306 }
307
308 /* Look up the operand rank structure for expression E. */
309
310 static inline long
311 find_operand_rank (tree e)
312 {
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
315 }
316
317 /* Insert {E,RANK} into the operand rank hashtable. */
318
319 static inline void
320 insert_operand_rank (tree e, long rank)
321 {
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
327 }
328
329 /* Given an expression E, return the rank of the expression. */
330
331 static long
332 get_rank (tree e)
333 {
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
337
338 /* SSA_NAME's have the rank of the expression they are the result
339 of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346      I make no claims that this is optimal; however, it gives good
347 results. */
348
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
352
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
357
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
361
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
366
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
369
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
374
375 if (TREE_CODE (e) == SSA_NAME)
376 {
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
381
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
384
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
388
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
392
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
397
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
403 {
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
409 {
410 for (i = 0; i < n; i++)
411 {
412 op = TREE_OPERAND (rhs, i);
413
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
416 }
417 }
418 }
419 else
420 {
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
423 {
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
427 }
428 }
429
430 if (dump_file && (dump_flags & TDF_DETAILS))
431 {
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
435 }
436
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
440 }
441
442 /* Globals, etc, are rank 0 */
443 return 0;
444 }
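
/* A small illustrative example of the ranking (the absolute numbers
   depend on bb_rank and the pre-setup parameter ranks): for

     a_1 = parm1 + parm2;
     b_2 = a_1 + parm3;

   rank (a_1) == MAX (rank (parm1), rank (parm2)) + 1 and
   rank (b_2) == MAX (rank (a_1), rank (parm3)) + 1, so values later in a
   dependence chain get strictly larger ranks than the values they use.  */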
445
446
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
452
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
457 {
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
464 }
465
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
470 {
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
473
474   /* It's nicer for optimize_ops_list if constants that are likely
475      to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
478 {
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
485 }
486
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
492 {
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
497 }
498
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
503 }
504
505 /* Add an operand entry to *OPS for the tree operand OP. */
506
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
509 {
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
511
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
517 }
518
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
521
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
525 {
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
527
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
533
534 reassociate_stats.pows_encountered++;
535 }
536
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
539
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
542 {
543 basic_block bb = gimple_bb (stmt);
544
545 if (gimple_bb (stmt) == NULL)
546 return false;
547
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
550
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
555
556 return false;
557 }
558
559
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561    operand of that operation. Otherwise, return NULL_TREE. */
562
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
565 {
566 gimple stmt = SSA_NAME_DEF_STMT (name);
567
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
570
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
574 }
575
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
579
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
587 {
588
589   /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
593
594 if (last && last->op == curr->op)
595 {
596 switch (opcode)
597 {
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
603 {
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
610 }
611
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
614
615 return true;
616
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
619 {
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
625 }
626
627 reassociate_stats.ops_eliminated += 2;
628
629 if (ops->length () == 2)
630 {
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
634 }
635 else
636 {
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
639 }
640
641 return true;
642
643 default:
644 break;
645 }
646 }
647 return false;
648 }
649
650 static vec<tree> plus_negates;
651
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
657
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
663 {
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
668
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
671
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
676
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
680
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
685 {
686 if (oe->op == negateop)
687 {
688
689 if (dump_file && (dump_flags & TDF_DETAILS))
690 {
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
696 }
697
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
702
703 return true;
704 }
705 else if (oe->op == notop)
706 {
707 tree op_type = TREE_TYPE (oe->op);
708
709 if (dump_file && (dump_flags & TDF_DETAILS))
710 {
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
716 }
717
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
722
723 return true;
724 }
725 }
726
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
731
732 return false;
733 }
734
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736    bitwise not expression, look in OPS for a corresponding operand to
737    cancel it out. If we find one, the whole operand list collapses to
738    0 (for BIT_AND_EXPR) or an all-ones constant (for BIT_IOR_EXPR), and
739    we return true. Otherwise, return false. */
740
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
746 {
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
750
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
754
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
758
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
762
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
767 {
768 if (oe->op == notop)
769 {
770 if (dump_file && (dump_flags & TDF_DETAILS))
771 {
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
783 }
784
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
790
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
795 }
796 }
797
798 return false;
799 }
800
801 /* Use constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
807
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
811 {
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
814
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
817 {
818 switch (opcode)
819 {
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
822 {
823 if (ops->length () != 1)
824 {
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
827
828 reassociate_stats.ops_eliminated += ops->length () - 1;
829
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
833 }
834 }
835 else if (integer_all_onesp (oelast->op))
836 {
837 if (ops->length () != 1)
838 {
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
843 }
844 }
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
848 {
849 if (ops->length () != 1)
850 {
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
853
854 reassociate_stats.ops_eliminated += ops->length () - 1;
855
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
859 }
860 }
861 else if (integer_zerop (oelast->op))
862 {
863 if (ops->length () != 1)
864 {
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
869 }
870 }
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
878 {
879 if (ops->length () != 1)
880 {
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
883
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
888 }
889 }
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
894 {
895 if (ops->length () != 1)
896 {
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
902 }
903 }
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
913 {
914 if (ops->length () != 1)
915 {
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
921 }
922 }
923 break;
924 default:
925 break;
926 }
927 }
928 }
929
930
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
933
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
941
942
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
945
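/* Note on the encoding used below: the oecount hashtable built in
   undistribute_ops_list does not store pointers to oecount structures.
   Each slot instead holds the zero-based index of the entry in CVEC plus
   42, cast to a pointer, which is why oecount_hash and oecount_eq recover
   the entry as cvec[(size_t)p - 42].  The offset keeps slot values away
   from the NULL and deleted markers the hash table uses internally.  */
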
946 /* Hash function for oecount. */
947
948 static hashval_t
949 oecount_hash (const void *p)
950 {
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
953 }
954
955 /* Comparison function for oecount. */
956
957 static int
958 oecount_eq (const void *p1, const void *p2)
959 {
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
964 }
965
966 /* Comparison function for qsort sorting oecount elements by count. */
967
968 static int
969 oecount_cmp (const void *p1, const void *p2)
970 {
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
978 }
979
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
982
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
985 {
986 tree fndecl;
987
988 if (!is_gimple_call (stmt))
989 return false;
990
991 fndecl = gimple_call_fndecl (stmt);
992
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
996
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
998 {
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1002
1003 default:
1004 return false;
1005 }
1006 }
1007
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1011
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1014 {
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1018
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1020 {
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1028
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1034
1035 default:
1036 gcc_unreachable ();
1037 }
1038 }
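
/* E.g. for __builtin_pow (x, 4.0) the call is rewritten in place to
   __builtin_pow (x, 3.0) and 3 is returned; zero_one_operation below only
   propagates the base into the call's single use once the remaining
   exponent reaches 1.  */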
1039
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1043
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1046 {
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1051
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1056
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 gsi_remove (&gsi, true);
1066 release_defs (stmt);
1067
1068 if (is_gimple_call (stmt))
1069 unlink_stmt_vdef (stmt);
1070 }
1071
1072 /* Walks the linear chain with result *DEF searching for an operation
1073    with operand OP and code OPCODE, removing that from the chain. *DEF
1074 is updated if there is only one operand but no operation left. */
1075
1076 static void
1077 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1078 {
1079 gimple stmt = SSA_NAME_DEF_STMT (*def);
1080
1081 do
1082 {
1083 tree name;
1084
1085 if (opcode == MULT_EXPR
1086 && stmt_is_power_of_op (stmt, op))
1087 {
1088 if (decrement_power (stmt) == 1)
1089 propagate_op_to_single_use (op, stmt, def);
1090 return;
1091 }
1092
1093 name = gimple_assign_rhs1 (stmt);
1094
1095 /* If this is the operation we look for and one of the operands
1096 is ours simply propagate the other operand into the stmts
1097 single use. */
1098 if (gimple_assign_rhs_code (stmt) == opcode
1099 && (name == op
1100 || gimple_assign_rhs2 (stmt) == op))
1101 {
1102 if (name == op)
1103 name = gimple_assign_rhs2 (stmt);
1104 propagate_op_to_single_use (name, stmt, def);
1105 return;
1106 }
1107
1108 /* We might have a multiply of two __builtin_pow* calls, and
1109 the operand might be hiding in the rightmost one. */
1110 if (opcode == MULT_EXPR
1111 && gimple_assign_rhs_code (stmt) == opcode
1112 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1113 {
1114 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1115 if (stmt_is_power_of_op (stmt2, op))
1116 {
1117 if (decrement_power (stmt2) == 1)
1118 propagate_op_to_single_use (op, stmt2, def);
1119 return;
1120 }
1121 }
1122
1123 /* Continue walking the chain. */
1124 gcc_assert (name != op
1125 && TREE_CODE (name) == SSA_NAME);
1126 stmt = SSA_NAME_DEF_STMT (name);
1127 }
1128 while (1);
1129 }
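
/* As an illustrative sketch: to remove A from the MULT_EXPR chain

     t1 = x * a;
     t2 = t1 * b;

   with *DEF == t2, the walk starts at t2, steps to t1 through its rhs1,
   finds A as an operand there and propagates x into t1's single use,
   leaving t2 = x * b with t1's statement removed.  */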
1130
1131 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA name
1132    of type TYPE for the result. Places the statement after the definition
1133    of either OP1 or OP2. Returns the new statement. */
1134
1135 static gimple
1136 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1137 {
1138 gimple op1def = NULL, op2def = NULL;
1139 gimple_stmt_iterator gsi;
1140 tree op;
1141 gimple sum;
1142
1143 /* Create the addition statement. */
1144 op = make_ssa_name (type, NULL);
1145 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1146
1147 /* Find an insertion place and insert. */
1148 if (TREE_CODE (op1) == SSA_NAME)
1149 op1def = SSA_NAME_DEF_STMT (op1);
1150 if (TREE_CODE (op2) == SSA_NAME)
1151 op2def = SSA_NAME_DEF_STMT (op2);
1152 if ((!op1def || gimple_nop_p (op1def))
1153 && (!op2def || gimple_nop_p (op2def)))
1154 {
1155 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1156 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1157 }
1158 else if ((!op1def || gimple_nop_p (op1def))
1159 || (op2def && !gimple_nop_p (op2def)
1160 && stmt_dominates_stmt_p (op1def, op2def)))
1161 {
1162 if (gimple_code (op2def) == GIMPLE_PHI)
1163 {
1164 gsi = gsi_after_labels (gimple_bb (op2def));
1165 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1166 }
1167 else
1168 {
1169 if (!stmt_ends_bb_p (op2def))
1170 {
1171 gsi = gsi_for_stmt (op2def);
1172 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1173 }
1174 else
1175 {
1176 edge e;
1177 edge_iterator ei;
1178
1179 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1180 if (e->flags & EDGE_FALLTHRU)
1181 gsi_insert_on_edge_immediate (e, sum);
1182 }
1183 }
1184 }
1185 else
1186 {
1187 if (gimple_code (op1def) == GIMPLE_PHI)
1188 {
1189 gsi = gsi_after_labels (gimple_bb (op1def));
1190 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1191 }
1192 else
1193 {
1194 if (!stmt_ends_bb_p (op1def))
1195 {
1196 gsi = gsi_for_stmt (op1def);
1197 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1198 }
1199 else
1200 {
1201 edge e;
1202 edge_iterator ei;
1203
1204 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1205 if (e->flags & EDGE_FALLTHRU)
1206 gsi_insert_on_edge_immediate (e, sum);
1207 }
1208 }
1209 }
1210 update_stmt (sum);
1211
1212 return sum;
1213 }
1214
1215 /* Perform un-distribution of divisions and multiplications.
1216 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1217 to (A + B) / X for real X.
1218
1219 The algorithm is organized as follows.
1220
1221 - First we walk the addition chain *OPS looking for summands that
1222 are defined by a multiplication or a real division. This results
1223 in the candidates bitmap with relevant indices into *OPS.
1224
1225 - Second we build the chains of multiplications or divisions for
1226 these candidates, counting the number of occurrences of (operand, code)
1227 pairs in all of the candidates chains.
1228
1229 - Third we sort the (operand, code) pairs by number of occurrence and
1230 process them starting with the pair with the most uses.
1231
1232 * For each such pair we walk the candidates again to build a
1233 second candidate bitmap noting all multiplication/division chains
1234 that have at least one occurrence of (operand, code).
1235
1236 * We build an alternate addition chain only covering these
1237 candidates with one (operand, code) operation removed from their
1238 multiplication/division chain.
1239
1240 * The first candidate gets replaced by the alternate addition chain
1241 multiplied/divided by the operand.
1242
1243 * All candidate chains get disabled for further processing and
1244 processing of (operand, code) pairs continues.
1245
1246 The alternate addition chains built are re-processed by the main
1247 reassociation algorithm which allows optimizing a * x * y + b * y * x
1248    to (a + b) * x * y in one invocation of the reassociation pass. */
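
/* A small sketch of the bookkeeping (purely illustrative): for the chain
   a*x + b*x + c*y all three summands become candidates, the counting
   table gives (x, MULT_EXPR) a count of 2 and every other
   (operand, MULT_EXPR) pair a count of 1, and processing the
   (x, MULT_EXPR) pair rewrites the first two summands into (a + b) * x
   while the c*y summand is left alone.  */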
1249
1250 static bool
1251 undistribute_ops_list (enum tree_code opcode,
1252 vec<operand_entry_t> *ops, struct loop *loop)
1253 {
1254 unsigned int length = ops->length ();
1255 operand_entry_t oe1;
1256 unsigned i, j;
1257 sbitmap candidates, candidates2;
1258 unsigned nr_candidates, nr_candidates2;
1259 sbitmap_iterator sbi0;
1260 vec<operand_entry_t> *subops;
1261 htab_t ctable;
1262 bool changed = false;
1263 int next_oecount_id = 0;
1264
1265 if (length <= 1
1266 || opcode != PLUS_EXPR)
1267 return false;
1268
1269 /* Build a list of candidates to process. */
1270 candidates = sbitmap_alloc (length);
1271 bitmap_clear (candidates);
1272 nr_candidates = 0;
1273 FOR_EACH_VEC_ELT (*ops, i, oe1)
1274 {
1275 enum tree_code dcode;
1276 gimple oe1def;
1277
1278 if (TREE_CODE (oe1->op) != SSA_NAME)
1279 continue;
1280 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1281 if (!is_gimple_assign (oe1def))
1282 continue;
1283 dcode = gimple_assign_rhs_code (oe1def);
1284 if ((dcode != MULT_EXPR
1285 && dcode != RDIV_EXPR)
1286 || !is_reassociable_op (oe1def, dcode, loop))
1287 continue;
1288
1289 bitmap_set_bit (candidates, i);
1290 nr_candidates++;
1291 }
1292
1293 if (nr_candidates < 2)
1294 {
1295 sbitmap_free (candidates);
1296 return false;
1297 }
1298
1299 if (dump_file && (dump_flags & TDF_DETAILS))
1300 {
1301 fprintf (dump_file, "searching for un-distribute opportunities ");
1302 print_generic_expr (dump_file,
1303 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1304 fprintf (dump_file, " %d\n", nr_candidates);
1305 }
1306
1307 /* Build linearized sub-operand lists and the counting table. */
1308 cvec.create (0);
1309 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1310 /* ??? Macro arguments cannot have multi-argument template types in
1311      them. This typedef is needed to work around that limitation. */
1312 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1313 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1314 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1315 {
1316 gimple oedef;
1317 enum tree_code oecode;
1318 unsigned j;
1319
1320 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1321 oecode = gimple_assign_rhs_code (oedef);
1322 linearize_expr_tree (&subops[i], oedef,
1323 associative_tree_code (oecode), false);
1324
1325 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1326 {
1327 oecount c;
1328 void **slot;
1329 size_t idx;
1330 c.oecode = oecode;
1331 c.cnt = 1;
1332 c.id = next_oecount_id++;
1333 c.op = oe1->op;
1334 cvec.safe_push (c);
1335 idx = cvec.length () + 41;
1336 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1337 if (!*slot)
1338 {
1339 *slot = (void *)idx;
1340 }
1341 else
1342 {
1343 cvec.pop ();
1344 cvec[(size_t)*slot - 42].cnt++;
1345 }
1346 }
1347 }
1348 htab_delete (ctable);
1349
1350 /* Sort the counting table. */
1351 cvec.qsort (oecount_cmp);
1352
1353 if (dump_file && (dump_flags & TDF_DETAILS))
1354 {
1355 oecount *c;
1356 fprintf (dump_file, "Candidates:\n");
1357 FOR_EACH_VEC_ELT (cvec, j, c)
1358 {
1359 fprintf (dump_file, " %u %s: ", c->cnt,
1360 c->oecode == MULT_EXPR
1361 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1362 print_generic_expr (dump_file, c->op, 0);
1363 fprintf (dump_file, "\n");
1364 }
1365 }
1366
1367   /* Process the (operand, code) pairs in order of most occurrences. */
1368 candidates2 = sbitmap_alloc (length);
1369 while (!cvec.is_empty ())
1370 {
1371 oecount *c = &cvec.last ();
1372 if (c->cnt < 2)
1373 break;
1374
1375 /* Now collect the operands in the outer chain that contain
1376 the common operand in their inner chain. */
1377 bitmap_clear (candidates2);
1378 nr_candidates2 = 0;
1379 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1380 {
1381 gimple oedef;
1382 enum tree_code oecode;
1383 unsigned j;
1384 tree op = (*ops)[i]->op;
1385
1386 /* If we undistributed in this chain already this may be
1387 a constant. */
1388 if (TREE_CODE (op) != SSA_NAME)
1389 continue;
1390
1391 oedef = SSA_NAME_DEF_STMT (op);
1392 oecode = gimple_assign_rhs_code (oedef);
1393 if (oecode != c->oecode)
1394 continue;
1395
1396 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1397 {
1398 if (oe1->op == c->op)
1399 {
1400 bitmap_set_bit (candidates2, i);
1401 ++nr_candidates2;
1402 break;
1403 }
1404 }
1405 }
1406
1407 if (nr_candidates2 >= 2)
1408 {
1409 operand_entry_t oe1, oe2;
1410 gimple prod;
1411 int first = bitmap_first_set_bit (candidates2);
1412
1413 /* Build the new addition chain. */
1414 oe1 = (*ops)[first];
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 {
1417 fprintf (dump_file, "Building (");
1418 print_generic_expr (dump_file, oe1->op, 0);
1419 }
1420 zero_one_operation (&oe1->op, c->oecode, c->op);
1421 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1422 {
1423 gimple sum;
1424 oe2 = (*ops)[i];
1425 if (dump_file && (dump_flags & TDF_DETAILS))
1426 {
1427 fprintf (dump_file, " + ");
1428 print_generic_expr (dump_file, oe2->op, 0);
1429 }
1430 zero_one_operation (&oe2->op, c->oecode, c->op);
1431 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1432 oe1->op, oe2->op, opcode);
1433 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1434 oe2->rank = 0;
1435 oe1->op = gimple_get_lhs (sum);
1436 }
1437
1438 /* Apply the multiplication/division. */
1439 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1440 oe1->op, c->op, c->oecode);
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 {
1443 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1444 print_generic_expr (dump_file, c->op, 0);
1445 fprintf (dump_file, "\n");
1446 }
1447
1448 /* Record it in the addition chain and disable further
1449 undistribution with this op. */
1450 oe1->op = gimple_assign_lhs (prod);
1451 oe1->rank = get_rank (oe1->op);
1452 subops[first].release ();
1453
1454 changed = true;
1455 }
1456
1457 cvec.pop ();
1458 }
1459
1460 for (i = 0; i < ops->length (); ++i)
1461 subops[i].release ();
1462 free (subops);
1463 cvec.release ();
1464 sbitmap_free (candidates);
1465 sbitmap_free (candidates2);
1466
1467 return changed;
1468 }
1469
1470 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1471 expression, examine the other OPS to see if any of them are comparisons
1472 of the same values, which we may be able to combine or eliminate.
1473 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1474
1475 static bool
1476 eliminate_redundant_comparison (enum tree_code opcode,
1477 vec<operand_entry_t> *ops,
1478 unsigned int currindex,
1479 operand_entry_t curr)
1480 {
1481 tree op1, op2;
1482 enum tree_code lcode, rcode;
1483 gimple def1, def2;
1484 int i;
1485 operand_entry_t oe;
1486
1487 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1488 return false;
1489
1490 /* Check that CURR is a comparison. */
1491 if (TREE_CODE (curr->op) != SSA_NAME)
1492 return false;
1493 def1 = SSA_NAME_DEF_STMT (curr->op);
1494 if (!is_gimple_assign (def1))
1495 return false;
1496 lcode = gimple_assign_rhs_code (def1);
1497 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1498 return false;
1499 op1 = gimple_assign_rhs1 (def1);
1500 op2 = gimple_assign_rhs2 (def1);
1501
1502 /* Now look for a similar comparison in the remaining OPS. */
1503 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1504 {
1505 tree t;
1506
1507 if (TREE_CODE (oe->op) != SSA_NAME)
1508 continue;
1509 def2 = SSA_NAME_DEF_STMT (oe->op);
1510 if (!is_gimple_assign (def2))
1511 continue;
1512 rcode = gimple_assign_rhs_code (def2);
1513 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1514 continue;
1515
1516 /* If we got here, we have a match. See if we can combine the
1517 two comparisons. */
1518 if (opcode == BIT_IOR_EXPR)
1519 t = maybe_fold_or_comparisons (lcode, op1, op2,
1520 rcode, gimple_assign_rhs1 (def2),
1521 gimple_assign_rhs2 (def2));
1522 else
1523 t = maybe_fold_and_comparisons (lcode, op1, op2,
1524 rcode, gimple_assign_rhs1 (def2),
1525 gimple_assign_rhs2 (def2));
1526 if (!t)
1527 continue;
1528
1529 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1530 always give us a boolean_type_node value back. If the original
1531 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1532 we need to convert. */
1533 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1534 t = fold_convert (TREE_TYPE (curr->op), t);
1535
1536 if (TREE_CODE (t) != INTEGER_CST
1537 && !operand_equal_p (t, curr->op, 0))
1538 {
1539 enum tree_code subcode;
1540 tree newop1, newop2;
1541 if (!COMPARISON_CLASS_P (t))
1542 continue;
1543 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1544 STRIP_USELESS_TYPE_CONVERSION (newop1);
1545 STRIP_USELESS_TYPE_CONVERSION (newop2);
1546 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1547 continue;
1548 }
1549
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 {
1552 fprintf (dump_file, "Equivalence: ");
1553 print_generic_expr (dump_file, curr->op, 0);
1554 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1555 print_generic_expr (dump_file, oe->op, 0);
1556 fprintf (dump_file, " -> ");
1557 print_generic_expr (dump_file, t, 0);
1558 fprintf (dump_file, "\n");
1559 }
1560
1561 /* Now we can delete oe, as it has been subsumed by the new combined
1562 expression t. */
1563 ops->ordered_remove (i);
1564 reassociate_stats.ops_eliminated ++;
1565
1566 /* If t is the same as curr->op, we're done. Otherwise we must
1567 replace curr->op with t. Special case is if we got a constant
1568 back, in which case we add it to the end instead of in place of
1569 the current entry. */
1570 if (TREE_CODE (t) == INTEGER_CST)
1571 {
1572 ops->ordered_remove (currindex);
1573 add_to_ops_vec (ops, t);
1574 }
1575 else if (!operand_equal_p (t, curr->op, 0))
1576 {
1577 gimple sum;
1578 enum tree_code subcode;
1579 tree newop1;
1580 tree newop2;
1581 gcc_assert (COMPARISON_CLASS_P (t));
1582 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1583 STRIP_USELESS_TYPE_CONVERSION (newop1);
1584 STRIP_USELESS_TYPE_CONVERSION (newop2);
1585 gcc_checking_assert (is_gimple_val (newop1)
1586 && is_gimple_val (newop2));
1587 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1588 curr->op = gimple_get_lhs (sum);
1589 }
1590 return true;
1591 }
1592
1593 return false;
1594 }
1595
1596 /* Perform various identities and other optimizations on the list of
1597 operand entries, stored in OPS. The tree code for the binary
1598 operation between all the operands is OPCODE. */
1599
1600 static void
1601 optimize_ops_list (enum tree_code opcode,
1602 vec<operand_entry_t> *ops)
1603 {
1604 unsigned int length = ops->length ();
1605 unsigned int i;
1606 operand_entry_t oe;
1607 operand_entry_t oelast = NULL;
1608 bool iterate = false;
1609
1610 if (length == 1)
1611 return;
1612
1613 oelast = ops->last ();
1614
1615 /* If the last two are constants, pop the constants off, merge them
1616 and try the next two. */
1617 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1618 {
1619 operand_entry_t oelm1 = (*ops)[length - 2];
1620
1621 if (oelm1->rank == 0
1622 && is_gimple_min_invariant (oelm1->op)
1623 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1624 TREE_TYPE (oelast->op)))
1625 {
1626 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1627 oelm1->op, oelast->op);
1628
1629 if (folded && is_gimple_min_invariant (folded))
1630 {
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, "Merging constants\n");
1633
1634 ops->pop ();
1635 ops->pop ();
1636
1637 add_to_ops_vec (ops, folded);
1638 reassociate_stats.constants_eliminated++;
1639
1640 optimize_ops_list (opcode, ops);
1641 return;
1642 }
1643 }
1644 }
1645
1646 eliminate_using_constants (opcode, ops);
1647 oelast = NULL;
1648
1649 for (i = 0; ops->iterate (i, &oe);)
1650 {
1651 bool done = false;
1652
1653 if (eliminate_not_pairs (opcode, ops, i, oe))
1654 return;
1655 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1656 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1657 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1658 {
1659 if (done)
1660 return;
1661 iterate = true;
1662 oelast = NULL;
1663 continue;
1664 }
1665 oelast = oe;
1666 i++;
1667 }
1668
1669 length = ops->length ();
1670 oelast = ops->last ();
1671
1672 if (iterate)
1673 optimize_ops_list (opcode, ops);
1674 }
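
/* E.g. (illustrative only) for the chain a + 3 + b + 5 the rank sort puts
   the two constants next to each other at the end of the operand list,
   the code above folds them into a single constant 8, and the chain is
   later rewritten as a + b + 8.  */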
1675
1676 /* The following functions are subroutines to optimize_range_tests and allow
1677 it to try to change a logical combination of comparisons into a range
1678 test.
1679
1680 For example, both
1681 X == 2 || X == 5 || X == 3 || X == 4
1682 and
1683 X >= 2 && X <= 5
1684 are converted to
1685 (unsigned) (X - 2) <= 3
1686
1687    For more information see the comments above fold_range_test in
1688    fold-const.c; this implementation is for GIMPLE. */
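
/* The unsigned-subtraction trick works because for X in [2, 5] the value
   X - 2 lies in [0, 3], while for X < 2 the subtraction wraps around to a
   very large unsigned value and for X > 5 it exceeds 3, so one unsigned
   comparison covers both bounds.  */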
1689
1690 struct range_entry
1691 {
1692 tree exp;
1693 tree low;
1694 tree high;
1695 bool in_p;
1696 bool strict_overflow_p;
1697 unsigned int idx, next;
1698 };
1699
1700 /* This is similar to make_range in fold-const.c, but on top of
1701 GIMPLE instead of trees. If EXP is non-NULL, it should be
1702    an SSA_NAME and the STMT argument is ignored; otherwise STMT
1703    should be a GIMPLE_COND. */
1704
1705 static void
1706 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1707 {
1708 int in_p;
1709 tree low, high;
1710 bool is_bool, strict_overflow_p;
1711
1712 r->exp = NULL_TREE;
1713 r->in_p = false;
1714 r->strict_overflow_p = false;
1715 r->low = NULL_TREE;
1716 r->high = NULL_TREE;
1717 if (exp != NULL_TREE
1718 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1719 return;
1720
1721 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1722 and see if we can refine the range. Some of the cases below may not
1723 happen, but it doesn't seem worth worrying about this. We "continue"
1724 the outer loop when we've changed something; otherwise we "break"
1725 the switch, which will "break" the while. */
1726 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1727 high = low;
1728 in_p = 0;
1729 strict_overflow_p = false;
1730 is_bool = false;
1731 if (exp == NULL_TREE)
1732 is_bool = true;
1733 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1734 {
1735 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1736 is_bool = true;
1737 else
1738 return;
1739 }
1740 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1741 is_bool = true;
1742
1743 while (1)
1744 {
1745 enum tree_code code;
1746 tree arg0, arg1, exp_type;
1747 tree nexp;
1748 location_t loc;
1749
1750 if (exp != NULL_TREE)
1751 {
1752 if (TREE_CODE (exp) != SSA_NAME)
1753 break;
1754
1755 stmt = SSA_NAME_DEF_STMT (exp);
1756 if (!is_gimple_assign (stmt))
1757 break;
1758
1759 code = gimple_assign_rhs_code (stmt);
1760 arg0 = gimple_assign_rhs1 (stmt);
1761 arg1 = gimple_assign_rhs2 (stmt);
1762 exp_type = TREE_TYPE (exp);
1763 }
1764 else
1765 {
1766 code = gimple_cond_code (stmt);
1767 arg0 = gimple_cond_lhs (stmt);
1768 arg1 = gimple_cond_rhs (stmt);
1769 exp_type = boolean_type_node;
1770 }
1771
1772 if (TREE_CODE (arg0) != SSA_NAME)
1773 break;
1774 loc = gimple_location (stmt);
1775 switch (code)
1776 {
1777 case BIT_NOT_EXPR:
1778 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1779 {
1780 in_p = !in_p;
1781 exp = arg0;
1782 continue;
1783 }
1784 break;
1785 case SSA_NAME:
1786 exp = arg0;
1787 continue;
1788 CASE_CONVERT:
1789 if (is_bool)
1790 goto do_default;
1791 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1792 {
1793 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1794 is_bool = true;
1795 else
1796 return;
1797 }
1798 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1799 is_bool = true;
1800 goto do_default;
1801 case EQ_EXPR:
1802 case NE_EXPR:
1803 case LT_EXPR:
1804 case LE_EXPR:
1805 case GE_EXPR:
1806 case GT_EXPR:
1807 is_bool = true;
1808 /* FALLTHRU */
1809 default:
1810 if (!is_bool)
1811 return;
1812 do_default:
1813 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1814 &low, &high, &in_p,
1815 &strict_overflow_p);
1816 if (nexp != NULL_TREE)
1817 {
1818 exp = nexp;
1819 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1820 continue;
1821 }
1822 break;
1823 }
1824 break;
1825 }
1826 if (is_bool)
1827 {
1828 r->exp = exp;
1829 r->in_p = in_p;
1830 r->low = low;
1831 r->high = high;
1832 r->strict_overflow_p = strict_overflow_p;
1833 }
1834 }
1835
1836 /* Comparison function for qsort. Sort entries
1837 without SSA_NAME exp first, then with SSA_NAMEs sorted
1838 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1839 by increasing ->low and if ->low is the same, by increasing
1840    ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE means
1841    maximum. */
1842
1843 static int
1844 range_entry_cmp (const void *a, const void *b)
1845 {
1846 const struct range_entry *p = (const struct range_entry *) a;
1847 const struct range_entry *q = (const struct range_entry *) b;
1848
1849 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1850 {
1851 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1852 {
1853 /* Group range_entries for the same SSA_NAME together. */
1854 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1855 return -1;
1856 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1857 return 1;
1858 /* If ->low is different, NULL low goes first, then by
1859 ascending low. */
1860 if (p->low != NULL_TREE)
1861 {
1862 if (q->low != NULL_TREE)
1863 {
1864 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1865 p->low, q->low);
1866 if (tem && integer_onep (tem))
1867 return -1;
1868 tem = fold_binary (GT_EXPR, boolean_type_node,
1869 p->low, q->low);
1870 if (tem && integer_onep (tem))
1871 return 1;
1872 }
1873 else
1874 return 1;
1875 }
1876 else if (q->low != NULL_TREE)
1877 return -1;
1878 /* If ->high is different, NULL high goes last, before that by
1879 ascending high. */
1880 if (p->high != NULL_TREE)
1881 {
1882 if (q->high != NULL_TREE)
1883 {
1884 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1885 p->high, q->high);
1886 if (tem && integer_onep (tem))
1887 return -1;
1888 tem = fold_binary (GT_EXPR, boolean_type_node,
1889 p->high, q->high);
1890 if (tem && integer_onep (tem))
1891 return 1;
1892 }
1893 else
1894 return -1;
1895 }
1896 else if (p->high != NULL_TREE)
1897 return 1;
1898 /* If both ranges are the same, sort below by ascending idx. */
1899 }
1900 else
1901 return 1;
1902 }
1903 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1904 return -1;
1905
1906 if (p->idx < q->idx)
1907 return -1;
1908 else
1909 {
1910 gcc_checking_assert (p->idx > q->idx);
1911 return 1;
1912 }
1913 }
1914
1915 /* Helper routine of optimize_range_tests.
1916 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1917 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1918 OPCODE and OPS are arguments of optimize_range_tests. Return
1919 true if the range merge has been successful.
1920 If OPCODE is ERROR_MARK, this is called from within
1921 maybe_optimize_range_tests and is performing inter-bb range optimization.
1922    Changes should then be performed right away, and whether an op is
1923 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1924
1925 static bool
1926 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1927 unsigned int count, enum tree_code opcode,
1928 vec<operand_entry_t> *ops, tree exp, bool in_p,
1929 tree low, tree high, bool strict_overflow_p)
1930 {
1931 operand_entry_t oe = (*ops)[range->idx];
1932 tree op = oe->op;
1933 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1934 location_t loc = gimple_location (stmt);
1935 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1936 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1937 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1938 gimple_stmt_iterator gsi;
1939
1940 if (tem == NULL_TREE)
1941 return false;
1942
1943 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1944 warning_at (loc, OPT_Wstrict_overflow,
1945 "assuming signed overflow does not occur "
1946 "when simplifying range test");
1947
1948 if (dump_file && (dump_flags & TDF_DETAILS))
1949 {
1950 struct range_entry *r;
1951 fprintf (dump_file, "Optimizing range tests ");
1952 print_generic_expr (dump_file, range->exp, 0);
1953 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1954 print_generic_expr (dump_file, range->low, 0);
1955 fprintf (dump_file, ", ");
1956 print_generic_expr (dump_file, range->high, 0);
1957 fprintf (dump_file, "]");
1958 for (r = otherrange; r < otherrange + count; r++)
1959 {
1960 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1961 print_generic_expr (dump_file, r->low, 0);
1962 fprintf (dump_file, ", ");
1963 print_generic_expr (dump_file, r->high, 0);
1964 fprintf (dump_file, "]");
1965 }
1966 fprintf (dump_file, "\n into ");
1967 print_generic_expr (dump_file, tem, 0);
1968 fprintf (dump_file, "\n");
1969 }
1970
1971 if (opcode == BIT_IOR_EXPR
1972 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1973 tem = invert_truthvalue_loc (loc, tem);
1974
1975 tem = fold_convert_loc (loc, optype, tem);
1976 gsi = gsi_for_stmt (stmt);
1977 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1978 GSI_SAME_STMT);
1979
1980 /* If doing inter-bb range test optimization, update the
1981 stmts immediately. Start with changing the first range test
1982 immediate use to the new value (TEM), or, if the first range
1983 test is a GIMPLE_COND stmt, change that condition. */
1984 if (opcode == ERROR_MARK)
1985 {
1986 if (op)
1987 {
1988 imm_use_iterator iter;
1989 use_operand_p use_p;
1990 gimple use_stmt;
1991
1992 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1993 {
1994 if (is_gimple_debug (use_stmt))
1995 continue;
1996 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1997 SET_USE (use_p, tem);
1998 update_stmt (use_stmt);
1999 }
2000 }
2001 else
2002 {
2003 gimple_cond_set_code (stmt, NE_EXPR);
2004 gimple_cond_set_lhs (stmt, tem);
2005 gimple_cond_set_rhs (stmt, boolean_false_node);
2006 update_stmt (stmt);
2007 }
2008 }
2009 oe->op = tem;
2010 range->exp = exp;
2011 range->low = low;
2012 range->high = high;
2013 range->in_p = in_p;
2014 range->strict_overflow_p = false;
2015
2016 for (range = otherrange; range < otherrange + count; range++)
2017 {
2018 oe = (*ops)[range->idx];
2019 /* Now change all the other range test immediate uses, so that
2020 those tests will be optimized away. */
2021 if (opcode == ERROR_MARK)
2022 {
2023 if (oe->op)
2024 {
2025 imm_use_iterator iter;
2026 use_operand_p use_p;
2027 gimple use_stmt;
2028
2029 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2030 {
2031 if (is_gimple_debug (use_stmt))
2032 continue;
2033 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2034 adjust it into _7 = _9;. */
2035 if (is_gimple_assign (use_stmt)
2036 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2037 {
2038 tree expr = NULL_TREE;
2039 if (oe->op == gimple_assign_rhs1 (use_stmt))
2040 expr = gimple_assign_rhs2 (use_stmt);
2041 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2042 expr = gimple_assign_rhs1 (use_stmt);
2043 if (expr
2044 && expr != oe->op
2045 && TREE_CODE (expr) == SSA_NAME)
2046 {
2047 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2048 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2049 expr, NULL_TREE);
2050 update_stmt (use_stmt);
2051 continue;
2052 }
2053 }
2054 /* If imm use of _8 is a statement like _7 = (int) _8;,
2055 adjust it into _7 = 0; or _7 = 1;. */
2056 if (gimple_assign_cast_p (use_stmt)
2057 && oe->op == gimple_assign_rhs1 (use_stmt))
2058 {
2059 tree lhs = gimple_assign_lhs (use_stmt);
2060 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2061 {
2062 gimple_stmt_iterator gsi2
2063 = gsi_for_stmt (use_stmt);
2064 tree expr = build_int_cst (TREE_TYPE (lhs),
2065 oe->rank == BIT_IOR_EXPR
2066 ? 0 : 1);
2067 gimple_assign_set_rhs_with_ops (&gsi2,
2068 INTEGER_CST,
2069 expr, NULL_TREE);
2070 update_stmt (use_stmt);
2071 continue;
2072 }
2073 }
2074 /* Otherwise replace the use with 0 or 1. */
2075 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2076 SET_USE (use_p,
2077 build_int_cst (TREE_TYPE (oe->op),
2078 oe->rank == BIT_IOR_EXPR
2079 ? 0 : 1));
2080 update_stmt (use_stmt);
2081 }
2082 }
2083 else
2084 {
2085 /* If range test was a GIMPLE_COND, simply change it
2086 into an always false or always true condition. */
2087 stmt = last_stmt (BASIC_BLOCK (oe->id));
2088 if (oe->rank == BIT_IOR_EXPR)
2089 gimple_cond_make_false (stmt);
2090 else
2091 gimple_cond_make_true (stmt);
2092 update_stmt (stmt);
2093 }
2094 }
2095 oe->op = error_mark_node;
2096 range->exp = NULL_TREE;
2097 }
2098 return true;
2099 }
2100
2101 /* Optimize range tests, similarly to how fold_range_test optimizes
2102 	   them on trees.  The tree code for the binary
2103 operation between all the operands is OPCODE.
2104 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2105 maybe_optimize_range_tests for inter-bb range optimization.
2106 In that case if oe->op is NULL, oe->id is bb->index whose
2107 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2108 the actual opcode. */
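/* For illustration (hypothetical SSA names and constants), with
   OPCODE == BIT_IOR_EXPR and operands testing a_1 == 0, a_1 == 1 and
   a_1 == 5, the [0, 0] and [1, 1] ranges merge, so roughly

     _10 = _7 | _8 | _9   (with _7 = a_1 == 0, _8 = a_1 == 1, _9 = a_1 == 5)

   becomes

     _10 = _11 | _9       (with _11 = (unsigned) a_1 <= 1)

   while the a_1 == 5 test is left alone.  */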
2109
2110 static void
2111 optimize_range_tests (enum tree_code opcode,
2112 vec<operand_entry_t> *ops)
2113 {
2114 unsigned int length = ops->length (), i, j, first;
2115 operand_entry_t oe;
2116 struct range_entry *ranges;
2117 bool any_changes = false;
2118
2119 if (length == 1)
2120 return;
2121
2122 ranges = XNEWVEC (struct range_entry, length);
2123 for (i = 0; i < length; i++)
2124 {
2125 oe = (*ops)[i];
2126 ranges[i].idx = i;
2127 init_range_entry (ranges + i, oe->op,
2128 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2129 /* For | invert it now, we will invert it again before emitting
2130 the optimized expression. */
2131 if (opcode == BIT_IOR_EXPR
2132 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2133 ranges[i].in_p = !ranges[i].in_p;
2134 }
2135
2136 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2137 for (i = 0; i < length; i++)
2138 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2139 break;
2140
2141 /* Try to merge ranges. */
2142 for (first = i; i < length; i++)
2143 {
2144 tree low = ranges[i].low;
2145 tree high = ranges[i].high;
2146 int in_p = ranges[i].in_p;
2147 bool strict_overflow_p = ranges[i].strict_overflow_p;
2148 int update_fail_count = 0;
2149
2150 for (j = i + 1; j < length; j++)
2151 {
2152 if (ranges[i].exp != ranges[j].exp)
2153 break;
2154 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2155 ranges[j].in_p, ranges[j].low, ranges[j].high))
2156 break;
2157 strict_overflow_p |= ranges[j].strict_overflow_p;
2158 }
2159
2160 if (j == i + 1)
2161 continue;
2162
2163 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2164 ops, ranges[i].exp, in_p, low, high,
2165 strict_overflow_p))
2166 {
2167 i = j - 1;
2168 any_changes = true;
2169 }
2170 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2171 while update_range_test would fail. */
2172 else if (update_fail_count == 64)
2173 i = j - 1;
2174 else
2175 ++update_fail_count;
2176 }
2177
2178 /* Optimize X == CST1 || X == CST2
2179 if popcount (CST1 ^ CST2) == 1 into
2180 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181 Similarly for ranges. E.g.
2182 X != 2 && X != 3 && X != 10 && X != 11
2183 will be transformed by the above loop into
2184 (X - 2U) <= 1U && (X - 10U) <= 1U
2185 and this loop can transform that into
2186 ((X & ~8) - 2U) <= 1U. */
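/* Worked example for the loop below, using the ranges from the comment
   above: lowi == 2, highi == 3, lowj == 10, highj == 11.  Then
   lowxor == 2 ^ 10 == 8, and 8 & (8 - 1) == 0, so it is a power of two;
   highxor == 3 ^ 11 == 8 matches lowxor; tem == ~8, so both ranges
   collapse into the single range [2 & ~8, 3 & ~8] == [2, 3] on the
   expression X & ~8.  */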
2187 for (i = first; i < length; i++)
2188 {
2189 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2190
2191 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2192 continue;
2193 type = TREE_TYPE (ranges[i].exp);
2194 if (!INTEGRAL_TYPE_P (type))
2195 continue;
2196 lowi = ranges[i].low;
2197 if (lowi == NULL_TREE)
2198 lowi = TYPE_MIN_VALUE (type);
2199 highi = ranges[i].high;
2200 if (highi == NULL_TREE)
2201 continue;
2202 for (j = i + 1; j < length && j < i + 64; j++)
2203 {
2204 if (ranges[j].exp == NULL_TREE)
2205 continue;
2206 if (ranges[i].exp != ranges[j].exp)
2207 break;
2208 if (ranges[j].in_p)
2209 continue;
2210 lowj = ranges[j].low;
2211 if (lowj == NULL_TREE)
2212 continue;
2213 highj = ranges[j].high;
2214 if (highj == NULL_TREE)
2215 highj = TYPE_MAX_VALUE (type);
2216 tem = fold_binary (GT_EXPR, boolean_type_node,
2217 lowj, highi);
2218 if (tem == NULL_TREE || !integer_onep (tem))
2219 continue;
2220 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2221 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2222 continue;
2223 gcc_checking_assert (!integer_zerop (lowxor));
2224 tem = fold_binary (MINUS_EXPR, type, lowxor,
2225 build_int_cst (type, 1));
2226 if (tem == NULL_TREE)
2227 continue;
2228 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2229 if (tem == NULL_TREE || !integer_zerop (tem))
2230 continue;
2231 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2232 if (!tree_int_cst_equal (lowxor, highxor))
2233 continue;
2234 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2235 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2236 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2237 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2238 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2239 ranges[i].in_p, lowj, highj,
2240 ranges[i].strict_overflow_p
2241 || ranges[j].strict_overflow_p))
2242 {
2243 any_changes = true;
2244 break;
2245 }
2246 }
2247 }
2248
2249 if (any_changes && opcode != ERROR_MARK)
2250 {
2251 j = 0;
2252 FOR_EACH_VEC_ELT (*ops, i, oe)
2253 {
2254 if (oe->op == error_mark_node)
2255 continue;
2256 else if (i != j)
2257 (*ops)[j] = oe;
2258 j++;
2259 }
2260 ops->truncate (j);
2261 }
2262
2263 XDELETEVEC (ranges);
2264 }
2265
2266 /* Return true if STMT is a cast like:
2267 <bb N>:
2268 ...
2269 _123 = (int) _234;
2270
2271 <bb M>:
2272 # _345 = PHI <_123(N), 1(...), 1(...)>
2273 where _234 has bool type, _123 has single use and
2274 bb N has a single successor M. This is commonly used in
2275 the last block of a range test. */
2276
2277 static bool
2278 final_range_test_p (gimple stmt)
2279 {
2280 basic_block bb, rhs_bb;
2281 edge e;
2282 tree lhs, rhs;
2283 use_operand_p use_p;
2284 gimple use_stmt;
2285
2286 if (!gimple_assign_cast_p (stmt))
2287 return false;
2288 bb = gimple_bb (stmt);
2289 if (!single_succ_p (bb))
2290 return false;
2291 e = single_succ_edge (bb);
2292 if (e->flags & EDGE_COMPLEX)
2293 return false;
2294
2295 lhs = gimple_assign_lhs (stmt);
2296 rhs = gimple_assign_rhs1 (stmt);
2297 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2298 || TREE_CODE (rhs) != SSA_NAME
2299 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2300 return false;
2301
2302 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2303 if (!single_imm_use (lhs, &use_p, &use_stmt))
2304 return false;
2305
2306 if (gimple_code (use_stmt) != GIMPLE_PHI
2307 || gimple_bb (use_stmt) != e->dest)
2308 return false;
2309
2310 /* And that the rhs is defined in the same loop. */
2311 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2312 if (rhs_bb == NULL
2313 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2314 return false;
2315
2316 return true;
2317 }
2318
2319 /* Return true if BB is a suitable basic block for inter-bb range test
2320 	   optimization.  If BACKWARD is true, BB should be the only predecessor
2321 	   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2322 	   or compared against the common basic block to which all the conditions
2323 	   branch when true resp. false.  If BACKWARD is false, TEST_BB should
2324 	   be the only predecessor of BB.  */
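/* For illustration (hypothetical CFG), with BACKWARD true, BB == bb2 and
   TEST_BB == bb3 in

     bb2: if (a_1 == 3) goto bb5; else goto bb3;
     bb3: if (a_1 == 5) goto bb5; else goto bb4;
     bb5: # _9 = PHI <1(bb2), 1(bb3), ...>

   the routine fills *OTHER_BB with bb5, since both conditions branch
   there and the PHI arguments for the bb2 and bb3 edges agree.  */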
2325
2326 static bool
2327 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2328 bool backward)
2329 {
2330 edge_iterator ei, ei2;
2331 edge e, e2;
2332 gimple stmt;
2333 gimple_stmt_iterator gsi;
2334 bool other_edge_seen = false;
2335 bool is_cond;
2336
2337 if (test_bb == bb)
2338 return false;
2339 /* Check last stmt first. */
2340 stmt = last_stmt (bb);
2341 if (stmt == NULL
2342 || (gimple_code (stmt) != GIMPLE_COND
2343 && (backward || !final_range_test_p (stmt)))
2344 || gimple_visited_p (stmt)
2345 || stmt_could_throw_p (stmt)
2346 || *other_bb == bb)
2347 return false;
2348 is_cond = gimple_code (stmt) == GIMPLE_COND;
2349 if (is_cond)
2350 {
2351 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2352 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2353 to *OTHER_BB (if not set yet, try to find it out). */
2354 if (EDGE_COUNT (bb->succs) != 2)
2355 return false;
2356 FOR_EACH_EDGE (e, ei, bb->succs)
2357 {
2358 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2359 return false;
2360 if (e->dest == test_bb)
2361 {
2362 if (backward)
2363 continue;
2364 else
2365 return false;
2366 }
2367 if (e->dest == bb)
2368 return false;
2369 if (*other_bb == NULL)
2370 {
2371 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2372 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2373 return false;
2374 else if (e->dest == e2->dest)
2375 *other_bb = e->dest;
2376 if (*other_bb == NULL)
2377 return false;
2378 }
2379 if (e->dest == *other_bb)
2380 other_edge_seen = true;
2381 else if (backward)
2382 return false;
2383 }
2384 if (*other_bb == NULL || !other_edge_seen)
2385 return false;
2386 }
2387 else if (single_succ (bb) != *other_bb)
2388 return false;
2389
2390 /* Now check all PHIs of *OTHER_BB. */
2391 e = find_edge (bb, *other_bb);
2392 e2 = find_edge (test_bb, *other_bb);
2393 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2394 {
2395 gimple phi = gsi_stmt (gsi);
2396 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2397 corresponding to BB and TEST_BB predecessor must be the same. */
2398 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2399 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2400 {
2401 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2402 one of the PHIs should have the lhs of the last stmt in
2403 that block as PHI arg and that PHI should have 0 or 1
2404 corresponding to it in all other range test basic blocks
2405 considered. */
2406 if (!is_cond)
2407 {
2408 if (gimple_phi_arg_def (phi, e->dest_idx)
2409 == gimple_assign_lhs (stmt)
2410 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2411 || integer_onep (gimple_phi_arg_def (phi,
2412 e2->dest_idx))))
2413 continue;
2414 }
2415 else
2416 {
2417 gimple test_last = last_stmt (test_bb);
2418 if (gimple_code (test_last) != GIMPLE_COND
2419 && gimple_phi_arg_def (phi, e2->dest_idx)
2420 == gimple_assign_lhs (test_last)
2421 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2422 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2423 continue;
2424 }
2425
2426 return false;
2427 }
2428 }
2429 return true;
2430 }
2431
2432 /* Return true if BB doesn't have side-effects that would disallow
2433 range test optimization, all SSA_NAMEs set in the bb are consumed
2434 in the bb and there are no PHIs. */
2435
2436 static bool
2437 no_side_effect_bb (basic_block bb)
2438 {
2439 gimple_stmt_iterator gsi;
2440 gimple last;
2441
2442 if (!gimple_seq_empty_p (phi_nodes (bb)))
2443 return false;
2444 last = last_stmt (bb);
2445 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2446 {
2447 gimple stmt = gsi_stmt (gsi);
2448 tree lhs;
2449 imm_use_iterator imm_iter;
2450 use_operand_p use_p;
2451
2452 if (is_gimple_debug (stmt))
2453 continue;
2454 if (gimple_has_side_effects (stmt))
2455 return false;
2456 if (stmt == last)
2457 return true;
2458 if (!is_gimple_assign (stmt))
2459 return false;
2460 lhs = gimple_assign_lhs (stmt);
2461 if (TREE_CODE (lhs) != SSA_NAME)
2462 return false;
2463 if (gimple_assign_rhs_could_trap_p (stmt))
2464 return false;
2465 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2466 {
2467 gimple use_stmt = USE_STMT (use_p);
2468 if (is_gimple_debug (use_stmt))
2469 continue;
2470 if (gimple_bb (use_stmt) != bb)
2471 return false;
2472 }
2473 }
2474 return false;
2475 }
2476
2477 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2478 return true and fill in *OPS recursively. */
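/* For illustration (hypothetical names), if VAR is _5 with

     _3 = _1 | _2;
     _5 = _3 | _4;

   and _1, _2 and _4 are single-use SSA_NAMEs whose defining statements
   are not themselves reassociable BIT_IOR_EXPRs, the recursion pushes
   the three leaves _1, _2 and _4 into *OPS.  */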
2479
2480 static bool
2481 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2482 struct loop *loop)
2483 {
2484 gimple stmt = SSA_NAME_DEF_STMT (var);
2485 tree rhs[2];
2486 int i;
2487
2488 if (!is_reassociable_op (stmt, code, loop))
2489 return false;
2490
2491 rhs[0] = gimple_assign_rhs1 (stmt);
2492 rhs[1] = gimple_assign_rhs2 (stmt);
2493 gimple_set_visited (stmt, true);
2494 for (i = 0; i < 2; i++)
2495 if (TREE_CODE (rhs[i]) == SSA_NAME
2496 && !get_ops (rhs[i], code, ops, loop)
2497 && has_single_use (rhs[i]))
2498 {
2499 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2500
2501 oe->op = rhs[i];
2502 oe->rank = code;
2503 oe->id = 0;
2504 oe->count = 1;
2505 ops->safe_push (oe);
2506 }
2507 return true;
2508 }
2509
2510 /* Inter-bb range test optimization. */
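/* For illustration (hypothetical source), code such as

     if (a == 0) goto out;
     if (a == 1) goto out;

   puts the two comparisons into separate basic blocks, so the intra-block
   optimize_range_tests above cannot see them together; this routine
   collects such GIMPLE_CONDs across blocks and hands them to
   optimize_range_tests as one operand list.  */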
2511
2512 static void
2513 maybe_optimize_range_tests (gimple stmt)
2514 {
2515 basic_block first_bb = gimple_bb (stmt);
2516 basic_block last_bb = first_bb;
2517 basic_block other_bb = NULL;
2518 basic_block bb;
2519 edge_iterator ei;
2520 edge e;
2521 vec<operand_entry_t> ops = vNULL;
2522
2523 /* Consider only basic blocks that end with GIMPLE_COND or
2524 a cast statement satisfying final_range_test_p. All
2525 but the last bb in the first_bb .. last_bb range
2526 should end with GIMPLE_COND. */
2527 if (gimple_code (stmt) == GIMPLE_COND)
2528 {
2529 if (EDGE_COUNT (first_bb->succs) != 2)
2530 return;
2531 }
2532 else if (final_range_test_p (stmt))
2533 other_bb = single_succ (first_bb);
2534 else
2535 return;
2536
2537 if (stmt_could_throw_p (stmt))
2538 return;
2539
2540 /* As relative ordering of post-dominator sons isn't fixed,
2541 maybe_optimize_range_tests can be called first on any
2542 bb in the range we want to optimize. So, start searching
2543 backwards, if first_bb can be set to a predecessor. */
2544 while (single_pred_p (first_bb))
2545 {
2546 basic_block pred_bb = single_pred (first_bb);
2547 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2548 break;
2549 if (!no_side_effect_bb (first_bb))
2550 break;
2551 first_bb = pred_bb;
2552 }
2553 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2554 Before starting forward search in last_bb successors, find
2555 out the other_bb. */
2556 if (first_bb == last_bb)
2557 {
2558 other_bb = NULL;
2559 /* As non-GIMPLE_COND last stmt always terminates the range,
2560 	 if the backward search didn't discover anything, just give up.  */
2561 if (gimple_code (stmt) != GIMPLE_COND)
2562 return;
2563 /* Look at both successors. Either it ends with a GIMPLE_COND
2564 and satisfies suitable_cond_bb, or ends with a cast and
2565 other_bb is that cast's successor. */
2566 FOR_EACH_EDGE (e, ei, first_bb->succs)
2567 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2568 || e->dest == first_bb)
2569 return;
2570 else if (single_pred_p (e->dest))
2571 {
2572 stmt = last_stmt (e->dest);
2573 if (stmt
2574 && gimple_code (stmt) == GIMPLE_COND
2575 && EDGE_COUNT (e->dest->succs) == 2)
2576 {
2577 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2578 break;
2579 else
2580 other_bb = NULL;
2581 }
2582 else if (stmt
2583 && final_range_test_p (stmt)
2584 && find_edge (first_bb, single_succ (e->dest)))
2585 {
2586 other_bb = single_succ (e->dest);
2587 if (other_bb == first_bb)
2588 other_bb = NULL;
2589 }
2590 }
2591 if (other_bb == NULL)
2592 return;
2593 }
2594 /* Now do the forward search, moving last_bb to successor bbs
2595 that aren't other_bb. */
2596 while (EDGE_COUNT (last_bb->succs) == 2)
2597 {
2598 FOR_EACH_EDGE (e, ei, last_bb->succs)
2599 if (e->dest != other_bb)
2600 break;
2601 if (e == NULL)
2602 break;
2603 if (!single_pred_p (e->dest))
2604 break;
2605 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2606 break;
2607 if (!no_side_effect_bb (e->dest))
2608 break;
2609 last_bb = e->dest;
2610 }
2611 if (first_bb == last_bb)
2612 return;
2613 /* Here basic blocks first_bb through last_bb's predecessor
2614 end with GIMPLE_COND, all of them have one of the edges to
2615 other_bb and another to another block in the range,
2616 all blocks except first_bb don't have side-effects and
2617 last_bb ends with either GIMPLE_COND, or cast satisfying
2618 final_range_test_p. */
2619 for (bb = last_bb; ; bb = single_pred (bb))
2620 {
2621 enum tree_code code;
2622 tree lhs, rhs;
2623
2624 e = find_edge (bb, other_bb);
2625 stmt = last_stmt (bb);
2626 gimple_set_visited (stmt, true);
2627 if (gimple_code (stmt) != GIMPLE_COND)
2628 {
2629 use_operand_p use_p;
2630 gimple phi;
2631 edge e2;
2632 unsigned int d;
2633
2634 lhs = gimple_assign_lhs (stmt);
2635 rhs = gimple_assign_rhs1 (stmt);
2636 gcc_assert (bb == last_bb);
2637
2638 /* stmt is
2639 _123 = (int) _234;
2640
2641 followed by:
2642 <bb M>:
2643 # _345 = PHI <_123(N), 1(...), 1(...)>
2644
2645 or 0 instead of 1. If it is 0, the _234
2646 range test is anded together with all the
2647 other range tests, if it is 1, it is ored with
2648 them. */
2649 single_imm_use (lhs, &use_p, &phi);
2650 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2651 e2 = find_edge (first_bb, other_bb);
2652 d = e2->dest_idx;
2653 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2654 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2655 code = BIT_AND_EXPR;
2656 else
2657 {
2658 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2659 code = BIT_IOR_EXPR;
2660 }
2661
2662 /* If _234 SSA_NAME_DEF_STMT is
2663 _234 = _567 | _789;
2664 	     (or &, corresponding to 1/0 in the phi arguments),
2665 push into ops the individual range test arguments
2666 of the bitwise or resp. and, recursively. */
2667 if (!get_ops (rhs, code, &ops,
2668 loop_containing_stmt (stmt))
2669 && has_single_use (rhs))
2670 {
2671 /* Otherwise, push the _234 range test itself. */
2672 operand_entry_t oe
2673 = (operand_entry_t) pool_alloc (operand_entry_pool);
2674
2675 oe->op = rhs;
2676 oe->rank = code;
2677 oe->id = 0;
2678 oe->count = 1;
2679 ops.safe_push (oe);
2680 }
2681 continue;
2682 }
2683 /* Otherwise stmt is GIMPLE_COND. */
2684 code = gimple_cond_code (stmt);
2685 lhs = gimple_cond_lhs (stmt);
2686 rhs = gimple_cond_rhs (stmt);
2687 if (TREE_CODE (lhs) == SSA_NAME
2688 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2689 && ((code != EQ_EXPR && code != NE_EXPR)
2690 || rhs != boolean_false_node
2691 /* Either push into ops the individual bitwise
2692 or resp. and operands, depending on which
2693 edge is other_bb. */
2694 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2695 ^ (code == EQ_EXPR))
2696 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2697 loop_containing_stmt (stmt))))
2698 {
2699 /* Or push the GIMPLE_COND stmt itself. */
2700 operand_entry_t oe
2701 = (operand_entry_t) pool_alloc (operand_entry_pool);
2702
2703 oe->op = NULL;
2704 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2705 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2706 	  /* oe->op = NULL signals that there is no SSA_NAME
2707 	     for the range test, and oe->id instead is the
2708 	     number of the basic block at whose end the
2709 	     GIMPLE_COND is.  */
2710 oe->id = bb->index;
2711 oe->count = 1;
2712 ops.safe_push (oe);
2713 }
2714 if (bb == first_bb)
2715 break;
2716 }
2717 if (ops.length () > 1)
2718 optimize_range_tests (ERROR_MARK, &ops);
2719 ops.release ();
2720 }
2721
2722 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2723 of STMT in it's operands. This is also known as a "destructive
2724 update" operation. */
2725
2726 static bool
2727 is_phi_for_stmt (gimple stmt, tree operand)
2728 {
2729 gimple def_stmt;
2730 tree lhs;
2731 use_operand_p arg_p;
2732 ssa_op_iter i;
2733
2734 if (TREE_CODE (operand) != SSA_NAME)
2735 return false;
2736
2737 lhs = gimple_assign_lhs (stmt);
2738
2739 def_stmt = SSA_NAME_DEF_STMT (operand);
2740 if (gimple_code (def_stmt) != GIMPLE_PHI)
2741 return false;
2742
2743 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2744 if (lhs == USE_FROM_PTR (arg_p))
2745 return true;
2746 return false;
2747 }
2748
2749 /* Remove def stmt of VAR if VAR has zero uses and recurse
2750 on rhs1 operand if so. */
2751
2752 static void
2753 remove_visited_stmt_chain (tree var)
2754 {
2755 gimple stmt;
2756 gimple_stmt_iterator gsi;
2757
2758 while (1)
2759 {
2760 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2761 return;
2762 stmt = SSA_NAME_DEF_STMT (var);
2763 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2764 {
2765 var = gimple_assign_rhs1 (stmt);
2766 gsi = gsi_for_stmt (stmt);
2767 gsi_remove (&gsi, true);
2768 release_defs (stmt);
2769 }
2770 else
2771 return;
2772 }
2773 }
2774
2775 /* This function checks three consecutive operands in
2776    the passed operands vector OPS starting from OPINDEX and
2777    swaps two operands if it is profitable for the binary operation
2778    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2779
2780 We pair ops with the same rank if possible.
2781
2782 The alternative we try is to see if STMT is a destructive
2783 update style statement, which is like:
2784 b = phi (a, ...)
2785 a = c + b;
2786 In that case, we want to use the destructive update form to
2787 expose the possible vectorizer sum reduction opportunity.
2788 In that case, the third operand will be the phi node. This
2789 check is not performed if STMT is null.
2790
2791 We could, of course, try to be better as noted above, and do a
2792 lot of work to try to find these opportunities in >3 operand
2793 cases, but it is unlikely to be worth it. */
2794
2795 static void
2796 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2797 unsigned int opindex, gimple stmt)
2798 {
2799 operand_entry_t oe1, oe2, oe3;
2800
2801 oe1 = ops[opindex];
2802 oe2 = ops[opindex + 1];
2803 oe3 = ops[opindex + 2];
2804
2805 if ((oe1->rank == oe2->rank
2806 && oe2->rank != oe3->rank)
2807 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2808 && !is_phi_for_stmt (stmt, oe1->op)
2809 && !is_phi_for_stmt (stmt, oe2->op)))
2810 {
2811 struct operand_entry temp = *oe3;
2812 oe3->op = oe1->op;
2813 oe3->rank = oe1->rank;
2814 oe1->op = temp.op;
2815 oe1->rank= temp.rank;
2816 }
2817 else if ((oe1->rank == oe3->rank
2818 && oe2->rank != oe3->rank)
2819 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2820 && !is_phi_for_stmt (stmt, oe1->op)
2821 && !is_phi_for_stmt (stmt, oe3->op)))
2822 {
2823 struct operand_entry temp = *oe2;
2824 oe2->op = oe1->op;
2825 oe2->rank = oe1->rank;
2826 oe1->op = temp.op;
2827 oe1->rank= temp.rank;
2828 }
2829 }
2830
2831 /* Recursively rewrite our linearized statements so that the operators
2832 match those in OPS[OPINDEX], putting the computation in rank
2833 order. */
2834
2835 static void
2836 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2837 vec<operand_entry_t> ops, bool moved)
2838 {
2839 tree rhs1 = gimple_assign_rhs1 (stmt);
2840 tree rhs2 = gimple_assign_rhs2 (stmt);
2841 operand_entry_t oe;
2842
2843 /* If we have three operands left, then we want to make sure the ones
2844 that get the double binary op are chosen wisely. */
2845 if (opindex + 3 == ops.length ())
2846 swap_ops_for_binary_stmt (ops, opindex, stmt);
2847
2848 /* The final recursion case for this function is that you have
2849 exactly two operations left.
2850      If we had exactly one op in the entire list to start with, we
2851 would have never called this function, and the tail recursion
2852 rewrites them one at a time. */
2853 if (opindex + 2 == ops.length ())
2854 {
2855 operand_entry_t oe1, oe2;
2856
2857 oe1 = ops[opindex];
2858 oe2 = ops[opindex + 1];
2859
2860 if (rhs1 != oe1->op || rhs2 != oe2->op)
2861 {
2862 if (dump_file && (dump_flags & TDF_DETAILS))
2863 {
2864 fprintf (dump_file, "Transforming ");
2865 print_gimple_stmt (dump_file, stmt, 0, 0);
2866 }
2867
2868 gimple_assign_set_rhs1 (stmt, oe1->op);
2869 gimple_assign_set_rhs2 (stmt, oe2->op);
2870 update_stmt (stmt);
2871 if (rhs1 != oe1->op && rhs1 != oe2->op)
2872 remove_visited_stmt_chain (rhs1);
2873
2874 if (dump_file && (dump_flags & TDF_DETAILS))
2875 {
2876 fprintf (dump_file, " into ");
2877 print_gimple_stmt (dump_file, stmt, 0, 0);
2878 }
2879 }
2880 return;
2881 }
2882
2883 /* If we hit here, we should have 3 or more ops left. */
2884 gcc_assert (opindex + 2 < ops.length ());
2885
2886 /* Rewrite the next operator. */
2887 oe = ops[opindex];
2888
2889 if (oe->op != rhs2)
2890 {
2891 if (!moved)
2892 {
2893 gimple_stmt_iterator gsinow, gsirhs1;
2894 gimple stmt1 = stmt, stmt2;
2895 unsigned int count;
2896
2897 gsinow = gsi_for_stmt (stmt);
2898 count = ops.length () - opindex - 2;
2899 while (count-- != 0)
2900 {
2901 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2902 gsirhs1 = gsi_for_stmt (stmt2);
2903 gsi_move_before (&gsirhs1, &gsinow);
2904 gsi_prev (&gsinow);
2905 stmt1 = stmt2;
2906 }
2907 moved = true;
2908 }
2909
2910 if (dump_file && (dump_flags & TDF_DETAILS))
2911 {
2912 fprintf (dump_file, "Transforming ");
2913 print_gimple_stmt (dump_file, stmt, 0, 0);
2914 }
2915
2916 gimple_assign_set_rhs2 (stmt, oe->op);
2917 update_stmt (stmt);
2918
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2920 {
2921 fprintf (dump_file, " into ");
2922 print_gimple_stmt (dump_file, stmt, 0, 0);
2923 }
2924 }
2925 /* Recurse on the LHS of the binary operator, which is guaranteed to
2926 be the non-leaf side. */
2927 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2928 }
2929
2930 /* Find out how many cycles we need to compute the statement chain.
2931    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
2932    maximum number of independent statements we may execute per cycle.  */
2933
2934 static int
2935 get_required_cycles (int ops_num, int cpu_width)
2936 {
2937 int res;
2938 int elog;
2939 unsigned int rest;
2940
2941 /* While we have more than 2 * cpu_width operands
2942 we may reduce number of operands by cpu_width
2943 per cycle. */
2944 res = ops_num / (2 * cpu_width);
2945
2946   /* The remaining operand count may be halved each cycle
2947      until only one operand remains.  */
2948 rest = (unsigned)(ops_num - res * cpu_width);
2949 elog = exact_log2 (rest);
2950 if (elog >= 0)
2951 res += elog;
2952 else
2953 res += floor_log2 (rest) + 1;
2954
2955 return res;
2956 }
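/* For example, with ops_num == 10 and cpu_width == 2: res starts as
   10 / 4 == 2, rest is 10 - 2 * 2 == 6, which is not a power of two,
   so the result is 2 + floor_log2 (6) + 1 == 5 cycles.  */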
2957
2958 /* Returns an optimal number of registers to use for computation of
2959 given statements. */
2960
2961 static int
2962 get_reassociation_width (int ops_num, enum tree_code opc,
2963 enum machine_mode mode)
2964 {
2965 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2966 int width;
2967 int width_min;
2968 int cycles_best;
2969
2970 if (param_width > 0)
2971 width = param_width;
2972 else
2973 width = targetm.sched.reassociation_width (opc, mode);
2974
2975 if (width == 1)
2976 return width;
2977
2978 /* Get the minimal time required for sequence computation. */
2979 cycles_best = get_required_cycles (ops_num, width);
2980
2981   /* Check whether we may use a smaller width and still compute the
2982      sequence in the same time.  That allows us to reduce register usage.
2983 get_required_cycles is monotonically increasing with lower width
2984 so we can perform a binary search for the minimal width that still
2985 results in the optimal cycle count. */
2986 width_min = 1;
2987 while (width > width_min)
2988 {
2989 int width_mid = (width + width_min) / 2;
2990
2991 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2992 width = width_mid;
2993 else if (width_min < width_mid)
2994 width_min = width_mid;
2995 else
2996 break;
2997 }
2998
2999 return width;
3000 }
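/* For example, with ops_num == 4 and a target-reported width of 4:
   get_required_cycles (4, 4) == 2, get_required_cycles (4, 2) == 2 as
   well, but get_required_cycles (4, 1) == 3, so the binary search above
   settles on a width of 2, saving registers at no cost in cycles.  */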
3001
3002 /* Recursively rewrite our linearized statements so that the operators
3003 match those in OPS[OPINDEX], putting the computation in rank
3004 order and trying to allow operations to be executed in
3005 parallel. */
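/* For illustration (hypothetical names), with four operands a, b, c, d
   and width 2, instead of the serial chain ((a + b) + c) + d the
   rewritten form is roughly

     t1 = c + d;  t2 = a + b;  result = t1 + t2;

   so the two inner additions are independent and may issue in the
   same cycle.  */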
3006
3007 static void
3008 rewrite_expr_tree_parallel (gimple stmt, int width,
3009 vec<operand_entry_t> ops)
3010 {
3011 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3012 int op_num = ops.length ();
3013 int stmt_num = op_num - 1;
3014 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3015 int op_index = op_num - 1;
3016 int stmt_index = 0;
3017 int ready_stmts_end = 0;
3018 int i = 0;
3019 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3020
3021 /* We start expression rewriting from the top statements.
3022 So, in this loop we create a full list of statements
3023 we will work with. */
3024 stmts[stmt_num - 1] = stmt;
3025 for (i = stmt_num - 2; i >= 0; i--)
3026 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3027
3028 for (i = 0; i < stmt_num; i++)
3029 {
3030 tree op1, op2;
3031
3032 /* Determine whether we should use results of
3033 already handled statements or not. */
3034 if (ready_stmts_end == 0
3035 && (i - stmt_index >= width || op_index < 1))
3036 ready_stmts_end = i;
3037
3038       /* Now we choose operands for the next statement.  A nonzero
3039 	 value of ready_stmts_end means that we should use the results
3040 	 of already generated statements as new operands.  */
3041 if (ready_stmts_end > 0)
3042 {
3043 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3044 if (ready_stmts_end > stmt_index)
3045 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3046 else if (op_index >= 0)
3047 op2 = ops[op_index--]->op;
3048 else
3049 {
3050 gcc_assert (stmt_index < i);
3051 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052 }
3053
3054 if (stmt_index >= ready_stmts_end)
3055 ready_stmts_end = 0;
3056 }
3057 else
3058 {
3059 if (op_index > 1)
3060 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3061 op2 = ops[op_index--]->op;
3062 op1 = ops[op_index--]->op;
3063 }
3064
3065 /* If we emit the last statement then we should put
3066 operands into the last statement. It will also
3067 break the loop. */
3068 if (op_index < 0 && stmt_index == i)
3069 i = stmt_num - 1;
3070
3071 if (dump_file && (dump_flags & TDF_DETAILS))
3072 {
3073 fprintf (dump_file, "Transforming ");
3074 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3075 }
3076
3077 /* We keep original statement only for the last one. All
3078 others are recreated. */
3079 if (i == stmt_num - 1)
3080 {
3081 gimple_assign_set_rhs1 (stmts[i], op1);
3082 gimple_assign_set_rhs2 (stmts[i], op2);
3083 update_stmt (stmts[i]);
3084 }
3085 else
3086 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3087
3088 if (dump_file && (dump_flags & TDF_DETAILS))
3089 {
3090 fprintf (dump_file, " into ");
3091 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3092 }
3093 }
3094
3095 remove_visited_stmt_chain (last_rhs1);
3096 }
3097
3098 /* Transform STMT, which is really (A + B) + (C + D), into the left
3099    linear form, ((A+B)+C)+D.
3100    Recurse on D if necessary.  */
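/* For illustration (hypothetical names), the chain

     t1_1 = a + b;  t2_2 = c + d;  x_3 = t1_1 + t2_2;

   is rewritten into a left-linear chain along the lines of

     t1_1 = a + b;  t2_2 = t1_1 + d;  x_3 = t2_2 + c;

   after which the tail recursion continues if the new rhs2 is itself
   defined by a reassociable add.  */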
3101
3102 static void
3103 linearize_expr (gimple stmt)
3104 {
3105 gimple_stmt_iterator gsinow, gsirhs;
3106 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3107 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3108 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3109 gimple newbinrhs = NULL;
3110 struct loop *loop = loop_containing_stmt (stmt);
3111
3112 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3113 && is_reassociable_op (binrhs, rhscode, loop));
3114
3115 gsinow = gsi_for_stmt (stmt);
3116 gsirhs = gsi_for_stmt (binrhs);
3117 gsi_move_before (&gsirhs, &gsinow);
3118
3119 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3120 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3121 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3122
3123 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3124 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3125
3126 if (dump_file && (dump_flags & TDF_DETAILS))
3127 {
3128 fprintf (dump_file, "Linearized: ");
3129 print_gimple_stmt (dump_file, stmt, 0, 0);
3130 }
3131
3132 reassociate_stats.linearized++;
3133 update_stmt (binrhs);
3134 update_stmt (binlhs);
3135 update_stmt (stmt);
3136
3137 gimple_set_visited (stmt, true);
3138 gimple_set_visited (binlhs, true);
3139 gimple_set_visited (binrhs, true);
3140
3141 /* Tail recurse on the new rhs if it still needs reassociation. */
3142 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3143 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3144 want to change the algorithm while converting to tuples. */
3145 linearize_expr (stmt);
3146 }
3147
3148 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3149 it. Otherwise, return NULL. */
3150
3151 static gimple
3152 get_single_immediate_use (tree lhs)
3153 {
3154 use_operand_p immuse;
3155 gimple immusestmt;
3156
3157 if (TREE_CODE (lhs) == SSA_NAME
3158 && single_imm_use (lhs, &immuse, &immusestmt)
3159 && is_gimple_assign (immusestmt))
3160 return immusestmt;
3161
3162 return NULL;
3163 }
3164
3165 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3166 representing the negated value. Insertions of any necessary
3167 instructions go before GSI.
3168 This function is recursive in that, if you hand it "a_5" as the
3169 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3170 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3171
3172 static tree
3173 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3174 {
3175   gimple negatedefstmt = NULL;
3176 tree resultofnegate;
3177
3178 /* If we are trying to negate a name, defined by an add, negate the
3179 add operands instead. */
3180 if (TREE_CODE (tonegate) == SSA_NAME)
3181 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3182 if (TREE_CODE (tonegate) == SSA_NAME
3183 && is_gimple_assign (negatedefstmt)
3184 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3185 && has_single_use (gimple_assign_lhs (negatedefstmt))
3186 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3187 {
3188 gimple_stmt_iterator gsi;
3189 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3190 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3191
3192 gsi = gsi_for_stmt (negatedefstmt);
3193 rhs1 = negate_value (rhs1, &gsi);
3194 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3195
3196 gsi = gsi_for_stmt (negatedefstmt);
3197 rhs2 = negate_value (rhs2, &gsi);
3198 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3199
3200 update_stmt (negatedefstmt);
3201 return gimple_assign_lhs (negatedefstmt);
3202 }
3203
3204 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3205 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3206 NULL_TREE, true, GSI_SAME_STMT);
3207 return resultofnegate;
3208 }
3209
3210 /* Return true if we should break up the subtract in STMT into an add
3211    with negate.  This is true when the subtract operands are really
3212 adds, or the subtract itself is used in an add expression. In
3213 either case, breaking up the subtract into an add with negate
3214 exposes the adds to reassociation. */
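/* For illustration (hypothetical names), for

     x_2 = a_1 - b_3;  y_4 = x_2 + c_5;

   the subtract feeds an addition, so it is broken up into

     _6 = -b_3;  x_2 = a_1 + _6;  y_4 = x_2 + c_5;

   letting reassociation treat a_1 + -b_3 + c_5 as one operand list.  */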
3215
3216 static bool
3217 should_break_up_subtract (gimple stmt)
3218 {
3219 tree lhs = gimple_assign_lhs (stmt);
3220 tree binlhs = gimple_assign_rhs1 (stmt);
3221 tree binrhs = gimple_assign_rhs2 (stmt);
3222 gimple immusestmt;
3223 struct loop *loop = loop_containing_stmt (stmt);
3224
3225 if (TREE_CODE (binlhs) == SSA_NAME
3226 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3227 return true;
3228
3229 if (TREE_CODE (binrhs) == SSA_NAME
3230 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3231 return true;
3232
3233 if (TREE_CODE (lhs) == SSA_NAME
3234 && (immusestmt = get_single_immediate_use (lhs))
3235 && is_gimple_assign (immusestmt)
3236 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3237 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3238 return true;
3239 return false;
3240 }
3241
3242 /* Transform STMT from A - B into A + -B. */
3243
3244 static void
3245 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3246 {
3247 tree rhs1 = gimple_assign_rhs1 (stmt);
3248 tree rhs2 = gimple_assign_rhs2 (stmt);
3249
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 {
3252 fprintf (dump_file, "Breaking up subtract ");
3253 print_gimple_stmt (dump_file, stmt, 0, 0);
3254 }
3255
3256 rhs2 = negate_value (rhs2, gsip);
3257 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3258 update_stmt (stmt);
3259 }
3260
3261 /* Determine whether STMT is a builtin call that raises an SSA name
3262 to an integer power and has only one use. If so, and this is early
3263 reassociation and unsafe math optimizations are permitted, place
3264 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3265 If any of these conditions does not hold, return FALSE. */
3266
3267 static bool
3268 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3269 {
3270 tree fndecl, arg1;
3271 REAL_VALUE_TYPE c, cint;
3272
3273 if (!first_pass_instance
3274 || !flag_unsafe_math_optimizations
3275 || !is_gimple_call (stmt)
3276 || !has_single_use (gimple_call_lhs (stmt)))
3277 return false;
3278
3279 fndecl = gimple_call_fndecl (stmt);
3280
3281 if (!fndecl
3282 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3283 return false;
3284
3285 switch (DECL_FUNCTION_CODE (fndecl))
3286 {
3287 CASE_FLT_FN (BUILT_IN_POW):
3288 *base = gimple_call_arg (stmt, 0);
3289 arg1 = gimple_call_arg (stmt, 1);
3290
3291 if (TREE_CODE (arg1) != REAL_CST)
3292 return false;
3293
3294 c = TREE_REAL_CST (arg1);
3295
3296 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3297 return false;
3298
3299 *exponent = real_to_integer (&c);
3300 real_from_integer (&cint, VOIDmode, *exponent,
3301 *exponent < 0 ? -1 : 0, 0);
3302 if (!real_identical (&c, &cint))
3303 return false;
3304
3305 break;
3306
3307 CASE_FLT_FN (BUILT_IN_POWI):
3308 *base = gimple_call_arg (stmt, 0);
3309 arg1 = gimple_call_arg (stmt, 1);
3310
3311 if (!host_integerp (arg1, 0))
3312 return false;
3313
3314 *exponent = TREE_INT_CST_LOW (arg1);
3315 break;
3316
3317 default:
3318 return false;
3319 }
3320
3321 /* Expanding negative exponents is generally unproductive, so we don't
3322 complicate matters with those. Exponents of zero and one should
3323 have been handled by expression folding. */
3324 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3325 return false;
3326
3327 return true;
3328 }
3329
3330 /* Recursively linearize a binary expression that is the RHS of STMT.
3331 Place the operands of the expression tree in the vector named OPS. */
3332
3333 static void
3334 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3335 bool is_associative, bool set_visited)
3336 {
3337 tree binlhs = gimple_assign_rhs1 (stmt);
3338 tree binrhs = gimple_assign_rhs2 (stmt);
3339 gimple binlhsdef = NULL, binrhsdef = NULL;
3340 bool binlhsisreassoc = false;
3341 bool binrhsisreassoc = false;
3342 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3343 struct loop *loop = loop_containing_stmt (stmt);
3344 tree base = NULL_TREE;
3345 HOST_WIDE_INT exponent = 0;
3346
3347 if (set_visited)
3348 gimple_set_visited (stmt, true);
3349
3350 if (TREE_CODE (binlhs) == SSA_NAME)
3351 {
3352 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3353 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3354 && !stmt_could_throw_p (binlhsdef));
3355 }
3356
3357 if (TREE_CODE (binrhs) == SSA_NAME)
3358 {
3359 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3360 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3361 && !stmt_could_throw_p (binrhsdef));
3362 }
3363
3364 /* If the LHS is not reassociable, but the RHS is, we need to swap
3365 them. If neither is reassociable, there is nothing we can do, so
3366 just put them in the ops vector. If the LHS is reassociable,
3367 linearize it. If both are reassociable, then linearize the RHS
3368 and the LHS. */
3369
3370 if (!binlhsisreassoc)
3371 {
3372 tree temp;
3373
3374       /* If this is not an associative operation, like division, give up.  */
3375 if (!is_associative)
3376 {
3377 add_to_ops_vec (ops, binrhs);
3378 return;
3379 }
3380
3381 if (!binrhsisreassoc)
3382 {
3383 if (rhscode == MULT_EXPR
3384 && TREE_CODE (binrhs) == SSA_NAME
3385 && acceptable_pow_call (binrhsdef, &base, &exponent))
3386 {
3387 add_repeat_to_ops_vec (ops, base, exponent);
3388 gimple_set_visited (binrhsdef, true);
3389 }
3390 else
3391 add_to_ops_vec (ops, binrhs);
3392
3393 if (rhscode == MULT_EXPR
3394 && TREE_CODE (binlhs) == SSA_NAME
3395 && acceptable_pow_call (binlhsdef, &base, &exponent))
3396 {
3397 add_repeat_to_ops_vec (ops, base, exponent);
3398 gimple_set_visited (binlhsdef, true);
3399 }
3400 else
3401 add_to_ops_vec (ops, binlhs);
3402
3403 return;
3404 }
3405
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 {
3408 fprintf (dump_file, "swapping operands of ");
3409 print_gimple_stmt (dump_file, stmt, 0, 0);
3410 }
3411
3412 swap_tree_operands (stmt,
3413 gimple_assign_rhs1_ptr (stmt),
3414 gimple_assign_rhs2_ptr (stmt));
3415 update_stmt (stmt);
3416
3417 if (dump_file && (dump_flags & TDF_DETAILS))
3418 {
3419 fprintf (dump_file, " is now ");
3420 print_gimple_stmt (dump_file, stmt, 0, 0);
3421 }
3422
3423 /* We want to make it so the lhs is always the reassociative op,
3424 so swap. */
3425 temp = binlhs;
3426 binlhs = binrhs;
3427 binrhs = temp;
3428 }
3429 else if (binrhsisreassoc)
3430 {
3431 linearize_expr (stmt);
3432 binlhs = gimple_assign_rhs1 (stmt);
3433 binrhs = gimple_assign_rhs2 (stmt);
3434 }
3435
3436 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3437 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3438 rhscode, loop));
3439 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3440 is_associative, set_visited);
3441
3442 if (rhscode == MULT_EXPR
3443 && TREE_CODE (binrhs) == SSA_NAME
3444 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3445 {
3446 add_repeat_to_ops_vec (ops, base, exponent);
3447 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3448 }
3449 else
3450 add_to_ops_vec (ops, binrhs);
3451 }
3452
3453 /* Repropagate the negates back into subtracts, since no other pass
3454 currently does it. */
3455
3456 static void
3457 repropagate_negates (void)
3458 {
3459 unsigned int i = 0;
3460 tree negate;
3461
3462 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3463 {
3464 gimple user = get_single_immediate_use (negate);
3465
3466 if (!user || !is_gimple_assign (user))
3467 continue;
3468
3469 /* The negate operand can be either operand of a PLUS_EXPR
3470 (it can be the LHS if the RHS is a constant for example).
3471
3472 Force the negate operand to the RHS of the PLUS_EXPR, then
3473 transform the PLUS_EXPR into a MINUS_EXPR. */
3474 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3475 {
3476 /* If the negated operand appears on the LHS of the
3477 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3478 to force the negated operand to the RHS of the PLUS_EXPR. */
3479 if (gimple_assign_rhs1 (user) == negate)
3480 {
3481 swap_tree_operands (user,
3482 gimple_assign_rhs1_ptr (user),
3483 gimple_assign_rhs2_ptr (user));
3484 }
3485
3486 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3487 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3488 if (gimple_assign_rhs2 (user) == negate)
3489 {
3490 tree rhs1 = gimple_assign_rhs1 (user);
3491 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3492 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3493 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3494 update_stmt (user);
3495 }
3496 }
3497 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3498 {
3499 if (gimple_assign_rhs1 (user) == negate)
3500 {
3501 /* We have
3502 x = -a
3503 y = x - b
3504 which we transform into
3505 x = a + b
3506 y = -x .
3507 This pushes down the negate which we possibly can merge
3508 into some other operation, hence insert it into the
3509 plus_negates vector. */
3510 gimple feed = SSA_NAME_DEF_STMT (negate);
3511 tree a = gimple_assign_rhs1 (feed);
3512 tree rhs2 = gimple_assign_rhs2 (user);
3513 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3514 gimple_replace_lhs (feed, negate);
3515 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3516 update_stmt (gsi_stmt (gsi));
3517 gsi2 = gsi_for_stmt (user);
3518 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3519 update_stmt (gsi_stmt (gsi2));
3520 gsi_move_before (&gsi, &gsi2);
3521 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3522 }
3523 else
3524 {
3525 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3526 rid of one operation. */
3527 gimple feed = SSA_NAME_DEF_STMT (negate);
3528 tree a = gimple_assign_rhs1 (feed);
3529 tree rhs1 = gimple_assign_rhs1 (user);
3530 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3531 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3532 update_stmt (gsi_stmt (gsi));
3533 }
3534 }
3535 }
3536 }
3537
3538 /* Returns true if OP is of a type for which we can do reassociation.
3539 That is for integral or non-saturating fixed-point types, and for
3540    floating point types when associative-math is enabled.  */
3541
3542 static bool
3543 can_reassociate_p (tree op)
3544 {
3545 tree type = TREE_TYPE (op);
3546 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3547 || NON_SAT_FIXED_POINT_TYPE_P (type)
3548 || (flag_associative_math && FLOAT_TYPE_P (type)))
3549 return true;
3550 return false;
3551 }
3552
3553 /* Break up subtract operations in block BB.
3554
3555 We do this top down because we don't know whether the subtract is
3556 part of a possible chain of reassociation except at the top.
3557
3558 IE given
3559 d = f + g
3560 c = a + e
3561 b = c - d
3562 q = b - r
3563 k = t - q
3564
3565 we want to break up k = t - q, but we won't until we've transformed q
3566 = b - r, which won't be broken up until we transform b = c - d.
3567
3568 En passant, clear the GIMPLE visited flag on every statement. */
3569
3570 static void
3571 break_up_subtract_bb (basic_block bb)
3572 {
3573 gimple_stmt_iterator gsi;
3574 basic_block son;
3575
3576 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3577 {
3578 gimple stmt = gsi_stmt (gsi);
3579 gimple_set_visited (stmt, false);
3580
3581 if (!is_gimple_assign (stmt)
3582 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3583 continue;
3584
3585 /* Look for simple gimple subtract operations. */
3586 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3587 {
3588 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3589 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3590 continue;
3591
3592 /* Check for a subtract used only in an addition. If this
3593 is the case, transform it into add of a negate for better
3594 reassociation. IE transform C = A-B into C = A + -B if C
3595 is only used in an addition. */
3596 if (should_break_up_subtract (stmt))
3597 break_up_subtract (stmt, &gsi);
3598 }
3599 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3600 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3601 plus_negates.safe_push (gimple_assign_lhs (stmt));
3602 }
3603 for (son = first_dom_son (CDI_DOMINATORS, bb);
3604 son;
3605 son = next_dom_son (CDI_DOMINATORS, son))
3606 break_up_subtract_bb (son);
3607 }
3608
3609 /* Used for repeated factor analysis. */
3610 struct repeat_factor_d
3611 {
3612 /* An SSA name that occurs in a multiply chain. */
3613 tree factor;
3614
3615 /* Cached rank of the factor. */
3616 unsigned rank;
3617
3618 /* Number of occurrences of the factor in the chain. */
3619 HOST_WIDE_INT count;
3620
3621 /* An SSA name representing the product of this factor and
3622 all factors appearing later in the repeated factor vector. */
3623 tree repr;
3624 };
3625
3626 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3627 typedef const struct repeat_factor_d *const_repeat_factor_t;
3628
3629
3630 static vec<repeat_factor> repeat_factor_vec;
3631
3632 /* Used for sorting the repeat factor vector. Sort primarily by
3633 ascending occurrence count, secondarily by descending rank. */
3634
3635 static int
3636 compare_repeat_factors (const void *x1, const void *x2)
3637 {
3638 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3639 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3640
3641 if (rf1->count != rf2->count)
3642 return rf1->count - rf2->count;
3643
3644 return rf2->rank - rf1->rank;
3645 }
3646
3647 /* Look for repeated operands in OPS in the multiply tree rooted at
3648 STMT. Replace them with an optimal sequence of multiplies and powi
3649 builtin calls, and remove the used operands from OPS. Return an
3650 SSA name representing the value of the replacement sequence. */
3651
3652 static tree
3653 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3654 {
3655 unsigned i, j, vec_len;
3656 int ii;
3657 operand_entry_t oe;
3658 repeat_factor_t rf1, rf2;
3659 repeat_factor rfnew;
3660 tree result = NULL_TREE;
3661 tree target_ssa, iter_result;
3662 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3663 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3664 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3665 gimple mul_stmt, pow_stmt;
3666
3667 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3668 target. */
3669 if (!powi_fndecl)
3670 return NULL_TREE;
3671
3672 /* Allocate the repeated factor vector. */
3673 repeat_factor_vec.create (10);
3674
3675 /* Scan the OPS vector for all SSA names in the product and build
3676 up a vector of occurrence counts for each factor. */
3677 FOR_EACH_VEC_ELT (*ops, i, oe)
3678 {
3679 if (TREE_CODE (oe->op) == SSA_NAME)
3680 {
3681 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3682 {
3683 if (rf1->factor == oe->op)
3684 {
3685 rf1->count += oe->count;
3686 break;
3687 }
3688 }
3689
3690 if (j >= repeat_factor_vec.length ())
3691 {
3692 rfnew.factor = oe->op;
3693 rfnew.rank = oe->rank;
3694 rfnew.count = oe->count;
3695 rfnew.repr = NULL_TREE;
3696 repeat_factor_vec.safe_push (rfnew);
3697 }
3698 }
3699 }
3700
3701 /* Sort the repeated factor vector by (a) increasing occurrence count,
3702 and (b) decreasing rank. */
3703 repeat_factor_vec.qsort (compare_repeat_factors);
3704
3705 /* It is generally best to combine as many base factors as possible
3706 into a product before applying __builtin_powi to the result.
3707 However, the sort order chosen for the repeated factor vector
3708 allows us to cache partial results for the product of the base
3709 factors for subsequent use. When we already have a cached partial
3710 result from a previous iteration, it is best to make use of it
3711 before looking for another __builtin_pow opportunity.
3712
3713 As an example, consider x * x * y * y * y * z * z * z * z.
3714 We want to first compose the product x * y * z, raise it to the
3715 second power, then multiply this by y * z, and finally multiply
3716 by z. This can be done in 5 multiplies provided we cache y * z
3717 for use in both expressions:
3718
3719 t1 = y * z
3720 t2 = t1 * x
3721 t3 = t2 * t2
3722 t4 = t1 * t3
3723 result = t4 * z
3724
3725 If we instead ignored the cached y * z and first multiplied by
3726 the __builtin_pow opportunity z * z, we would get the inferior:
3727
3728 t1 = y * z
3729 t2 = t1 * x
3730 t3 = t2 * t2
3731 t4 = z * z
3732 t5 = t3 * t4
3733 result = t5 * y */
3734
3735 vec_len = repeat_factor_vec.length ();
3736
3737 /* Repeatedly look for opportunities to create a builtin_powi call. */
3738 while (true)
3739 {
3740 HOST_WIDE_INT power;
3741
3742 /* First look for the largest cached product of factors from
3743 preceding iterations. If found, create a builtin_powi for
3744 it if the minimum occurrence count for its factors is at
3745 least 2, or just use this cached product as our next
3746 multiplicand if the minimum occurrence count is 1. */
3747 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3748 {
3749 if (rf1->repr && rf1->count > 0)
3750 break;
3751 }
3752
3753 if (j < vec_len)
3754 {
3755 power = rf1->count;
3756
3757 if (power == 1)
3758 {
3759 iter_result = rf1->repr;
3760
3761 if (dump_file && (dump_flags & TDF_DETAILS))
3762 {
3763 unsigned elt;
3764 repeat_factor_t rf;
3765 fputs ("Multiplying by cached product ", dump_file);
3766 for (elt = j; elt < vec_len; elt++)
3767 {
3768 rf = &repeat_factor_vec[elt];
3769 print_generic_expr (dump_file, rf->factor, 0);
3770 if (elt < vec_len - 1)
3771 fputs (" * ", dump_file);
3772 }
3773 fputs ("\n", dump_file);
3774 }
3775 }
3776 else
3777 {
3778 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3779 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3780 build_int_cst (integer_type_node,
3781 power));
3782 gimple_call_set_lhs (pow_stmt, iter_result);
3783 gimple_set_location (pow_stmt, gimple_location (stmt));
3784 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3785
3786 if (dump_file && (dump_flags & TDF_DETAILS))
3787 {
3788 unsigned elt;
3789 repeat_factor_t rf;
3790 fputs ("Building __builtin_pow call for cached product (",
3791 dump_file);
3792 for (elt = j; elt < vec_len; elt++)
3793 {
3794 rf = &repeat_factor_vec[elt];
3795 print_generic_expr (dump_file, rf->factor, 0);
3796 if (elt < vec_len - 1)
3797 fputs (" * ", dump_file);
3798 }
3799 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3800 power);
3801 }
3802 }
3803 }
3804 else
3805 {
3806 /* Otherwise, find the first factor in the repeated factor
3807 vector whose occurrence count is at least 2. If no such
3808 factor exists, there are no builtin_powi opportunities
3809 remaining. */
3810 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3811 {
3812 if (rf1->count >= 2)
3813 break;
3814 }
3815
3816 if (j >= vec_len)
3817 break;
3818
3819 power = rf1->count;
3820
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 {
3823 unsigned elt;
3824 repeat_factor_t rf;
3825 fputs ("Building __builtin_pow call for (", dump_file);
3826 for (elt = j; elt < vec_len; elt++)
3827 {
3828 rf = &repeat_factor_vec[elt];
3829 print_generic_expr (dump_file, rf->factor, 0);
3830 if (elt < vec_len - 1)
3831 fputs (" * ", dump_file);
3832 }
3833 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3834 }
3835
3836 reassociate_stats.pows_created++;
3837
3838 /* Visit each element of the vector in reverse order (so that
3839 high-occurrence elements are visited first, and within the
3840 same occurrence count, lower-ranked elements are visited
3841 first). Form a linear product of all elements in this order
3842 whose occurrence count is at least that of element J.
3843 Record the SSA name representing the product of each element
3844 with all subsequent elements in the vector. */
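/* For the sorted example vector above this walks from z back to x,
   setting repr(z) = z, repr(y) = y * z and repr(x) = x * (y * z)
   (names illustrative), so each partial product is cached for reuse
   by later iterations.  */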
3845 if (j == vec_len - 1)
3846 rf1->repr = rf1->factor;
3847 else
3848 {
3849 for (ii = vec_len - 2; ii >= (int)j; ii--)
3850 {
3851 tree op1, op2;
3852
3853 rf1 = &repeat_factor_vec[ii];
3854 rf2 = &repeat_factor_vec[ii + 1];
3855
3856 /* Init the last factor's representative to be itself. */
3857 if (!rf2->repr)
3858 rf2->repr = rf2->factor;
3859
3860 op1 = rf1->factor;
3861 op2 = rf2->repr;
3862
3863 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3864 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3865 target_ssa,
3866 op1, op2);
3867 gimple_set_location (mul_stmt, gimple_location (stmt));
3868 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3869 rf1->repr = target_ssa;
3870
3871 /* Don't reprocess the multiply we just introduced. */
3872 gimple_set_visited (mul_stmt, true);
3873 }
3874 }
3875
3876 /* Form a call to __builtin_powi for the maximum product
3877 just formed, raised to the power obtained earlier. */
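/* The statement emitted here has the form (SSA names illustrative,
   power taken from the example above):
       reassocpow_2 = __builtin_powi (reassocpow_1, 2);  */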
3878 rf1 = &repeat_factor_vec[j];
3879 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3880 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3881 build_int_cst (integer_type_node,
3882 power));
3883 gimple_call_set_lhs (pow_stmt, iter_result);
3884 gimple_set_location (pow_stmt, gimple_location (stmt));
3885 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3886 }
3887
3888 /* If we previously formed at least one other builtin_powi call,
3889 form the product of this one and those others. */
3890 if (result)
3891 {
3892 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3893 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3894 result, iter_result);
3895 gimple_set_location (mul_stmt, gimple_location (stmt));
3896 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3897 gimple_set_visited (mul_stmt, true);
3898 result = new_result;
3899 }
3900 else
3901 result = iter_result;
3902
3903 /* Decrement the occurrence count of each element in the product
3904 by the count found above, and remove this many copies of each
3905 factor from OPS. */
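/* Continuing the example, after emitting (x * y * z)^2 the counts
   become x:0, y:1, z:2, and two copies each of x, y and z are
   removed from OPS (values illustrative).  */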
3906 for (i = j; i < vec_len; i++)
3907 {
3908 unsigned k = power;
3909 unsigned n;
3910
3911 rf1 = &repeat_factor_vec[i];
3912 rf1->count -= power;
3913
3914 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3915 {
3916 if (oe->op == rf1->factor)
3917 {
3918 if (oe->count <= k)
3919 {
3920 ops->ordered_remove (n);
3921 k -= oe->count;
3922
3923 if (k == 0)
3924 break;
3925 }
3926 else
3927 {
3928 oe->count -= k;
3929 break;
3930 }
3931 }
3932 }
3933 }
3934 }
3935
3936 /* At this point all elements in the repeated factor vector have a
3937 remaining occurrence count of 0 or 1, and those with a count of 1
3938 don't have cached representatives. Re-sort the ops vector and
3939 clean up. */
3940 ops->qsort (sort_by_operand_rank);
3941 repeat_factor_vec.release ();
3942
3943 /* Return the final product computed herein. Note that there may
3944 still be some elements with single occurrence count left in OPS;
3945 those will be handled by the normal reassociation logic. */
3946 return result;
3947 }
3948
3949 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3950
3951 static void
3952 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3953 {
3954 tree rhs1;
3955
3956 if (dump_file && (dump_flags & TDF_DETAILS))
3957 {
3958 fprintf (dump_file, "Transforming ");
3959 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 }
3961
3962 rhs1 = gimple_assign_rhs1 (stmt);
3963 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3964 update_stmt (stmt);
3965 remove_visited_stmt_chain (rhs1);
3966
3967 if (dump_file && (dump_flags & TDF_DETAILS))
3968 {
3969 fprintf (dump_file, " into ");
3970 print_gimple_stmt (dump_file, stmt, 0, 0);
3971 }
3972 }
3973
3974 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3975
3976 static void
3977 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3978 tree rhs1, tree rhs2)
3979 {
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3981 {
3982 fprintf (dump_file, "Transforming ");
3983 print_gimple_stmt (dump_file, stmt, 0, 0);
3984 }
3985
3986 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3987 update_stmt (gsi_stmt (*gsi));
3988 remove_visited_stmt_chain (rhs1);
3989
3990 if (dump_file && (dump_flags & TDF_DETAILS))
3991 {
3992 fprintf (dump_file, " into ");
3993 print_gimple_stmt (dump_file, stmt, 0, 0);
3994 }
3995 }
3996
3997 /* Reassociate expressions in basic block BB and in its children in
3998 the post-dominator tree. */
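/* This is invoked from do_reassoc on EXIT_BLOCK_PTR, so the
   recursion at the bottom walks the post-dominator tree rooted
   there.  */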
3999
4000 static void
4001 reassociate_bb (basic_block bb)
4002 {
4003 gimple_stmt_iterator gsi;
4004 basic_block son;
4005 gimple stmt = last_stmt (bb);
4006
4007 if (stmt && !gimple_visited_p (stmt))
4008 maybe_optimize_range_tests (stmt);
4009
4010 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4011 {
4012 stmt = gsi_stmt (gsi);
4013
4014 if (is_gimple_assign (stmt)
4015 && !stmt_could_throw_p (stmt))
4016 {
4017 tree lhs, rhs1, rhs2;
4018 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4019
4020 /* If this is not a gimple binary expression, there is
4021 nothing for us to do with it. */
4022 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4023 continue;
4024
4025 /* If this was part of an already processed statement,
4026 we don't need to touch it again. */
4027 if (gimple_visited_p (stmt))
4028 {
4029 /* This statement might have become dead because of previous
4030 reassociations. */
4031 if (has_zero_uses (gimple_get_lhs (stmt)))
4032 {
4033 gsi_remove (&gsi, true);
4034 release_defs (stmt);
4035 /* Removing the stmt above may have left the iterator at
4036 the end of the sequence, so reset it to the last stmt
4037 of the block. If the block is now empty because we
4038 just removed its only remaining stmt, gsi_last_bb
4039 returns an end iterator again, and in that case we
4040 need to bail out. */
4041 if (gsi_end_p (gsi))
4042 {
4043 gsi = gsi_last_bb (bb);
4044 if (gsi_end_p (gsi))
4045 break;
4046 }
4047 }
4048 continue;
4049 }
4050
4051 lhs = gimple_assign_lhs (stmt);
4052 rhs1 = gimple_assign_rhs1 (stmt);
4053 rhs2 = gimple_assign_rhs2 (stmt);
4054
4055 /* For operations other than bitwise ops and min/max we cannot
4056 reassociate all types. Verify that here. */
4057 if (rhs_code != BIT_IOR_EXPR
4058 && rhs_code != BIT_AND_EXPR
4059 && rhs_code != BIT_XOR_EXPR
4060 && rhs_code != MIN_EXPR
4061 && rhs_code != MAX_EXPR
4062 && (!can_reassociate_p (lhs)
4063 || !can_reassociate_p (rhs1)
4064 || !can_reassociate_p (rhs2)))
4065 continue;
4066
4067 if (associative_tree_code (rhs_code))
4068 {
4069 vec<operand_entry_t> ops = vNULL;
4070 tree powi_result = NULL_TREE;
4071
4072 /* There may be no immediate uses left by the time we
4073 get here because we may have eliminated them all. */
4074 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4075 continue;
4076
4077 gimple_set_visited (stmt, true);
4078 linearize_expr_tree (&ops, stmt, true, true);
4079 ops.qsort (sort_by_operand_rank);
4080 optimize_ops_list (rhs_code, &ops);
4081 if (undistribute_ops_list (rhs_code, &ops,
4082 loop_containing_stmt (stmt)))
4083 {
4084 ops.qsort (sort_by_operand_rank);
4085 optimize_ops_list (rhs_code, &ops);
4086 }
4087
4088 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4089 optimize_range_tests (rhs_code, &ops);
4090
4091 if (first_pass_instance
4092 && rhs_code == MULT_EXPR
4093 && flag_unsafe_math_optimizations)
4094 powi_result = attempt_builtin_powi (stmt, &ops);
4095
4096 /* If the operand vector is now empty, all operands were
4097 consumed by the __builtin_powi optimization. */
4098 if (ops.length () == 0)
4099 transform_stmt_to_copy (&gsi, stmt, powi_result);
4100 else if (ops.length () == 1)
4101 {
4102 tree last_op = ops.last ()->op;
4103
4104 if (powi_result)
4105 transform_stmt_to_multiply (&gsi, stmt, last_op,
4106 powi_result);
4107 else
4108 transform_stmt_to_copy (&gsi, stmt, last_op);
4109 }
4110 else
4111 {
4112 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4113 int ops_num = ops.length ();
4114 int width = get_reassociation_width (ops_num, rhs_code, mode);
4115
4116 if (dump_file && (dump_flags & TDF_DETAILS))
4117 fprintf (dump_file,
4118 "Width = %d was chosen for reassociation\n", width);
4119
4120 if (width > 1
4121 && ops.length () > 3)
4122 rewrite_expr_tree_parallel (stmt, width, ops);
4123 else
4124 rewrite_expr_tree (stmt, 0, ops, false);
4125
4126 /* If we combined some repeated factors into a
4127 __builtin_powi call, multiply that result by the
4128 reassociated operands. */
4129 if (powi_result)
4130 {
4131 gimple mul_stmt;
4132 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4133 tree target_ssa = make_temp_ssa_name (type, NULL,
4134 "reassocpow");
4135 gimple_set_lhs (stmt, target_ssa);
4136 update_stmt (stmt);
4137 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4138 powi_result,
4139 target_ssa);
4140 gimple_set_location (mul_stmt, gimple_location (stmt));
4141 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4142 }
4143 }
4144
4145 ops.release ();
4146 }
4147 }
4148 }
4149 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4150 son;
4151 son = next_dom_son (CDI_POST_DOMINATORS, son))
4152 reassociate_bb (son);
4153 }
4154
4155 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4156 void debug_ops_vector (vec<operand_entry_t> ops);
4157
4158 /* Dump the operand entry vector OPS to FILE. */
4159
4160 void
4161 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4162 {
4163 operand_entry_t oe;
4164 unsigned int i;
4165
4166 FOR_EACH_VEC_ELT (ops, i, oe)
4167 {
4168 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4169 print_generic_expr (file, oe->op, 0);
4170 }
4171 }
4172
4173 /* Dump the operand entry vector OPS to STDERR. */
4174
4175 DEBUG_FUNCTION void
4176 debug_ops_vector (vec<operand_entry_t> ops)
4177 {
4178 dump_ops_vector (stderr, ops);
4179 }
4180
4181 static void
4182 do_reassoc (void)
4183 {
4184 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4185 reassociate_bb (EXIT_BLOCK_PTR);
4186 }
4187
4188 /* Initialize the reassociation pass. */
4189
4190 static void
4191 init_reassoc (void)
4192 {
4193 int i;
4194 long rank = 2;
4195 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4196
4197 /* Find the loops, so that we can prevent moving calculations in
4198 them. */
4199 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4200
4201 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4202
4203 operand_entry_pool = create_alloc_pool ("operand entry pool",
4204 sizeof (struct operand_entry), 30);
4205 next_operand_entry_id = 0;
4206
4207 /* Reverse RPO (Reverse Post Order) will give us something where
4208 deeper loops come later. */
4209 pre_and_rev_post_order_compute (NULL, bbs, false);
4210 bb_rank = XCNEWVEC (long, last_basic_block);
4211 operand_rank = pointer_map_create ();
4212
4213 /* Give each default definition a distinct rank. This includes
4214 parameters and the static chain. Walk backwards over all
4215 SSA names so that we get proper rank ordering according
4216 to tree_swap_operands_p. */
4217 for (i = num_ssa_names - 1; i > 0; --i)
4218 {
4219 tree name = ssa_name (i);
4220 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4221 insert_operand_rank (name, ++rank);
4222 }
4223
4224 /* Set up rank for each BB. */
4225 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4226 bb_rank[bbs[i]] = ++rank << 16;
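/* The shift leaves the low-order bits of each block's base rank
   free; ranks later computed for expressions (see get_rank) add
   small increments on top of these base ranks, so they normally
   stay within their block's band.  */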
4227
4228 free (bbs);
4229 calculate_dominance_info (CDI_POST_DOMINATORS);
4230 plus_negates = vNULL;
4231 }
4232
4233 /* Cleanup after the reassociation pass, and print stats if
4234 requested. */
4235
4236 static void
4237 fini_reassoc (void)
4238 {
4239 statistics_counter_event (cfun, "Linearized",
4240 reassociate_stats.linearized);
4241 statistics_counter_event (cfun, "Constants eliminated",
4242 reassociate_stats.constants_eliminated);
4243 statistics_counter_event (cfun, "Ops eliminated",
4244 reassociate_stats.ops_eliminated);
4245 statistics_counter_event (cfun, "Statements rewritten",
4246 reassociate_stats.rewritten);
4247 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4248 reassociate_stats.pows_encountered);
4249 statistics_counter_event (cfun, "Built-in powi calls created",
4250 reassociate_stats.pows_created);
4251
4252 pointer_map_destroy (operand_rank);
4253 free_alloc_pool (operand_entry_pool);
4254 free (bb_rank);
4255 plus_negates.release ();
4256 free_dominance_info (CDI_POST_DOMINATORS);
4257 loop_optimizer_finalize ();
4258 }
4259
4260 /* Gate and execute functions for Reassociation. */
4261
4262 static unsigned int
4263 execute_reassoc (void)
4264 {
4265 init_reassoc ();
4266
4267 do_reassoc ();
4268 repropagate_negates ();
4269
4270 fini_reassoc ();
4271 return 0;
4272 }
4273
4274 static bool
4275 gate_tree_ssa_reassoc (void)
4276 {
4277 return flag_tree_reassoc != 0;
4278 }
4279
4280 struct gimple_opt_pass pass_reassoc =
4281 {
4282 {
4283 GIMPLE_PASS,
4284 "reassoc", /* name */
4285 OPTGROUP_NONE, /* optinfo_flags */
4286 gate_tree_ssa_reassoc, /* gate */
4287 execute_reassoc, /* execute */
4288 NULL, /* sub */
4289 NULL, /* next */
4290 0, /* static_pass_number */
4291 TV_TREE_REASSOC, /* tv_id */
4292 PROP_cfg | PROP_ssa, /* properties_required */
4293 0, /* properties_provided */
4294 0, /* properties_destroyed */
4295 0, /* todo_flags_start */
4296 TODO_verify_ssa
4297 | TODO_verify_flow
4298 | TODO_ggc_collect /* todo_flags_finish */
4299 }
4300 };