gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
43
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
47
48 It consists of five steps:
49
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
52
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
57
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
60
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
64
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
67
68 5. Repropagate negates, as nothing else will clean it up ATM.
69
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
72
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
75
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
80
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
85
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
88
89 a (1), b (1), c (1), d (2), e (2)
90
91
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
94
95 You want to first merge all leaves of the same rank, as much as
96 possible.
97
98 So first build a binary op of
99
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
104
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
109
110 Then build a binary op of d + e
111 mergetmp2 = d + e
112
113 and put mergetmp2 on the merge worklist.
114
115 so merge worklist = {mergetmp, c, mergetmp2}
116
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
119
120 So we have
121
122 build binary op
123 mergetmp3 = mergetmp + c
124
125 worklist = {mergetmp2, mergetmp3}
126
127 mergetmp4 = mergetmp2 + mergetmp3
128
129 worklist = {mergetmp4}
130
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
133
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
136
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
139
140 So why don't we do this?
141
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or fewer ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
147
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
150
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
154
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
158
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
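/* Illustrative sketch (assumed operand names, not from a testcase):
   given the input chain

     t1 = a + c;
     t2 = t1 + b;
     t3 = t2 + a;

   linearization collects the operand list {a, c, b, a}; the optimization
   and sorting steps then place operands of equal rank (and equal value)
   next to each other before the chain is rewritten, so that a later
   redundancy eliminator can see the two uses of 'a' close together.  */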
164
165
166 /* Statistics */
167 static struct
168 {
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
176
177 /* Operator, rank pair. */
178 typedef struct operand_entry
179 {
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
185
186 static alloc_pool operand_entry_pool;
187
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
191
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
196
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
199
200 /* Forward decls. */
201 static long get_rank (tree);
202
203
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
208
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
218 {
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
225
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
229
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
234
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
239
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
245
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 {
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 {
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
256 }
257 }
258
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
261 }
262
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
268 {
269 gimple phi_stmt;
270 long block_rank;
271
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
275
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
277
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
280
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
288
289 return false;
290 }
291
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
298 {
299 long op_rank;
300
301 if (loop_carried_phi (op))
302 return rank;
303
304 op_rank = get_rank (op);
305
306 return MAX (rank, op_rank);
307 }
308
309 /* Look up the operand rank structure for expression E. */
310
311 static inline long
312 find_operand_rank (tree e)
313 {
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
316 }
317
318 /* Insert {E,RANK} into the operand rank hashtable. */
319
320 static inline void
321 insert_operand_rank (tree e, long rank)
322 {
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
328 }
329
330 /* Given an expression E, return the rank of the expression. */
331
332 static long
333 get_rank (tree e)
334 {
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
338
339 /* SSA_NAME's have the rank of the expression they are the result
340 of.
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is one more than the maximum
346 rank of any of its operands.
347 I make no claims that this is optimal, however, it gives good
348 results. */
349
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
353
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
358
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
362
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
367
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
370
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
375
376 if (TREE_CODE (e) == SSA_NAME)
377 {
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
382
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
385
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
389
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
393
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
398
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
404 {
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
410 {
411 for (i = 0; i < n; i++)
412 {
413 op = TREE_OPERAND (rhs, i);
414
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
417 }
418 }
419 }
420 else
421 {
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
424 {
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
428 }
429 }
430
431 if (dump_file && (dump_flags & TDF_DETAILS))
432 {
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
436 }
437
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
441 }
442
443 /* Globals, etc, are rank 0 */
444 return 0;
445 }
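/* Illustrative note (assumed names): for a chain such as

     a_1 = x_0 + y_0;
     b_2 = a_1 + z_0;

   a_1 receives rank MAX (rank (x_0), rank (y_0)) + 1 and b_2 at least one
   more than a_1, so names defined later in a dependence chain get strictly
   larger ranks.  This is the property sort_by_operand_rank relies on.  */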
446
447
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
453
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
458 {
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
465 }
466
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
471 {
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied/whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
479 {
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
486 }
487
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
493 {
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
498 }
499
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
504 }
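/* For illustration, operands carrying (assumed) ranks {3, 0, 7, 3} sort to
   the order {7, 3, 3, 0}: decreasing rank, with rank-0 constants grouped
   at the end.  Remaining ties are broken by constant type,
   SSA_NAME_VERSION or the unique id, so the result does not depend on
   host pointer values.  */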
505
506 /* Add an operand entry to *OPS for the tree operand OP. */
507
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 {
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
518 }
519
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
522
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
526 {
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
534
535 reassociate_stats.pows_encountered++;
536 }
537
538 /* Return true if STMT is a reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
540
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 {
544 basic_block bb = gimple_bb (stmt);
545
546 if (gimple_bb (stmt) == NULL)
547 return false;
548
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
551
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
556
557 return false;
558 }
559
560
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of that operation. Otherwise, return NULL_TREE. */
563
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
566 {
567 gimple stmt = SSA_NAME_DEF_STMT (name);
568
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
571
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
575 }
576
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
580
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
588 {
589
590 /* If we have two of the same op, and the opcode is & |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
594
595 if (last && last->op == curr->op)
596 {
597 switch (opcode)
598 {
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
604 {
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
611 }
612
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
615
616 return true;
617
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
620 {
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
626 }
627
628 reassociate_stats.ops_eliminated += 2;
629
630 if (ops->length () == 2)
631 {
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
635 }
636 else
637 {
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
640 }
641
642 return true;
643
644 default:
645 break;
646 }
647 }
648 return false;
649 }
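/* In other words, the identities applied above are

     x & x -> x          x | x -> x
     MIN (x, x) -> x     MAX (x, x) -> x
     x ^ x -> 0

   where the XOR case removes both entries from the operand list.  */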
650
651 static vec<tree> plus_negates;
652
653 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
658
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
664 {
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
669
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
672
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
677
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
681
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
686 {
687 if (oe->op == negateop)
688 {
689
690 if (dump_file && (dump_flags & TDF_DETAILS))
691 {
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
697 }
698
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
703
704 return true;
705 }
706 else if (oe->op == notop)
707 {
708 tree op_type = TREE_TYPE (oe->op);
709
710 if (dump_file && (dump_flags & TDF_DETAILS))
711 {
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
717 }
718
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
723
724 return true;
725 }
726 }
727
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
732
733 return false;
734 }
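/* Illustrative identities handled above for a PLUS_EXPR chain:

     x + -x -> 0
     x + ~x -> -1

   with the replacement constant appended to the operand list.  */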
735
736 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0, and return true. Otherwise, return
740 false. */
741
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
747 {
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
751
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
755
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
759
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
763
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
768 {
769 if (oe->op == notop)
770 {
771 if (dump_file && (dump_flags & TDF_DETAILS))
772 {
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
784 }
785
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
791
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
796 }
797 }
798
799 return false;
800 }
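/* Illustrative identities handled above:

     x & ~x -> 0
     x | ~x -> -1 (all bits set)

   Either result absorbs the whole chain, so all other operands are
   dropped.  */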
801
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
808
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
812 {
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
815
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 {
819 switch (opcode)
820 {
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
823 {
824 if (ops->length () != 1)
825 {
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
828
829 reassociate_stats.ops_eliminated += ops->length () - 1;
830
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
834 }
835 }
836 else if (integer_all_onesp (oelast->op))
837 {
838 if (ops->length () != 1)
839 {
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
844 }
845 }
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
849 {
850 if (ops->length () != 1)
851 {
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
854
855 reassociate_stats.ops_eliminated += ops->length () - 1;
856
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
860 }
861 }
862 else if (integer_zerop (oelast->op))
863 {
864 if (ops->length () != 1)
865 {
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
870 }
871 }
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
879 {
880 if (ops->length () != 1)
881 {
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
884
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
889 }
890 }
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
895 {
896 if (ops->length () != 1)
897 {
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
903 }
904 }
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
914 {
915 if (ops->length () != 1)
916 {
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
922 }
923 }
924 break;
925 default:
926 break;
927 }
928 }
929 }
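/* Summary of the constant identities above (floating-point cases only when
   the semantics allow it):

     x & 0  -> 0       x & -1 -> x
     x | -1 -> -1      x | 0  -> x
     x * 0  -> 0       x * 1  -> x
     x + 0  -> x       x ^ 0  -> x

   The absorbing cases (& 0, | -1, * 0) drop all other operands; the
   identity cases merely pop the constant.  */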
930
931
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
934
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
942
943
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
946
947
948 /* Oecount hashtable helpers. */
949
950 struct oecount_hasher : typed_noop_remove <void>
951 {
952 /* Note that this hash table stores integers, not pointers.
953 So, observe the casting in the member functions. */
954 typedef void value_type;
955 typedef void compare_type;
956 static inline hashval_t hash (const value_type *);
957 static inline bool equal (const value_type *, const compare_type *);
958 };
959
960 /* Hash function for oecount. */
961
962 inline hashval_t
963 oecount_hasher::hash (const value_type *p)
964 {
965 const oecount *c = &cvec[(size_t)p - 42];
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
967 }
968
969 /* Comparison function for oecount. */
970
971 inline bool
972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
973 {
974 const oecount *c1 = &cvec[(size_t)p1 - 42];
975 const oecount *c2 = &cvec[(size_t)p2 - 42];
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
978 }
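/* Note on the encoding used above: the table stores small integers cast to
   void *, not pointers to oecount objects.  A stored value V denotes
   cvec[V - 42]; the bias of 42 (matching the "+ 41" at the insertion point
   below) appears to exist only to keep valid entries distinct from the
   empty slot marker 0.  */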
979
980 /* Comparison function for qsort sorting oecount elements by count. */
981
982 static int
983 oecount_cmp (const void *p1, const void *p2)
984 {
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
992 }
993
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
996
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
999 {
1000 tree fndecl;
1001
1002 if (!is_gimple_call (stmt))
1003 return false;
1004
1005 fndecl = gimple_call_fndecl (stmt);
1006
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1010
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1012 {
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1016
1017 default:
1018 return false;
1019 }
1020 }
1021
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the new exponent. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1025
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1028 {
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1032
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1034 {
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1042
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1048
1049 default:
1050 gcc_unreachable ();
1051 }
1052 }
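/* For example, a call __builtin_pow (x, 3.0) is rewritten in place to
   __builtin_pow (x, 2.0) and 2 is returned; __builtin_powi (x, 5) becomes
   __builtin_powi (x, 4) with 4 returned.  */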
1053
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1057
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1060 {
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1065
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1070
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 unlink_stmt_vdef (stmt);
1080 gsi_remove (&gsi, true);
1081 release_defs (stmt);
1082 }
1083
1084 /* Walks the linear chain with result *DEF searching for an operation
1085 with operand OP and code OPCODE removing that from the chain. *DEF
1086 is updated if there is only one operand but no operation left. */
1087
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1090 {
1091 gimple stmt = SSA_NAME_DEF_STMT (*def);
1092
1093 do
1094 {
1095 tree name;
1096
1097 if (opcode == MULT_EXPR
1098 && stmt_is_power_of_op (stmt, op))
1099 {
1100 if (decrement_power (stmt) == 1)
1101 propagate_op_to_single_use (op, stmt, def);
1102 return;
1103 }
1104
1105 name = gimple_assign_rhs1 (stmt);
1106
1107 /* If this is the operation we look for and one of the operands
1108 is ours simply propagate the other operand into the stmts
1109 single use. */
1110 if (gimple_assign_rhs_code (stmt) == opcode
1111 && (name == op
1112 || gimple_assign_rhs2 (stmt) == op))
1113 {
1114 if (name == op)
1115 name = gimple_assign_rhs2 (stmt);
1116 propagate_op_to_single_use (name, stmt, def);
1117 return;
1118 }
1119
1120 /* We might have a multiply of two __builtin_pow* calls, and
1121 the operand might be hiding in the rightmost one. */
1122 if (opcode == MULT_EXPR
1123 && gimple_assign_rhs_code (stmt) == opcode
1124 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125 && has_single_use (gimple_assign_rhs2 (stmt)))
1126 {
1127 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128 if (stmt_is_power_of_op (stmt2, op))
1129 {
1130 if (decrement_power (stmt2) == 1)
1131 propagate_op_to_single_use (op, stmt2, def);
1132 return;
1133 }
1134 }
1135
1136 /* Continue walking the chain. */
1137 gcc_assert (name != op
1138 && TREE_CODE (name) == SSA_NAME);
1139 stmt = SSA_NAME_DEF_STMT (name);
1140 }
1141 while (1);
1142 }
1143
1144 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1145 of type TYPE for the result. Places the statement after the definition
1146 of either OP1 or OP2. Returns the new statement. */
1147
1148 static gimple
1149 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1150 {
1151 gimple op1def = NULL, op2def = NULL;
1152 gimple_stmt_iterator gsi;
1153 tree op;
1154 gimple sum;
1155
1156 /* Create the addition statement. */
1157 op = make_ssa_name (type, NULL);
1158 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1159
1160 /* Find an insertion place and insert. */
1161 if (TREE_CODE (op1) == SSA_NAME)
1162 op1def = SSA_NAME_DEF_STMT (op1);
1163 if (TREE_CODE (op2) == SSA_NAME)
1164 op2def = SSA_NAME_DEF_STMT (op2);
1165 if ((!op1def || gimple_nop_p (op1def))
1166 && (!op2def || gimple_nop_p (op2def)))
1167 {
1168 gimple first_stmt;
1169 unsigned uid;
1170 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1171 first_stmt = gsi_stmt (gsi);
1172 uid = first_stmt ? gimple_uid (first_stmt) : 1;
1173 gimple_set_uid (sum, uid);
1174 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1175 }
1176 else if ((!op1def || gimple_nop_p (op1def))
1177 || (op2def && !gimple_nop_p (op2def)
1178 && stmt_dominates_stmt_p (op1def, op2def)))
1179 {
1180 if (gimple_code (op2def) == GIMPLE_PHI)
1181 {
1182 gsi = gsi_after_labels (gimple_bb (op2def));
1183 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1184 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1185 }
1186 else
1187 {
1188 if (!stmt_ends_bb_p (op2def))
1189 {
1190 gsi = gsi_for_stmt (op2def);
1191 gimple_set_uid (sum, gimple_uid (op2def));
1192 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1193 }
1194 else
1195 {
1196 edge e;
1197 edge_iterator ei;
1198
1199 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1200 if (e->flags & EDGE_FALLTHRU)
1201 gsi_insert_on_edge_immediate (e, sum);
1202 }
1203 }
1204 }
1205 else
1206 {
1207 if (gimple_code (op1def) == GIMPLE_PHI)
1208 {
1209 gsi = gsi_after_labels (gimple_bb (op1def));
1210 gimple_set_uid (sum, gimple_uid (op1def));
1211 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1212 }
1213 else
1214 {
1215 if (!stmt_ends_bb_p (op1def))
1216 {
1217 gsi = gsi_for_stmt (op1def);
1218 gimple_set_uid (sum, gimple_uid (op1def));
1219 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1220 }
1221 else
1222 {
1223 edge e;
1224 edge_iterator ei;
1225
1226 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1227 if (e->flags & EDGE_FALLTHRU)
1228 gsi_insert_on_edge_immediate (e, sum);
1229 }
1230 }
1231 }
1232 update_stmt (sum);
1233
1234 return sum;
1235 }
1236
1237 /* Perform un-distribution of divisions and multiplications.
1238 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1239 to (A + B) / X for real X.
1240
1241 The algorithm is organized as follows.
1242
1243 - First we walk the addition chain *OPS looking for summands that
1244 are defined by a multiplication or a real division. This results
1245 in the candidates bitmap with relevant indices into *OPS.
1246
1247 - Second we build the chains of multiplications or divisions for
1248 these candidates, counting the number of occurrences of (operand, code)
1249 pairs in all of the candidates chains.
1250
1251 - Third we sort the (operand, code) pairs by number of occurrence and
1252 process them starting with the pair with the most uses.
1253
1254 * For each such pair we walk the candidates again to build a
1255 second candidate bitmap noting all multiplication/division chains
1256 that have at least one occurrence of (operand, code).
1257
1258 * We build an alternate addition chain only covering these
1259 candidates with one (operand, code) operation removed from their
1260 multiplication/division chain.
1261
1262 * The first candidate gets replaced by the alternate addition chain
1263 multiplied/divided by the operand.
1264
1265 * All candidate chains get disabled for further processing and
1266 processing of (operand, code) pairs continues.
1267
1268 The alternate addition chains built are re-processed by the main
1269 reassociation algorithm which allows optimizing a * x * y + b * y * x
1270 to (a + b ) * x * y in one invocation of the reassociation pass. */
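/* A small worked example (assumed operands): for the addition chain
   a*x + b*x + c, the candidates are the two multiplications; the counting
   table records (x, *) twice and (a, *), (b, *) once each, so (x, *) is
   processed first.  Stripping x from both chains leaves a and b, the new
   sum a + b is built and multiplied by x, and the operand list becomes
   { (a + b)*x, 0, c }.  */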
1271
1272 static bool
1273 undistribute_ops_list (enum tree_code opcode,
1274 vec<operand_entry_t> *ops, struct loop *loop)
1275 {
1276 unsigned int length = ops->length ();
1277 operand_entry_t oe1;
1278 unsigned i, j;
1279 sbitmap candidates, candidates2;
1280 unsigned nr_candidates, nr_candidates2;
1281 sbitmap_iterator sbi0;
1282 vec<operand_entry_t> *subops;
1283 hash_table <oecount_hasher> ctable;
1284 bool changed = false;
1285 int next_oecount_id = 0;
1286
1287 if (length <= 1
1288 || opcode != PLUS_EXPR)
1289 return false;
1290
1291 /* Build a list of candidates to process. */
1292 candidates = sbitmap_alloc (length);
1293 bitmap_clear (candidates);
1294 nr_candidates = 0;
1295 FOR_EACH_VEC_ELT (*ops, i, oe1)
1296 {
1297 enum tree_code dcode;
1298 gimple oe1def;
1299
1300 if (TREE_CODE (oe1->op) != SSA_NAME)
1301 continue;
1302 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1303 if (!is_gimple_assign (oe1def))
1304 continue;
1305 dcode = gimple_assign_rhs_code (oe1def);
1306 if ((dcode != MULT_EXPR
1307 && dcode != RDIV_EXPR)
1308 || !is_reassociable_op (oe1def, dcode, loop))
1309 continue;
1310
1311 bitmap_set_bit (candidates, i);
1312 nr_candidates++;
1313 }
1314
1315 if (nr_candidates < 2)
1316 {
1317 sbitmap_free (candidates);
1318 return false;
1319 }
1320
1321 if (dump_file && (dump_flags & TDF_DETAILS))
1322 {
1323 fprintf (dump_file, "searching for un-distribute opportunities ");
1324 print_generic_expr (dump_file,
1325 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1326 fprintf (dump_file, " %d\n", nr_candidates);
1327 }
1328
1329 /* Build linearized sub-operand lists and the counting table. */
1330 cvec.create (0);
1331 ctable.create (15);
1332 /* ??? Macro arguments cannot have multi-argument template types in
1333 them. This typedef is needed to work around that limitation. */
1334 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1335 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1336 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1337 {
1338 gimple oedef;
1339 enum tree_code oecode;
1340 unsigned j;
1341
1342 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1343 oecode = gimple_assign_rhs_code (oedef);
1344 linearize_expr_tree (&subops[i], oedef,
1345 associative_tree_code (oecode), false);
1346
1347 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1348 {
1349 oecount c;
1350 void **slot;
1351 size_t idx;
1352 c.oecode = oecode;
1353 c.cnt = 1;
1354 c.id = next_oecount_id++;
1355 c.op = oe1->op;
1356 cvec.safe_push (c);
1357 idx = cvec.length () + 41;
1358 slot = ctable.find_slot ((void *)idx, INSERT);
1359 if (!*slot)
1360 {
1361 *slot = (void *)idx;
1362 }
1363 else
1364 {
1365 cvec.pop ();
1366 cvec[(size_t)*slot - 42].cnt++;
1367 }
1368 }
1369 }
1370 ctable.dispose ();
1371
1372 /* Sort the counting table. */
1373 cvec.qsort (oecount_cmp);
1374
1375 if (dump_file && (dump_flags & TDF_DETAILS))
1376 {
1377 oecount *c;
1378 fprintf (dump_file, "Candidates:\n");
1379 FOR_EACH_VEC_ELT (cvec, j, c)
1380 {
1381 fprintf (dump_file, " %u %s: ", c->cnt,
1382 c->oecode == MULT_EXPR
1383 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1384 print_generic_expr (dump_file, c->op, 0);
1385 fprintf (dump_file, "\n");
1386 }
1387 }
1388
1389 /* Process the (operand, code) pairs in order of most occurrence. */
1390 candidates2 = sbitmap_alloc (length);
1391 while (!cvec.is_empty ())
1392 {
1393 oecount *c = &cvec.last ();
1394 if (c->cnt < 2)
1395 break;
1396
1397 /* Now collect the operands in the outer chain that contain
1398 the common operand in their inner chain. */
1399 bitmap_clear (candidates2);
1400 nr_candidates2 = 0;
1401 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1402 {
1403 gimple oedef;
1404 enum tree_code oecode;
1405 unsigned j;
1406 tree op = (*ops)[i]->op;
1407
1408 /* If we undistributed in this chain already this may be
1409 a constant. */
1410 if (TREE_CODE (op) != SSA_NAME)
1411 continue;
1412
1413 oedef = SSA_NAME_DEF_STMT (op);
1414 oecode = gimple_assign_rhs_code (oedef);
1415 if (oecode != c->oecode)
1416 continue;
1417
1418 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1419 {
1420 if (oe1->op == c->op)
1421 {
1422 bitmap_set_bit (candidates2, i);
1423 ++nr_candidates2;
1424 break;
1425 }
1426 }
1427 }
1428
1429 if (nr_candidates2 >= 2)
1430 {
1431 operand_entry_t oe1, oe2;
1432 gimple prod;
1433 int first = bitmap_first_set_bit (candidates2);
1434
1435 /* Build the new addition chain. */
1436 oe1 = (*ops)[first];
1437 if (dump_file && (dump_flags & TDF_DETAILS))
1438 {
1439 fprintf (dump_file, "Building (");
1440 print_generic_expr (dump_file, oe1->op, 0);
1441 }
1442 zero_one_operation (&oe1->op, c->oecode, c->op);
1443 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1444 {
1445 gimple sum;
1446 oe2 = (*ops)[i];
1447 if (dump_file && (dump_flags & TDF_DETAILS))
1448 {
1449 fprintf (dump_file, " + ");
1450 print_generic_expr (dump_file, oe2->op, 0);
1451 }
1452 zero_one_operation (&oe2->op, c->oecode, c->op);
1453 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1454 oe1->op, oe2->op, opcode);
1455 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1456 oe2->rank = 0;
1457 oe1->op = gimple_get_lhs (sum);
1458 }
1459
1460 /* Apply the multiplication/division. */
1461 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1462 oe1->op, c->op, c->oecode);
1463 if (dump_file && (dump_flags & TDF_DETAILS))
1464 {
1465 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1466 print_generic_expr (dump_file, c->op, 0);
1467 fprintf (dump_file, "\n");
1468 }
1469
1470 /* Record it in the addition chain and disable further
1471 undistribution with this op. */
1472 oe1->op = gimple_assign_lhs (prod);
1473 oe1->rank = get_rank (oe1->op);
1474 subops[first].release ();
1475
1476 changed = true;
1477 }
1478
1479 cvec.pop ();
1480 }
1481
1482 for (i = 0; i < ops->length (); ++i)
1483 subops[i].release ();
1484 free (subops);
1485 cvec.release ();
1486 sbitmap_free (candidates);
1487 sbitmap_free (candidates2);
1488
1489 return changed;
1490 }
1491
1492 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1493 expression, examine the other OPS to see if any of them are comparisons
1494 of the same values, which we may be able to combine or eliminate.
1495 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1496
1497 static bool
1498 eliminate_redundant_comparison (enum tree_code opcode,
1499 vec<operand_entry_t> *ops,
1500 unsigned int currindex,
1501 operand_entry_t curr)
1502 {
1503 tree op1, op2;
1504 enum tree_code lcode, rcode;
1505 gimple def1, def2;
1506 int i;
1507 operand_entry_t oe;
1508
1509 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1510 return false;
1511
1512 /* Check that CURR is a comparison. */
1513 if (TREE_CODE (curr->op) != SSA_NAME)
1514 return false;
1515 def1 = SSA_NAME_DEF_STMT (curr->op);
1516 if (!is_gimple_assign (def1))
1517 return false;
1518 lcode = gimple_assign_rhs_code (def1);
1519 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1520 return false;
1521 op1 = gimple_assign_rhs1 (def1);
1522 op2 = gimple_assign_rhs2 (def1);
1523
1524 /* Now look for a similar comparison in the remaining OPS. */
1525 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1526 {
1527 tree t;
1528
1529 if (TREE_CODE (oe->op) != SSA_NAME)
1530 continue;
1531 def2 = SSA_NAME_DEF_STMT (oe->op);
1532 if (!is_gimple_assign (def2))
1533 continue;
1534 rcode = gimple_assign_rhs_code (def2);
1535 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1536 continue;
1537
1538 /* If we got here, we have a match. See if we can combine the
1539 two comparisons. */
1540 if (opcode == BIT_IOR_EXPR)
1541 t = maybe_fold_or_comparisons (lcode, op1, op2,
1542 rcode, gimple_assign_rhs1 (def2),
1543 gimple_assign_rhs2 (def2));
1544 else
1545 t = maybe_fold_and_comparisons (lcode, op1, op2,
1546 rcode, gimple_assign_rhs1 (def2),
1547 gimple_assign_rhs2 (def2));
1548 if (!t)
1549 continue;
1550
1551 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1552 always give us a boolean_type_node value back. If the original
1553 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1554 we need to convert. */
1555 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1556 t = fold_convert (TREE_TYPE (curr->op), t);
1557
1558 if (TREE_CODE (t) != INTEGER_CST
1559 && !operand_equal_p (t, curr->op, 0))
1560 {
1561 enum tree_code subcode;
1562 tree newop1, newop2;
1563 if (!COMPARISON_CLASS_P (t))
1564 continue;
1565 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1566 STRIP_USELESS_TYPE_CONVERSION (newop1);
1567 STRIP_USELESS_TYPE_CONVERSION (newop2);
1568 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1569 continue;
1570 }
1571
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 {
1574 fprintf (dump_file, "Equivalence: ");
1575 print_generic_expr (dump_file, curr->op, 0);
1576 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1577 print_generic_expr (dump_file, oe->op, 0);
1578 fprintf (dump_file, " -> ");
1579 print_generic_expr (dump_file, t, 0);
1580 fprintf (dump_file, "\n");
1581 }
1582
1583 /* Now we can delete oe, as it has been subsumed by the new combined
1584 expression t. */
1585 ops->ordered_remove (i);
1586 reassociate_stats.ops_eliminated ++;
1587
1588 /* If t is the same as curr->op, we're done. Otherwise we must
1589 replace curr->op with t. Special case is if we got a constant
1590 back, in which case we add it to the end instead of in place of
1591 the current entry. */
1592 if (TREE_CODE (t) == INTEGER_CST)
1593 {
1594 ops->ordered_remove (currindex);
1595 add_to_ops_vec (ops, t);
1596 }
1597 else if (!operand_equal_p (t, curr->op, 0))
1598 {
1599 gimple sum;
1600 enum tree_code subcode;
1601 tree newop1;
1602 tree newop2;
1603 gcc_assert (COMPARISON_CLASS_P (t));
1604 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1605 STRIP_USELESS_TYPE_CONVERSION (newop1);
1606 STRIP_USELESS_TYPE_CONVERSION (newop2);
1607 gcc_checking_assert (is_gimple_val (newop1)
1608 && is_gimple_val (newop2));
1609 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1610 curr->op = gimple_get_lhs (sum);
1611 }
1612 return true;
1613 }
1614
1615 return false;
1616 }
1617
1618 /* Perform various identities and other optimizations on the list of
1619 operand entries, stored in OPS. The tree code for the binary
1620 operation between all the operands is OPCODE. */
1621
1622 static void
1623 optimize_ops_list (enum tree_code opcode,
1624 vec<operand_entry_t> *ops)
1625 {
1626 unsigned int length = ops->length ();
1627 unsigned int i;
1628 operand_entry_t oe;
1629 operand_entry_t oelast = NULL;
1630 bool iterate = false;
1631
1632 if (length == 1)
1633 return;
1634
1635 oelast = ops->last ();
1636
1637 /* If the last two are constants, pop the constants off, merge them
1638 and try the next two. */
1639 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1640 {
1641 operand_entry_t oelm1 = (*ops)[length - 2];
1642
1643 if (oelm1->rank == 0
1644 && is_gimple_min_invariant (oelm1->op)
1645 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1646 TREE_TYPE (oelast->op)))
1647 {
1648 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1649 oelm1->op, oelast->op);
1650
1651 if (folded && is_gimple_min_invariant (folded))
1652 {
1653 if (dump_file && (dump_flags & TDF_DETAILS))
1654 fprintf (dump_file, "Merging constants\n");
1655
1656 ops->pop ();
1657 ops->pop ();
1658
1659 add_to_ops_vec (ops, folded);
1660 reassociate_stats.constants_eliminated++;
1661
1662 optimize_ops_list (opcode, ops);
1663 return;
1664 }
1665 }
1666 }
1667
1668 eliminate_using_constants (opcode, ops);
1669 oelast = NULL;
1670
1671 for (i = 0; ops->iterate (i, &oe);)
1672 {
1673 bool done = false;
1674
1675 if (eliminate_not_pairs (opcode, ops, i, oe))
1676 return;
1677 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1678 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1679 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1680 {
1681 if (done)
1682 return;
1683 iterate = true;
1684 oelast = NULL;
1685 continue;
1686 }
1687 oelast = oe;
1688 i++;
1689 }
1690
1691 length = ops->length ();
1692 oelast = ops->last ();
1693
1694 if (iterate)
1695 optimize_ops_list (opcode, ops);
1696 }
1697
1698 /* The following functions are subroutines to optimize_range_tests and allow
1699 it to try to change a logical combination of comparisons into a range
1700 test.
1701
1702 For example, both
1703 X == 2 || X == 5 || X == 3 || X == 4
1704 and
1705 X >= 2 && X <= 5
1706 are converted to
1707 (unsigned) (X - 2) <= 3
1708
1709 For more information see comments above fold_test_range in fold-const.c,
1710 this implementation is for GIMPLE. */
1711
1712 struct range_entry
1713 {
1714 tree exp;
1715 tree low;
1716 tree high;
1717 bool in_p;
1718 bool strict_overflow_p;
1719 unsigned int idx, next;
1720 };
1721
1722 /* This is similar to make_range in fold-const.c, but on top of
1723 GIMPLE instead of trees. If EXP is non-NULL, it should be
1724 an SSA_NAME and STMT argument is ignored, otherwise STMT
1725 argument should be a GIMPLE_COND. */
1726
1727 static void
1728 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1729 {
1730 int in_p;
1731 tree low, high;
1732 bool is_bool, strict_overflow_p;
1733
1734 r->exp = NULL_TREE;
1735 r->in_p = false;
1736 r->strict_overflow_p = false;
1737 r->low = NULL_TREE;
1738 r->high = NULL_TREE;
1739 if (exp != NULL_TREE
1740 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1741 return;
1742
1743 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1744 and see if we can refine the range. Some of the cases below may not
1745 happen, but it doesn't seem worth worrying about this. We "continue"
1746 the outer loop when we've changed something; otherwise we "break"
1747 the switch, which will "break" the while. */
1748 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1749 high = low;
1750 in_p = 0;
1751 strict_overflow_p = false;
1752 is_bool = false;
1753 if (exp == NULL_TREE)
1754 is_bool = true;
1755 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1756 {
1757 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1758 is_bool = true;
1759 else
1760 return;
1761 }
1762 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1763 is_bool = true;
1764
1765 while (1)
1766 {
1767 enum tree_code code;
1768 tree arg0, arg1, exp_type;
1769 tree nexp;
1770 location_t loc;
1771
1772 if (exp != NULL_TREE)
1773 {
1774 if (TREE_CODE (exp) != SSA_NAME)
1775 break;
1776
1777 stmt = SSA_NAME_DEF_STMT (exp);
1778 if (!is_gimple_assign (stmt))
1779 break;
1780
1781 code = gimple_assign_rhs_code (stmt);
1782 arg0 = gimple_assign_rhs1 (stmt);
1783 arg1 = gimple_assign_rhs2 (stmt);
1784 exp_type = TREE_TYPE (exp);
1785 }
1786 else
1787 {
1788 code = gimple_cond_code (stmt);
1789 arg0 = gimple_cond_lhs (stmt);
1790 arg1 = gimple_cond_rhs (stmt);
1791 exp_type = boolean_type_node;
1792 }
1793
1794 if (TREE_CODE (arg0) != SSA_NAME)
1795 break;
1796 loc = gimple_location (stmt);
1797 switch (code)
1798 {
1799 case BIT_NOT_EXPR:
1800 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1801 {
1802 in_p = !in_p;
1803 exp = arg0;
1804 continue;
1805 }
1806 break;
1807 case SSA_NAME:
1808 exp = arg0;
1809 continue;
1810 CASE_CONVERT:
1811 if (is_bool)
1812 goto do_default;
1813 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1814 {
1815 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1816 is_bool = true;
1817 else
1818 return;
1819 }
1820 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1821 is_bool = true;
1822 goto do_default;
1823 case EQ_EXPR:
1824 case NE_EXPR:
1825 case LT_EXPR:
1826 case LE_EXPR:
1827 case GE_EXPR:
1828 case GT_EXPR:
1829 is_bool = true;
1830 /* FALLTHRU */
1831 default:
1832 if (!is_bool)
1833 return;
1834 do_default:
1835 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1836 &low, &high, &in_p,
1837 &strict_overflow_p);
1838 if (nexp != NULL_TREE)
1839 {
1840 exp = nexp;
1841 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1842 continue;
1843 }
1844 break;
1845 }
1846 break;
1847 }
1848 if (is_bool)
1849 {
1850 r->exp = exp;
1851 r->in_p = in_p;
1852 r->low = low;
1853 r->high = high;
1854 r->strict_overflow_p = strict_overflow_p;
1855 }
1856 }
1857
1858 /* Comparison function for qsort. Sort entries
1859 without SSA_NAME exp first, then with SSA_NAMEs sorted
1860 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1861 by increasing ->low and if ->low is the same, by increasing
1862 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1863 maximum. */
1864
1865 static int
1866 range_entry_cmp (const void *a, const void *b)
1867 {
1868 const struct range_entry *p = (const struct range_entry *) a;
1869 const struct range_entry *q = (const struct range_entry *) b;
1870
1871 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1872 {
1873 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1874 {
1875 /* Group range_entries for the same SSA_NAME together. */
1876 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1877 return -1;
1878 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1879 return 1;
1880 /* If ->low is different, NULL low goes first, then by
1881 ascending low. */
1882 if (p->low != NULL_TREE)
1883 {
1884 if (q->low != NULL_TREE)
1885 {
1886 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1887 p->low, q->low);
1888 if (tem && integer_onep (tem))
1889 return -1;
1890 tem = fold_binary (GT_EXPR, boolean_type_node,
1891 p->low, q->low);
1892 if (tem && integer_onep (tem))
1893 return 1;
1894 }
1895 else
1896 return 1;
1897 }
1898 else if (q->low != NULL_TREE)
1899 return -1;
1900 /* If ->high is different, NULL high goes last, before that by
1901 ascending high. */
1902 if (p->high != NULL_TREE)
1903 {
1904 if (q->high != NULL_TREE)
1905 {
1906 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1907 p->high, q->high);
1908 if (tem && integer_onep (tem))
1909 return -1;
1910 tem = fold_binary (GT_EXPR, boolean_type_node,
1911 p->high, q->high);
1912 if (tem && integer_onep (tem))
1913 return 1;
1914 }
1915 else
1916 return -1;
1917 }
1918 else if (p->high != NULL_TREE)
1919 return 1;
1920 /* If both ranges are the same, sort below by ascending idx. */
1921 }
1922 else
1923 return 1;
1924 }
1925 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1926 return -1;
1927
1928 if (p->idx < q->idx)
1929 return -1;
1930 else
1931 {
1932 gcc_checking_assert (p->idx > q->idx);
1933 return 1;
1934 }
1935 }
1936
1937 /* Helper routine of optimize_range_tests.
1938 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1939 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1940 OPCODE and OPS are arguments of optimize_range_tests. Return
1941 true if the range merge has been successful.
1942 If OPCODE is ERROR_MARK, this is called from within
1943 maybe_optimize_range_tests and is performing inter-bb range optimization.
1944 Changes should then be performed right away, and whether an op is
1945 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1946
1947 static bool
1948 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1949 unsigned int count, enum tree_code opcode,
1950 vec<operand_entry_t> *ops, tree exp, bool in_p,
1951 tree low, tree high, bool strict_overflow_p)
1952 {
1953 operand_entry_t oe = (*ops)[range->idx];
1954 tree op = oe->op;
1955 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1956 location_t loc = gimple_location (stmt);
1957 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1958 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1959 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1960 gimple_stmt_iterator gsi;
1961
1962 if (tem == NULL_TREE)
1963 return false;
1964
1965 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1966 warning_at (loc, OPT_Wstrict_overflow,
1967 "assuming signed overflow does not occur "
1968 "when simplifying range test");
1969
1970 if (dump_file && (dump_flags & TDF_DETAILS))
1971 {
1972 struct range_entry *r;
1973 fprintf (dump_file, "Optimizing range tests ");
1974 print_generic_expr (dump_file, range->exp, 0);
1975 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1976 print_generic_expr (dump_file, range->low, 0);
1977 fprintf (dump_file, ", ");
1978 print_generic_expr (dump_file, range->high, 0);
1979 fprintf (dump_file, "]");
1980 for (r = otherrange; r < otherrange + count; r++)
1981 {
1982 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1983 print_generic_expr (dump_file, r->low, 0);
1984 fprintf (dump_file, ", ");
1985 print_generic_expr (dump_file, r->high, 0);
1986 fprintf (dump_file, "]");
1987 }
1988 fprintf (dump_file, "\n into ");
1989 print_generic_expr (dump_file, tem, 0);
1990 fprintf (dump_file, "\n");
1991 }
1992
1993 if (opcode == BIT_IOR_EXPR
1994 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1995 tem = invert_truthvalue_loc (loc, tem);
1996
1997 tem = fold_convert_loc (loc, optype, tem);
1998 gsi = gsi_for_stmt (stmt);
1999 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2000 GSI_SAME_STMT);
2001
2002 /* If doing inter-bb range test optimization, update the
2003 stmts immediately. Start with changing the first range test
2004 immediate use to the new value (TEM), or, if the first range
2005 test is a GIMPLE_COND stmt, change that condition. */
2006 if (opcode == ERROR_MARK)
2007 {
2008 if (op)
2009 {
2010 imm_use_iterator iter;
2011 use_operand_p use_p;
2012 gimple use_stmt;
2013
2014 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2015 {
2016 if (is_gimple_debug (use_stmt))
2017 continue;
2018 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2019 SET_USE (use_p, tem);
2020 update_stmt (use_stmt);
2021 }
2022 }
2023 else
2024 {
2025 gimple_cond_set_code (stmt, NE_EXPR);
2026 gimple_cond_set_lhs (stmt, tem);
2027 gimple_cond_set_rhs (stmt, boolean_false_node);
2028 update_stmt (stmt);
2029 }
2030 }
2031 oe->op = tem;
2032 range->exp = exp;
2033 range->low = low;
2034 range->high = high;
2035 range->in_p = in_p;
2036 range->strict_overflow_p = false;
2037
2038 for (range = otherrange; range < otherrange + count; range++)
2039 {
2040 oe = (*ops)[range->idx];
2041 /* Now change all the other range test immediate uses, so that
2042 those tests will be optimized away. */
2043 if (opcode == ERROR_MARK)
2044 {
2045 if (oe->op)
2046 {
2047 imm_use_iterator iter;
2048 use_operand_p use_p;
2049 gimple use_stmt;
2050
2051 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2052 {
2053 if (is_gimple_debug (use_stmt))
2054 continue;
2055 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2056 adjust it into _7 = _9;. */
2057 if (is_gimple_assign (use_stmt)
2058 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2059 {
2060 tree expr = NULL_TREE;
2061 if (oe->op == gimple_assign_rhs1 (use_stmt))
2062 expr = gimple_assign_rhs2 (use_stmt);
2063 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2064 expr = gimple_assign_rhs1 (use_stmt);
2065 if (expr
2066 && expr != oe->op
2067 && TREE_CODE (expr) == SSA_NAME)
2068 {
2069 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2070 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2071 expr, NULL_TREE);
2072 update_stmt (use_stmt);
2073 continue;
2074 }
2075 }
2076 /* If imm use of _8 is a statement like _7 = (int) _8;,
2077 adjust it into _7 = 0; or _7 = 1;. */
2078 if (gimple_assign_cast_p (use_stmt)
2079 && oe->op == gimple_assign_rhs1 (use_stmt))
2080 {
2081 tree lhs = gimple_assign_lhs (use_stmt);
2082 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2083 {
2084 gimple_stmt_iterator gsi2
2085 = gsi_for_stmt (use_stmt);
2086 tree expr = build_int_cst (TREE_TYPE (lhs),
2087 oe->rank == BIT_IOR_EXPR
2088 ? 0 : 1);
2089 gimple_assign_set_rhs_with_ops (&gsi2,
2090 INTEGER_CST,
2091 expr, NULL_TREE);
2092 update_stmt (use_stmt);
2093 continue;
2094 }
2095 }
2096 /* Otherwise replace the use with 0 or 1. */
2097 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2098 SET_USE (use_p,
2099 build_int_cst (TREE_TYPE (oe->op),
2100 oe->rank == BIT_IOR_EXPR
2101 ? 0 : 1));
2102 update_stmt (use_stmt);
2103 }
2104 }
2105 else
2106 {
2107 /* If range test was a GIMPLE_COND, simply change it
2108 into an always false or always true condition. */
2109 stmt = last_stmt (BASIC_BLOCK (oe->id));
2110 if (oe->rank == BIT_IOR_EXPR)
2111 gimple_cond_make_false (stmt);
2112 else
2113 gimple_cond_make_true (stmt);
2114 update_stmt (stmt);
2115 }
2116 }
2117 oe->op = error_mark_node;
2118 range->exp = NULL_TREE;
2119 }
2120 return true;
2121 }
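
/* For intuition (an assumed example): if the merged range is 2 <= EXP <= 5
   with IN_P set, build_range_check typically produces something equivalent
   to (unsigned) (EXP - 2) <= 3, and that single test, forced into a new
   SSA name above, replaces all of the original range tests.  */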
2122
2123 /* Optimize range tests, similarly to how fold_range_test optimizes
2124 them on trees.  The tree code for the binary
2125 operation between all the operands is OPCODE.
2126 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2127 maybe_optimize_range_tests for inter-bb range optimization.
2128 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2129 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2130 the actual opcode. */
2131
2132 static void
2133 optimize_range_tests (enum tree_code opcode,
2134 vec<operand_entry_t> *ops)
2135 {
2136 unsigned int length = ops->length (), i, j, first;
2137 operand_entry_t oe;
2138 struct range_entry *ranges;
2139 bool any_changes = false;
2140
2141 if (length == 1)
2142 return;
2143
2144 ranges = XNEWVEC (struct range_entry, length);
2145 for (i = 0; i < length; i++)
2146 {
2147 oe = (*ops)[i];
2148 ranges[i].idx = i;
2149 init_range_entry (ranges + i, oe->op,
2150 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2151 /* For |, invert it now; we will invert it again before emitting
2152 the optimized expression. */
2153 if (opcode == BIT_IOR_EXPR
2154 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2155 ranges[i].in_p = !ranges[i].in_p;
2156 }
2157
2158 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2159 for (i = 0; i < length; i++)
2160 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2161 break;
2162
2163 /* Try to merge ranges. */
2164 for (first = i; i < length; i++)
2165 {
2166 tree low = ranges[i].low;
2167 tree high = ranges[i].high;
2168 int in_p = ranges[i].in_p;
2169 bool strict_overflow_p = ranges[i].strict_overflow_p;
2170 int update_fail_count = 0;
2171
2172 for (j = i + 1; j < length; j++)
2173 {
2174 if (ranges[i].exp != ranges[j].exp)
2175 break;
2176 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2177 ranges[j].in_p, ranges[j].low, ranges[j].high))
2178 break;
2179 strict_overflow_p |= ranges[j].strict_overflow_p;
2180 }
2181
2182 if (j == i + 1)
2183 continue;
2184
2185 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2186 ops, ranges[i].exp, in_p, low, high,
2187 strict_overflow_p))
2188 {
2189 i = j - 1;
2190 any_changes = true;
2191 }
2192 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2193 while update_range_test would fail. */
2194 else if (update_fail_count == 64)
2195 i = j - 1;
2196 else
2197 ++update_fail_count;
2198 }
2199
2200 /* Optimize X == CST1 || X == CST2
2201 if popcount (CST1 ^ CST2) == 1 into
2202 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2203 Similarly for ranges. E.g.
2204 X != 2 && X != 3 && X != 10 && X != 11
2205 will be transformed by the above loop into
2206 (X - 2U) <= 1U && (X - 10U) <= 1U
2207 and this loop can transform that into
2208 ((X & ~8) - 2U) <= 1U. */
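/* A plain-C sketch of why the single-bit-xor trick is valid (assumed
   values, not from a testcase): with CST1 == 2 and CST2 == 10 we have
   CST1 ^ CST2 == 8, a single bit, and

     int slow (unsigned int x) { return x == 2 || x == 10; }
     int fast (unsigned int x) { return (x & ~8u) == 2u; }

   agree for every X: clearing bit 3 maps exactly 2 and 10 onto 2 and maps
   no other value there, so the two equality tests collapse into one.  */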
2209 for (i = first; i < length; i++)
2210 {
2211 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2212
2213 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2214 continue;
2215 type = TREE_TYPE (ranges[i].exp);
2216 if (!INTEGRAL_TYPE_P (type))
2217 continue;
2218 lowi = ranges[i].low;
2219 if (lowi == NULL_TREE)
2220 lowi = TYPE_MIN_VALUE (type);
2221 highi = ranges[i].high;
2222 if (highi == NULL_TREE)
2223 continue;
2224 for (j = i + 1; j < length && j < i + 64; j++)
2225 {
2226 if (ranges[j].exp == NULL_TREE)
2227 continue;
2228 if (ranges[i].exp != ranges[j].exp)
2229 break;
2230 if (ranges[j].in_p)
2231 continue;
2232 lowj = ranges[j].low;
2233 if (lowj == NULL_TREE)
2234 continue;
2235 highj = ranges[j].high;
2236 if (highj == NULL_TREE)
2237 highj = TYPE_MAX_VALUE (type);
2238 tem = fold_binary (GT_EXPR, boolean_type_node,
2239 lowj, highi);
2240 if (tem == NULL_TREE || !integer_onep (tem))
2241 continue;
2242 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2243 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2244 continue;
2245 gcc_checking_assert (!integer_zerop (lowxor));
2246 tem = fold_binary (MINUS_EXPR, type, lowxor,
2247 build_int_cst (type, 1));
2248 if (tem == NULL_TREE)
2249 continue;
2250 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2251 if (tem == NULL_TREE || !integer_zerop (tem))
2252 continue;
2253 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2254 if (!tree_int_cst_equal (lowxor, highxor))
2255 continue;
2256 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2257 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2258 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2259 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2260 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2261 ranges[i].in_p, lowj, highj,
2262 ranges[i].strict_overflow_p
2263 || ranges[j].strict_overflow_p))
2264 {
2265 any_changes = true;
2266 break;
2267 }
2268 }
2269 }
2270
2271 if (any_changes && opcode != ERROR_MARK)
2272 {
2273 j = 0;
2274 FOR_EACH_VEC_ELT (*ops, i, oe)
2275 {
2276 if (oe->op == error_mark_node)
2277 continue;
2278 else if (i != j)
2279 (*ops)[j] = oe;
2280 j++;
2281 }
2282 ops->truncate (j);
2283 }
2284
2285 XDELETEVEC (ranges);
2286 }
2287
2288 /* Return true if STMT is a cast like:
2289 <bb N>:
2290 ...
2291 _123 = (int) _234;
2292
2293 <bb M>:
2294 # _345 = PHI <_123(N), 1(...), 1(...)>
2295 where _234 has bool type, _123 has a single use and
2296 bb N has a single successor M. This is commonly used in
2297 the last block of a range test. */
2298
2299 static bool
2300 final_range_test_p (gimple stmt)
2301 {
2302 basic_block bb, rhs_bb;
2303 edge e;
2304 tree lhs, rhs;
2305 use_operand_p use_p;
2306 gimple use_stmt;
2307
2308 if (!gimple_assign_cast_p (stmt))
2309 return false;
2310 bb = gimple_bb (stmt);
2311 if (!single_succ_p (bb))
2312 return false;
2313 e = single_succ_edge (bb);
2314 if (e->flags & EDGE_COMPLEX)
2315 return false;
2316
2317 lhs = gimple_assign_lhs (stmt);
2318 rhs = gimple_assign_rhs1 (stmt);
2319 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2320 || TREE_CODE (rhs) != SSA_NAME
2321 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2322 return false;
2323
2324 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2325 if (!single_imm_use (lhs, &use_p, &use_stmt))
2326 return false;
2327
2328 if (gimple_code (use_stmt) != GIMPLE_PHI
2329 || gimple_bb (use_stmt) != e->dest)
2330 return false;
2331
2332 /* And that the rhs is defined in the same loop. */
2333 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2334 if (rhs_bb == NULL
2335 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2336 return false;
2337
2338 return true;
2339 }
2340
2341 /* Return true if BB is a suitable basic block for inter-bb range test
2342 optimization.  If BACKWARD is true, BB should be the only predecessor
2343 of TEST_BB, and *OTHER_BB is either NULL and filled in by this routine,
2344 or compared against to find a common basic block to which all conditions
2345 branch if true resp. false.  If BACKWARD is false, TEST_BB should
2346 be the only predecessor of BB. */
2347
2348 static bool
2349 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2350 bool backward)
2351 {
2352 edge_iterator ei, ei2;
2353 edge e, e2;
2354 gimple stmt;
2355 gimple_stmt_iterator gsi;
2356 bool other_edge_seen = false;
2357 bool is_cond;
2358
2359 if (test_bb == bb)
2360 return false;
2361 /* Check last stmt first. */
2362 stmt = last_stmt (bb);
2363 if (stmt == NULL
2364 || (gimple_code (stmt) != GIMPLE_COND
2365 && (backward || !final_range_test_p (stmt)))
2366 || gimple_visited_p (stmt)
2367 || stmt_could_throw_p (stmt)
2368 || *other_bb == bb)
2369 return false;
2370 is_cond = gimple_code (stmt) == GIMPLE_COND;
2371 if (is_cond)
2372 {
2373 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2374 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2375 to *OTHER_BB (if not set yet, try to find it out). */
2376 if (EDGE_COUNT (bb->succs) != 2)
2377 return false;
2378 FOR_EACH_EDGE (e, ei, bb->succs)
2379 {
2380 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2381 return false;
2382 if (e->dest == test_bb)
2383 {
2384 if (backward)
2385 continue;
2386 else
2387 return false;
2388 }
2389 if (e->dest == bb)
2390 return false;
2391 if (*other_bb == NULL)
2392 {
2393 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2394 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2395 return false;
2396 else if (e->dest == e2->dest)
2397 *other_bb = e->dest;
2398 if (*other_bb == NULL)
2399 return false;
2400 }
2401 if (e->dest == *other_bb)
2402 other_edge_seen = true;
2403 else if (backward)
2404 return false;
2405 }
2406 if (*other_bb == NULL || !other_edge_seen)
2407 return false;
2408 }
2409 else if (single_succ (bb) != *other_bb)
2410 return false;
2411
2412 /* Now check all PHIs of *OTHER_BB. */
2413 e = find_edge (bb, *other_bb);
2414 e2 = find_edge (test_bb, *other_bb);
2415 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2416 {
2417 gimple phi = gsi_stmt (gsi);
2418 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2419 corresponding to BB and TEST_BB predecessor must be the same. */
2420 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2421 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2422 {
2423 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2424 one of the PHIs should have the lhs of the last stmt in
2425 that block as PHI arg and that PHI should have 0 or 1
2426 corresponding to it in all other range test basic blocks
2427 considered. */
2428 if (!is_cond)
2429 {
2430 if (gimple_phi_arg_def (phi, e->dest_idx)
2431 == gimple_assign_lhs (stmt)
2432 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2433 || integer_onep (gimple_phi_arg_def (phi,
2434 e2->dest_idx))))
2435 continue;
2436 }
2437 else
2438 {
2439 gimple test_last = last_stmt (test_bb);
2440 if (gimple_code (test_last) != GIMPLE_COND
2441 && gimple_phi_arg_def (phi, e2->dest_idx)
2442 == gimple_assign_lhs (test_last)
2443 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2444 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2445 continue;
2446 }
2447
2448 return false;
2449 }
2450 }
2451 return true;
2452 }
2453
2454 /* Return true if BB doesn't have side-effects that would disallow
2455 range test optimization, all SSA_NAMEs set in the bb are consumed
2456 in the bb and there are no PHIs. */
2457
2458 static bool
2459 no_side_effect_bb (basic_block bb)
2460 {
2461 gimple_stmt_iterator gsi;
2462 gimple last;
2463
2464 if (!gimple_seq_empty_p (phi_nodes (bb)))
2465 return false;
2466 last = last_stmt (bb);
2467 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2468 {
2469 gimple stmt = gsi_stmt (gsi);
2470 tree lhs;
2471 imm_use_iterator imm_iter;
2472 use_operand_p use_p;
2473
2474 if (is_gimple_debug (stmt))
2475 continue;
2476 if (gimple_has_side_effects (stmt))
2477 return false;
2478 if (stmt == last)
2479 return true;
2480 if (!is_gimple_assign (stmt))
2481 return false;
2482 lhs = gimple_assign_lhs (stmt);
2483 if (TREE_CODE (lhs) != SSA_NAME)
2484 return false;
2485 if (gimple_assign_rhs_could_trap_p (stmt))
2486 return false;
2487 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2488 {
2489 gimple use_stmt = USE_STMT (use_p);
2490 if (is_gimple_debug (use_stmt))
2491 continue;
2492 if (gimple_bb (use_stmt) != bb)
2493 return false;
2494 }
2495 }
2496 return false;
2497 }
2498
2499 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2500 return true and fill in *OPS recursively. */
2501
2502 static bool
2503 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2504 struct loop *loop)
2505 {
2506 gimple stmt = SSA_NAME_DEF_STMT (var);
2507 tree rhs[2];
2508 int i;
2509
2510 if (!is_reassociable_op (stmt, code, loop))
2511 return false;
2512
2513 rhs[0] = gimple_assign_rhs1 (stmt);
2514 rhs[1] = gimple_assign_rhs2 (stmt);
2515 gimple_set_visited (stmt, true);
2516 for (i = 0; i < 2; i++)
2517 if (TREE_CODE (rhs[i]) == SSA_NAME
2518 && !get_ops (rhs[i], code, ops, loop)
2519 && has_single_use (rhs[i]))
2520 {
2521 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2522
2523 oe->op = rhs[i];
2524 oe->rank = code;
2525 oe->id = 0;
2526 oe->count = 1;
2527 ops->safe_push (oe);
2528 }
2529 return true;
2530 }
2531
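/* A minimal source-level sketch of the shape handled below (an assumed
   example, not a real testcase):

     int
     f (unsigned int a, unsigned int b)
     {
       if (a == 1 || a == 3 || b > 10)
         return 1;
       return 0;
     }

   Each short-circuited condition typically ends up in its own basic block
   terminated by a GIMPLE_COND, all branching to a common destination.
   maybe_optimize_range_tests below collects those conditions into an
   operand vector and lets optimize_range_tests merge e.g. the
   a == 1 || a == 3 part into a single check such as (a & ~2U) == 1.  */
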
2532 /* Inter-bb range test optimization. */
2533
2534 static void
2535 maybe_optimize_range_tests (gimple stmt)
2536 {
2537 basic_block first_bb = gimple_bb (stmt);
2538 basic_block last_bb = first_bb;
2539 basic_block other_bb = NULL;
2540 basic_block bb;
2541 edge_iterator ei;
2542 edge e;
2543 vec<operand_entry_t> ops = vNULL;
2544
2545 /* Consider only basic blocks that end with GIMPLE_COND or
2546 a cast statement satisfying final_range_test_p. All
2547 but the last bb in the first_bb .. last_bb range
2548 should end with GIMPLE_COND. */
2549 if (gimple_code (stmt) == GIMPLE_COND)
2550 {
2551 if (EDGE_COUNT (first_bb->succs) != 2)
2552 return;
2553 }
2554 else if (final_range_test_p (stmt))
2555 other_bb = single_succ (first_bb);
2556 else
2557 return;
2558
2559 if (stmt_could_throw_p (stmt))
2560 return;
2561
2562 /* As relative ordering of post-dominator sons isn't fixed,
2563 maybe_optimize_range_tests can be called first on any
2564 bb in the range we want to optimize. So, start searching
2565 backwards, if first_bb can be set to a predecessor. */
2566 while (single_pred_p (first_bb))
2567 {
2568 basic_block pred_bb = single_pred (first_bb);
2569 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2570 break;
2571 if (!no_side_effect_bb (first_bb))
2572 break;
2573 first_bb = pred_bb;
2574 }
2575 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2576 Before starting forward search in last_bb successors, find
2577 out the other_bb. */
2578 if (first_bb == last_bb)
2579 {
2580 other_bb = NULL;
2581 /* As a non-GIMPLE_COND last stmt always terminates the range,
2582 if the backward search didn't discover anything, just give up. */
2583 if (gimple_code (stmt) != GIMPLE_COND)
2584 return;
2585 /* Look at both successors. Either it ends with a GIMPLE_COND
2586 and satisfies suitable_cond_bb, or ends with a cast and
2587 other_bb is that cast's successor. */
2588 FOR_EACH_EDGE (e, ei, first_bb->succs)
2589 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2590 || e->dest == first_bb)
2591 return;
2592 else if (single_pred_p (e->dest))
2593 {
2594 stmt = last_stmt (e->dest);
2595 if (stmt
2596 && gimple_code (stmt) == GIMPLE_COND
2597 && EDGE_COUNT (e->dest->succs) == 2)
2598 {
2599 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2600 break;
2601 else
2602 other_bb = NULL;
2603 }
2604 else if (stmt
2605 && final_range_test_p (stmt)
2606 && find_edge (first_bb, single_succ (e->dest)))
2607 {
2608 other_bb = single_succ (e->dest);
2609 if (other_bb == first_bb)
2610 other_bb = NULL;
2611 }
2612 }
2613 if (other_bb == NULL)
2614 return;
2615 }
2616 /* Now do the forward search, moving last_bb to successor bbs
2617 that aren't other_bb. */
2618 while (EDGE_COUNT (last_bb->succs) == 2)
2619 {
2620 FOR_EACH_EDGE (e, ei, last_bb->succs)
2621 if (e->dest != other_bb)
2622 break;
2623 if (e == NULL)
2624 break;
2625 if (!single_pred_p (e->dest))
2626 break;
2627 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2628 break;
2629 if (!no_side_effect_bb (e->dest))
2630 break;
2631 last_bb = e->dest;
2632 }
2633 if (first_bb == last_bb)
2634 return;
2635 /* Here basic blocks first_bb through last_bb's predecessor
2636 end with GIMPLE_COND, all of them have one of the edges to
2637 other_bb and another to another block in the range,
2638 all blocks except first_bb don't have side-effects and
2639 last_bb ends with either GIMPLE_COND, or cast satisfying
2640 final_range_test_p. */
2641 for (bb = last_bb; ; bb = single_pred (bb))
2642 {
2643 enum tree_code code;
2644 tree lhs, rhs;
2645
2646 e = find_edge (bb, other_bb);
2647 stmt = last_stmt (bb);
2648 gimple_set_visited (stmt, true);
2649 if (gimple_code (stmt) != GIMPLE_COND)
2650 {
2651 use_operand_p use_p;
2652 gimple phi;
2653 edge e2;
2654 unsigned int d;
2655
2656 lhs = gimple_assign_lhs (stmt);
2657 rhs = gimple_assign_rhs1 (stmt);
2658 gcc_assert (bb == last_bb);
2659
2660 /* stmt is
2661 _123 = (int) _234;
2662
2663 followed by:
2664 <bb M>:
2665 # _345 = PHI <_123(N), 1(...), 1(...)>
2666
2667 or 0 instead of 1. If it is 0, the _234
2668 range test is anded together with all the
2669 other range tests, if it is 1, it is ored with
2670 them. */
2671 single_imm_use (lhs, &use_p, &phi);
2672 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2673 e2 = find_edge (first_bb, other_bb);
2674 d = e2->dest_idx;
2675 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2676 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2677 code = BIT_AND_EXPR;
2678 else
2679 {
2680 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2681 code = BIT_IOR_EXPR;
2682 }
2683
2684 /* If _234 SSA_NAME_DEF_STMT is
2685 _234 = _567 | _789;
2686 (or &, corresponding to 1/0 in the phi arguments),
2687 push into ops the individual range test arguments
2688 of the bitwise or resp. and, recursively. */
2689 if (!get_ops (rhs, code, &ops,
2690 loop_containing_stmt (stmt))
2691 && has_single_use (rhs))
2692 {
2693 /* Otherwise, push the _234 range test itself. */
2694 operand_entry_t oe
2695 = (operand_entry_t) pool_alloc (operand_entry_pool);
2696
2697 oe->op = rhs;
2698 oe->rank = code;
2699 oe->id = 0;
2700 oe->count = 1;
2701 ops.safe_push (oe);
2702 }
2703 continue;
2704 }
2705 /* Otherwise stmt is GIMPLE_COND. */
2706 code = gimple_cond_code (stmt);
2707 lhs = gimple_cond_lhs (stmt);
2708 rhs = gimple_cond_rhs (stmt);
2709 if (TREE_CODE (lhs) == SSA_NAME
2710 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2711 && ((code != EQ_EXPR && code != NE_EXPR)
2712 || rhs != boolean_false_node
2713 /* Either push into ops the individual bitwise
2714 or resp. and operands, depending on which
2715 edge is other_bb. */
2716 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2717 ^ (code == EQ_EXPR))
2718 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2719 loop_containing_stmt (stmt))))
2720 {
2721 /* Or push the GIMPLE_COND stmt itself. */
2722 operand_entry_t oe
2723 = (operand_entry_t) pool_alloc (operand_entry_pool);
2724
2725 oe->op = NULL;
2726 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2727 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2728 /* oe->op = NULL signals that there is no SSA_NAME
2729 for the range test, and oe->id instead is the
2730 basic block number at whose end the GIMPLE_COND
2731 is found. */
2732 oe->id = bb->index;
2733 oe->count = 1;
2734 ops.safe_push (oe);
2735 }
2736 if (bb == first_bb)
2737 break;
2738 }
2739 if (ops.length () > 1)
2740 optimize_range_tests (ERROR_MARK, &ops);
2741 ops.release ();
2742 }
2743
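/* A concrete (assumed) instance of a "destructive update" as tested for
   below: in

     int
     sum (const int *a, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s = s + a[i];
       return s;
     }

   the body addition looks roughly like s_3 = s_2 + t_1, where s_2 is
   defined by the loop PHI node that also takes s_3 as an argument, i.e.
   the statement's LHS feeds back into the PHI defining one of its
   operands.  */
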
2744 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2745 of STMT in its operands.  This is also known as a "destructive
2746 update" operation. */
2747
2748 static bool
2749 is_phi_for_stmt (gimple stmt, tree operand)
2750 {
2751 gimple def_stmt;
2752 tree lhs;
2753 use_operand_p arg_p;
2754 ssa_op_iter i;
2755
2756 if (TREE_CODE (operand) != SSA_NAME)
2757 return false;
2758
2759 lhs = gimple_assign_lhs (stmt);
2760
2761 def_stmt = SSA_NAME_DEF_STMT (operand);
2762 if (gimple_code (def_stmt) != GIMPLE_PHI)
2763 return false;
2764
2765 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2766 if (lhs == USE_FROM_PTR (arg_p))
2767 return true;
2768 return false;
2769 }
2770
2771 /* Remove def stmt of VAR if VAR has zero uses and recurse
2772 on rhs1 operand if so. */
2773
2774 static void
2775 remove_visited_stmt_chain (tree var)
2776 {
2777 gimple stmt;
2778 gimple_stmt_iterator gsi;
2779
2780 while (1)
2781 {
2782 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2783 return;
2784 stmt = SSA_NAME_DEF_STMT (var);
2785 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2786 {
2787 var = gimple_assign_rhs1 (stmt);
2788 gsi = gsi_for_stmt (stmt);
2789 gsi_remove (&gsi, true);
2790 release_defs (stmt);
2791 }
2792 else
2793 return;
2794 }
2795 }
2796
2797 /* This function checks three consecutive operands in
2798 the passed operand vector OPS starting from OPINDEX and
2799 swaps two operands if it is profitable for the binary operation
2800 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2801
2802 We pair ops with the same rank if possible.
2803
2804 The alternative we try is to see if STMT is a destructive
2805 update style statement, which is like:
2806 b = phi (a, ...)
2807 a = c + b;
2808 In that case, we want to use the destructive update form to
2809 expose the possible vectorizer sum reduction opportunity.
2810 In that case, the third operand will be the phi node. This
2811 check is not performed if STMT is null.
2812
2813 We could, of course, try to be better as noted above, and do a
2814 lot of work to try to find these opportunities in >3 operand
2815 cases, but it is unlikely to be worth it. */
2816
2817 static void
2818 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2819 unsigned int opindex, gimple stmt)
2820 {
2821 operand_entry_t oe1, oe2, oe3;
2822
2823 oe1 = ops[opindex];
2824 oe2 = ops[opindex + 1];
2825 oe3 = ops[opindex + 2];
2826
2827 if ((oe1->rank == oe2->rank
2828 && oe2->rank != oe3->rank)
2829 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2830 && !is_phi_for_stmt (stmt, oe1->op)
2831 && !is_phi_for_stmt (stmt, oe2->op)))
2832 {
2833 struct operand_entry temp = *oe3;
2834 oe3->op = oe1->op;
2835 oe3->rank = oe1->rank;
2836 oe1->op = temp.op;
2837 oe1->rank = temp.rank;
2838 }
2839 else if ((oe1->rank == oe3->rank
2840 && oe2->rank != oe3->rank)
2841 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2842 && !is_phi_for_stmt (stmt, oe1->op)
2843 && !is_phi_for_stmt (stmt, oe3->op)))
2844 {
2845 struct operand_entry temp = *oe2;
2846 oe2->op = oe1->op;
2847 oe2->rank = oe1->rank;
2848 oe1->op = temp.op;
2849 oe1->rank = temp.rank;
2850 }
2851 }
2852
2853 /* Determine if stmt A is not dominated by stmt B.  If A and B are in
2854 the same basic block, then A's UID has to be less than B's.  If they are
2855 in different BB's, then A's BB must not be dominated by B's BB. */
2856
2857 static inline bool
2858 not_dominated_by (gimple a, gimple b)
2859 {
2860 basic_block bb_a, bb_b;
2861 bb_a = gimple_bb (a);
2862 bb_b = gimple_bb (b);
2863 return ((bb_a == bb_b && gimple_uid (a) < gimple_uid (b))
2864 || (bb_a != bb_b
2865 && !dominated_by_p (CDI_DOMINATORS, bb_a, bb_b)));
2866
2867 }
2868
2869 /* Among STMT1 and STMT2, return the statement that appears later. Both
2870 statements are in the same BB and have the same UID. */
2871
2872 static gimple
2873 appears_later_in_bb (gimple stmt1, gimple stmt2)
2874 {
2875 unsigned uid = gimple_uid (stmt1);
2876 gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
2877 gsi_next (&gsi);
2878 if (gsi_end_p (gsi))
2879 return stmt1;
2880 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2881 {
2882 gimple stmt = gsi_stmt (gsi);
2883
2884 /* If STMT has a different UID than STMT1 and we haven't seen
2885 STMT2 during traversal, we know STMT1 appears later. */
2886 if (gimple_uid (stmt) != uid)
2887 return stmt1;
2888 else if (stmt == stmt2)
2889 return stmt2;
2890 }
2891 return stmt1;
2892 }
2893
2894 /* Find the statement after which STMT must be moved so that the
2895 dependency from DEP_STMT to STMT is maintained. */
2896
2897 static gimple
2898 find_insert_point (gimple stmt, gimple dep_stmt)
2899 {
2900 gimple insert_stmt = stmt;
2901 if (dep_stmt == NULL)
2902 return stmt;
2903 if (gimple_uid (insert_stmt) == gimple_uid (dep_stmt)
2904 && gimple_bb (insert_stmt) == gimple_bb (dep_stmt)
2905 && insert_stmt != dep_stmt)
2906 insert_stmt = appears_later_in_bb (insert_stmt, dep_stmt);
2907 else if (not_dominated_by (insert_stmt, dep_stmt))
2908 insert_stmt = dep_stmt;
2909 return insert_stmt;
2910 }
2911
2912 /* Insert STMT after INSERT_POINT. */
2913
2914 static void
2915 insert_stmt_after (gimple stmt, gimple insert_point)
2916 {
2917 imm_use_iterator iter;
2918 tree lhs;
2919 gimple use_stmt;
2920 gimple_stmt_iterator gsistmt = gsi_for_stmt (stmt), gsi_insert;
2921 basic_block insert_bb = gimple_bb (insert_point);
2922 bool insert_bb_different = (insert_bb != gimple_bb (stmt));
2923 lhs = gimple_assign_lhs (stmt);
2924 /* If there are any debug uses of LHS, reset them. */
2925 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2926 {
2927 if (is_gimple_debug (use_stmt)
2928 && not_dominated_by (use_stmt, insert_point))
2929 {
2930 gimple_debug_bind_reset_value (use_stmt);
2931 update_stmt (use_stmt);
2932 }
2933 }
2934 /* If INSERT_POINT is a phi node, then do not insert just after that statement.
2935 Instead, find the first non-label gimple statement in BB and insert before
2936 that. */
2937 if (gimple_code (insert_point) == GIMPLE_PHI)
2938 {
2939 gsi_insert = gsi_after_labels (insert_bb);
2940 gsi_move_before (&gsistmt, &gsi_insert);
2941 }
2942 /* Statements marked for throw can not be in the middle of a basic block. So
2943 we can not insert a statement (not marked for throw) immediately after. */
2944 else if (stmt_ends_bb_p (insert_point))
2945 {
2946 edge succ_edge = find_fallthru_edge (insert_bb->succs);
2947 insert_bb = succ_edge->dest;
2948 insert_bb_different = (insert_bb != gimple_bb (stmt));
2949 /* Insert STMT at the beginning of the successor basic block. */
2950 gsi_insert = gsi_after_labels (insert_bb);
2951 gsi_move_before (&gsistmt, &gsi_insert);
2952 }
2953 else
2954 {
2955 gsi_insert = gsi_for_stmt (insert_point);
2956 gsi_move_after (&gsistmt, &gsi_insert);
2957 }
2958 /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
2959 of UIDs to determine dominance within a basic block work. */
2960 gimple_set_uid (stmt, gimple_uid (insert_point));
2961 if (dump_file && (dump_flags & TDF_DETAILS))
2962 {
2963 fprintf (dump_file, "Moved stmt ");
2964 print_gimple_stmt (dump_file, stmt, 0, 0);
2965 fprintf (dump_file, " %s to satisfy dependences\n",
2966 insert_bb_different ? "to a different BB" : "within same BB");
2967 }
2968
2969 }
2970
2971 /* If OP is a SSA variable and is not the default definition, return the
2972 gimple statement that defines OP. Else return NULL. */
2973
2974 static inline gimple
2975 get_def_stmt (tree op)
2976 {
2977 if (TREE_CODE (op) == SSA_NAME
2978 && !SSA_NAME_IS_DEFAULT_DEF (op))
2979 return SSA_NAME_DEF_STMT (op);
2980 else
2981 return NULL;
2982 }
2983
2984 /* Ensure that operands in the OPS vector are available for STMT and all
2985 gimple statements on which STMT depends. */
2986
2987 static void
2988 ensure_ops_are_available (gimple stmt, vec<operand_entry_t> ops, int opindex)
2989 {
2990 unsigned int len = ops.length ();
2991 gimple insert_stmt = stmt;
2992 gimple dep_stmts[2];
2993 dep_stmts[0] = get_def_stmt (ops[opindex]->op);
2994 if (len - opindex == 2)
2995 {
2996 dep_stmts[1] = get_def_stmt (ops[opindex + 1]->op);
2997 }
2998 else
2999 {
3000 gimple stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3001 ensure_ops_are_available (stmt1, ops, opindex + 1);
3002 dep_stmts[1] = stmt1;
3003 }
3004 for (int i = 0; i < 2; i++)
3005 insert_stmt = find_insert_point (insert_stmt, dep_stmts[i]);
3006
3007 if (insert_stmt != stmt)
3008 insert_stmt_after (stmt, insert_stmt);
3009 }
3010
3011 /* Recursively rewrite our linearized statements so that the operators
3012 match those in OPS[OPINDEX], putting the computation in rank
3013 order. */
3014
3015 static void
3016 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3017 vec<operand_entry_t> ops, bool moved)
3018 {
3019 tree rhs1 = gimple_assign_rhs1 (stmt);
3020 tree rhs2 = gimple_assign_rhs2 (stmt);
3021 operand_entry_t oe;
3022
3023 /* The final recursion case for this function is that you have
3024 exactly two operations left.
3025 If we had exactly one op in the entire list to start with, we
3026 would have never called this function, and the tail recursion
3027 rewrites them one at a time. */
3028 if (opindex + 2 == ops.length ())
3029 {
3030 operand_entry_t oe1, oe2;
3031
3032 oe1 = ops[opindex];
3033 oe2 = ops[opindex + 1];
3034
3035 if (rhs1 != oe1->op || rhs2 != oe2->op)
3036 {
3037 if (dump_file && (dump_flags & TDF_DETAILS))
3038 {
3039 fprintf (dump_file, "Transforming ");
3040 print_gimple_stmt (dump_file, stmt, 0, 0);
3041 }
3042
3043 gimple_assign_set_rhs1 (stmt, oe1->op);
3044 gimple_assign_set_rhs2 (stmt, oe2->op);
3045 update_stmt (stmt);
3046 if (rhs1 != oe1->op && rhs1 != oe2->op)
3047 remove_visited_stmt_chain (rhs1);
3048
3049 if (dump_file && (dump_flags & TDF_DETAILS))
3050 {
3051 fprintf (dump_file, " into ");
3052 print_gimple_stmt (dump_file, stmt, 0, 0);
3053 }
3054 }
3055 return;
3056 }
3057
3058 /* If we hit here, we should have 3 or more ops left. */
3059 gcc_assert (opindex + 2 < ops.length ());
3060
3061 /* Rewrite the next operator. */
3062 oe = ops[opindex];
3063
3064 if (oe->op != rhs2)
3065 {
3066 if (!moved)
3067 {
3068 ensure_ops_are_available (stmt, ops, opindex);
3069 moved = true;
3070 }
3071
3072 if (dump_file && (dump_flags & TDF_DETAILS))
3073 {
3074 fprintf (dump_file, "Transforming ");
3075 print_gimple_stmt (dump_file, stmt, 0, 0);
3076 }
3077
3078 gimple_assign_set_rhs2 (stmt, oe->op);
3079 update_stmt (stmt);
3080
3081 if (dump_file && (dump_flags & TDF_DETAILS))
3082 {
3083 fprintf (dump_file, " into ");
3084 print_gimple_stmt (dump_file, stmt, 0, 0);
3085 }
3086 }
3087 /* Recurse on the LHS of the binary operator, which is guaranteed to
3088 be the non-leaf side. */
3089 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
3090 }
3091
3092 /* Find out how many cycles we need to compute a chain of statements.
3093 OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3094 maximum number of independent statements we may execute per cycle. */
3095
3096 static int
3097 get_required_cycles (int ops_num, int cpu_width)
3098 {
3099 int res;
3100 int elog;
3101 unsigned int rest;
3102
3103 /* While we have more than 2 * cpu_width operands
3104 we may reduce the number of operands by cpu_width
3105 per cycle. */
3106 res = ops_num / (2 * cpu_width);
3107
3108 /* The remaining operand count can then be halved each cycle
3109 until only one operand is left. */
3110 rest = (unsigned)(ops_num - res * cpu_width);
3111 elog = exact_log2 (rest);
3112 if (elog >= 0)
3113 res += elog;
3114 else
3115 res += floor_log2 (rest) + 1;
3116
3117 return res;
3118 }
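
/* A worked example with assumed numbers: for ops_num == 10 and
   cpu_width == 2 the code above computes res = 10 / 4 = 2 and
   rest = 10 - 2*2 = 6; since 6 is not a power of two,
   floor_log2 (6) + 1 = 3 further cycles are added, giving 5 in total.  */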
3119
3120 /* Returns an optimal number of registers to use for computation of
3121 given statements. */
3122
3123 static int
3124 get_reassociation_width (int ops_num, enum tree_code opc,
3125 enum machine_mode mode)
3126 {
3127 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3128 int width;
3129 int width_min;
3130 int cycles_best;
3131
3132 if (param_width > 0)
3133 width = param_width;
3134 else
3135 width = targetm.sched.reassociation_width (opc, mode);
3136
3137 if (width == 1)
3138 return width;
3139
3140 /* Get the minimal time required for sequence computation. */
3141 cycles_best = get_required_cycles (ops_num, width);
3142
3143 /* Check if we may use less width and still compute sequence for
3144 the same time. It will allow us to reduce registers usage.
3145 get_required_cycles is monotonically increasing with lower width
3146 so we can perform a binary search for the minimal width that still
3147 results in the optimal cycle count. */
3148 width_min = 1;
3149 while (width > width_min)
3150 {
3151 int width_mid = (width + width_min) / 2;
3152
3153 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3154 width = width_mid;
3155 else if (width_min < width_mid)
3156 width_min = width_mid;
3157 else
3158 break;
3159 }
3160
3161 return width;
3162 }
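
/* For instance (assumed numbers), with ops_num == 6 and a target-reported
   width of 4, get_required_cycles (6, 4) and get_required_cycles (6, 2)
   both evaluate to 3, so the binary search above settles on a width of 2:
   the same latency with fewer live registers.  */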
3163
3164 /* Recursively rewrite our linearized statements so that the operators
3165 match those in OPS[OPINDEX], putting the computation in rank
3166 order and trying to allow operations to be executed in
3167 parallel. */
3168
3169 static void
3170 rewrite_expr_tree_parallel (gimple stmt, int width,
3171 vec<operand_entry_t> ops)
3172 {
3173 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3174 int op_num = ops.length ();
3175 int stmt_num = op_num - 1;
3176 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3177 int op_index = op_num - 1;
3178 int stmt_index = 0;
3179 int ready_stmts_end = 0;
3180 int i = 0;
3181 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3182
3183 /* We start expression rewriting from the top statements.
3184 So, in this loop we create a full list of statements
3185 we will work with. */
3186 stmts[stmt_num - 1] = stmt;
3187 for (i = stmt_num - 2; i >= 0; i--)
3188 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3189
3190 for (i = 0; i < stmt_num; i++)
3191 {
3192 tree op1, op2;
3193
3194 /* Determine whether we should use results of
3195 already handled statements or not. */
3196 if (ready_stmts_end == 0
3197 && (i - stmt_index >= width || op_index < 1))
3198 ready_stmts_end = i;
3199
3200 /* Now we choose operands for the next statement.  A nonzero
3201 value in ready_stmts_end means that we should use
3202 the results of already generated statements as new operands. */
3203 if (ready_stmts_end > 0)
3204 {
3205 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3206 if (ready_stmts_end > stmt_index)
3207 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3208 else if (op_index >= 0)
3209 op2 = ops[op_index--]->op;
3210 else
3211 {
3212 gcc_assert (stmt_index < i);
3213 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3214 }
3215
3216 if (stmt_index >= ready_stmts_end)
3217 ready_stmts_end = 0;
3218 }
3219 else
3220 {
3221 if (op_index > 1)
3222 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3223 op2 = ops[op_index--]->op;
3224 op1 = ops[op_index--]->op;
3225 }
3226
3227 /* If we emit the last statement then we should put
3228 operands into the last statement. It will also
3229 break the loop. */
3230 if (op_index < 0 && stmt_index == i)
3231 i = stmt_num - 1;
3232
3233 if (dump_file && (dump_flags & TDF_DETAILS))
3234 {
3235 fprintf (dump_file, "Transforming ");
3236 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3237 }
3238
3239 /* We keep the original statement only for the last one.  All
3240 others are recreated. */
3241 if (i == stmt_num - 1)
3242 {
3243 gimple_assign_set_rhs1 (stmts[i], op1);
3244 gimple_assign_set_rhs2 (stmts[i], op2);
3245 update_stmt (stmts[i]);
3246 }
3247 else
3248 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3249
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 {
3252 fprintf (dump_file, " into ");
3253 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3254 }
3255 }
3256
3257 remove_visited_stmt_chain (last_rhs1);
3258 }
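
/* Roughly, for an assumed chain x = a + b + c + d and width 2, the serial
   form

     t1 = a + b;  t2 = t1 + c;  x = t2 + d;

   is rewritten by the routine above into something like

     t1 = a + b;  t2 = c + d;  x = t1 + t2;

   so the first two additions are independent and can issue in the same
   cycle.  */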
3259
3260 /* Transform STMT, which is really (A + B) + (C + D), into the left
3261 linear form, ((A + B) + C) + D.
3262 Recurse on D if necessary. */
3263
3264 static void
3265 linearize_expr (gimple stmt)
3266 {
3267 gimple_stmt_iterator gsinow, gsirhs;
3268 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3269 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3270 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3271 gimple newbinrhs = NULL;
3272 struct loop *loop = loop_containing_stmt (stmt);
3273
3274 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3275 && is_reassociable_op (binrhs, rhscode, loop));
3276
3277 gsinow = gsi_for_stmt (stmt);
3278 gsirhs = gsi_for_stmt (binrhs);
3279 gsi_move_before (&gsirhs, &gsinow);
3280 gimple_set_uid (binrhs, gimple_uid (stmt));
3281
3282 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3283 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3284 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3285
3286 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3287 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3288
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3290 {
3291 fprintf (dump_file, "Linearized: ");
3292 print_gimple_stmt (dump_file, stmt, 0, 0);
3293 }
3294
3295 reassociate_stats.linearized++;
3296 update_stmt (binrhs);
3297 update_stmt (binlhs);
3298 update_stmt (stmt);
3299
3300 gimple_set_visited (stmt, true);
3301 gimple_set_visited (binlhs, true);
3302 gimple_set_visited (binrhs, true);
3303
3304 /* Tail recurse on the new rhs if it still needs reassociation. */
3305 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3306 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3307 want to change the algorithm while converting to tuples. */
3308 linearize_expr (stmt);
3309 }
3310
3311 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3312 it. Otherwise, return NULL. */
3313
3314 static gimple
3315 get_single_immediate_use (tree lhs)
3316 {
3317 use_operand_p immuse;
3318 gimple immusestmt;
3319
3320 if (TREE_CODE (lhs) == SSA_NAME
3321 && single_imm_use (lhs, &immuse, &immusestmt)
3322 && is_gimple_assign (immusestmt))
3323 return immusestmt;
3324
3325 return NULL;
3326 }
3327
3328 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3329 representing the negated value. Insertions of any necessary
3330 instructions go before GSI.
3331 This function is recursive in that, if you hand it "a_5" as the
3332 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3333 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3334
3335 static tree
3336 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3337 {
3338 gimple negatedefstmt = NULL;
3339 tree resultofnegate;
3340
3341 /* If we are trying to negate a name, defined by an add, negate the
3342 add operands instead. */
3343 if (TREE_CODE (tonegate) == SSA_NAME)
3344 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3345 if (TREE_CODE (tonegate) == SSA_NAME
3346 && is_gimple_assign (negatedefstmt)
3347 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3348 && has_single_use (gimple_assign_lhs (negatedefstmt))
3349 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3350 {
3351 gimple_stmt_iterator gsi;
3352 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3353 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3354
3355 gsi = gsi_for_stmt (negatedefstmt);
3356 rhs1 = negate_value (rhs1, &gsi);
3357 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3358
3359 gsi = gsi_for_stmt (negatedefstmt);
3360 rhs2 = negate_value (rhs2, &gsi);
3361 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3362
3363 update_stmt (negatedefstmt);
3364 return gimple_assign_lhs (negatedefstmt);
3365 }
3366
3367 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3368 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3369 NULL_TREE, true, GSI_SAME_STMT);
3370 return resultofnegate;
3371 }
3372
3373 /* Return true if we should break up the subtract in STMT into an add
3374 with negate.  This is true when the subtract's operands are really
3375 adds, or the subtract itself is used in an add expression. In
3376 either case, breaking up the subtract into an add with negate
3377 exposes the adds to reassociation. */
3378
3379 static bool
3380 should_break_up_subtract (gimple stmt)
3381 {
3382 tree lhs = gimple_assign_lhs (stmt);
3383 tree binlhs = gimple_assign_rhs1 (stmt);
3384 tree binrhs = gimple_assign_rhs2 (stmt);
3385 gimple immusestmt;
3386 struct loop *loop = loop_containing_stmt (stmt);
3387
3388 if (TREE_CODE (binlhs) == SSA_NAME
3389 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3390 return true;
3391
3392 if (TREE_CODE (binrhs) == SSA_NAME
3393 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3394 return true;
3395
3396 if (TREE_CODE (lhs) == SSA_NAME
3397 && (immusestmt = get_single_immediate_use (lhs))
3398 && is_gimple_assign (immusestmt)
3399 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3400 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3401 return true;
3402 return false;
3403 }
3404
3405 /* Transform STMT from A - B into A + -B. */
3406
3407 static void
3408 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3409 {
3410 tree rhs1 = gimple_assign_rhs1 (stmt);
3411 tree rhs2 = gimple_assign_rhs2 (stmt);
3412
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3414 {
3415 fprintf (dump_file, "Breaking up subtract ");
3416 print_gimple_stmt (dump_file, stmt, 0, 0);
3417 }
3418
3419 rhs2 = negate_value (rhs2, gsip);
3420 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3421 update_stmt (stmt);
3422 }
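
/* A small (assumed) example of what this buys us: for

     unsigned int
     g (unsigned int a, unsigned int b, unsigned int c)
     {
       return (a - b) + (c - a);
     }

   breaking each subtraction up yields the flat chain a + -b + c + -a, and
   the operand-list optimization can then cancel a against -a, leaving
   c - b.  With the subtractions kept intact, that pair is never exposed.  */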
3423
3424 /* Determine whether STMT is a builtin call that raises an SSA name
3425 to an integer power and has only one use. If so, and this is early
3426 reassociation and unsafe math optimizations are permitted, place
3427 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3428 If any of these conditions does not hold, return FALSE. */
3429
3430 static bool
3431 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3432 {
3433 tree fndecl, arg1;
3434 REAL_VALUE_TYPE c, cint;
3435
3436 if (!first_pass_instance
3437 || !flag_unsafe_math_optimizations
3438 || !is_gimple_call (stmt)
3439 || !has_single_use (gimple_call_lhs (stmt)))
3440 return false;
3441
3442 fndecl = gimple_call_fndecl (stmt);
3443
3444 if (!fndecl
3445 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3446 return false;
3447
3448 switch (DECL_FUNCTION_CODE (fndecl))
3449 {
3450 CASE_FLT_FN (BUILT_IN_POW):
3451 *base = gimple_call_arg (stmt, 0);
3452 arg1 = gimple_call_arg (stmt, 1);
3453
3454 if (TREE_CODE (arg1) != REAL_CST)
3455 return false;
3456
3457 c = TREE_REAL_CST (arg1);
3458
3459 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3460 return false;
3461
3462 *exponent = real_to_integer (&c);
3463 real_from_integer (&cint, VOIDmode, *exponent,
3464 *exponent < 0 ? -1 : 0, 0);
3465 if (!real_identical (&c, &cint))
3466 return false;
3467
3468 break;
3469
3470 CASE_FLT_FN (BUILT_IN_POWI):
3471 *base = gimple_call_arg (stmt, 0);
3472 arg1 = gimple_call_arg (stmt, 1);
3473
3474 if (!host_integerp (arg1, 0))
3475 return false;
3476
3477 *exponent = TREE_INT_CST_LOW (arg1);
3478 break;
3479
3480 default:
3481 return false;
3482 }
3483
3484 /* Expanding negative exponents is generally unproductive, so we don't
3485 complicate matters with those. Exponents of zero and one should
3486 have been handled by expression folding. */
3487 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3488 return false;
3489
3490 return true;
3491 }
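
/* For example (assumed call shapes), y = __builtin_pow (x, 3.0) and
   y = __builtin_powi (x, 3) are both accepted here with *BASE = x and
   *EXPONENT = 3 (in the early reassociation pass, with
   -funsafe-math-optimizations and a single use of y), whereas
   __builtin_pow (x, 0.5) is rejected because the exponent is not an
   integral value.  */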
3492
3493 /* Recursively linearize a binary expression that is the RHS of STMT.
3494 Place the operands of the expression tree in the vector named OPS. */
3495
3496 static void
3497 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3498 bool is_associative, bool set_visited)
3499 {
3500 tree binlhs = gimple_assign_rhs1 (stmt);
3501 tree binrhs = gimple_assign_rhs2 (stmt);
3502 gimple binlhsdef = NULL, binrhsdef = NULL;
3503 bool binlhsisreassoc = false;
3504 bool binrhsisreassoc = false;
3505 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3506 struct loop *loop = loop_containing_stmt (stmt);
3507 tree base = NULL_TREE;
3508 HOST_WIDE_INT exponent = 0;
3509
3510 if (set_visited)
3511 gimple_set_visited (stmt, true);
3512
3513 if (TREE_CODE (binlhs) == SSA_NAME)
3514 {
3515 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3516 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3517 && !stmt_could_throw_p (binlhsdef));
3518 }
3519
3520 if (TREE_CODE (binrhs) == SSA_NAME)
3521 {
3522 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3523 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3524 && !stmt_could_throw_p (binrhsdef));
3525 }
3526
3527 /* If the LHS is not reassociable, but the RHS is, we need to swap
3528 them. If neither is reassociable, there is nothing we can do, so
3529 just put them in the ops vector. If the LHS is reassociable,
3530 linearize it. If both are reassociable, then linearize the RHS
3531 and the LHS. */
3532
3533 if (!binlhsisreassoc)
3534 {
3535 tree temp;
3536
3537 /* If this is not an associative operation, like division, give up. */
3538 if (!is_associative)
3539 {
3540 add_to_ops_vec (ops, binrhs);
3541 return;
3542 }
3543
3544 if (!binrhsisreassoc)
3545 {
3546 if (rhscode == MULT_EXPR
3547 && TREE_CODE (binrhs) == SSA_NAME
3548 && acceptable_pow_call (binrhsdef, &base, &exponent))
3549 {
3550 add_repeat_to_ops_vec (ops, base, exponent);
3551 gimple_set_visited (binrhsdef, true);
3552 }
3553 else
3554 add_to_ops_vec (ops, binrhs);
3555
3556 if (rhscode == MULT_EXPR
3557 && TREE_CODE (binlhs) == SSA_NAME
3558 && acceptable_pow_call (binlhsdef, &base, &exponent))
3559 {
3560 add_repeat_to_ops_vec (ops, base, exponent);
3561 gimple_set_visited (binlhsdef, true);
3562 }
3563 else
3564 add_to_ops_vec (ops, binlhs);
3565
3566 return;
3567 }
3568
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3570 {
3571 fprintf (dump_file, "swapping operands of ");
3572 print_gimple_stmt (dump_file, stmt, 0, 0);
3573 }
3574
3575 swap_tree_operands (stmt,
3576 gimple_assign_rhs1_ptr (stmt),
3577 gimple_assign_rhs2_ptr (stmt));
3578 update_stmt (stmt);
3579
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 {
3582 fprintf (dump_file, " is now ");
3583 print_gimple_stmt (dump_file, stmt, 0, 0);
3584 }
3585
3586 /* We want to make it so the lhs is always the reassociative op,
3587 so swap. */
3588 temp = binlhs;
3589 binlhs = binrhs;
3590 binrhs = temp;
3591 }
3592 else if (binrhsisreassoc)
3593 {
3594 linearize_expr (stmt);
3595 binlhs = gimple_assign_rhs1 (stmt);
3596 binrhs = gimple_assign_rhs2 (stmt);
3597 }
3598
3599 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3600 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3601 rhscode, loop));
3602 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3603 is_associative, set_visited);
3604
3605 if (rhscode == MULT_EXPR
3606 && TREE_CODE (binrhs) == SSA_NAME
3607 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3608 {
3609 add_repeat_to_ops_vec (ops, base, exponent);
3610 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3611 }
3612 else
3613 add_to_ops_vec (ops, binrhs);
3614 }
3615
3616 /* Repropagate the negates back into subtracts, since no other pass
3617 currently does it. */
3618
3619 static void
3620 repropagate_negates (void)
3621 {
3622 unsigned int i = 0;
3623 tree negate;
3624
3625 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3626 {
3627 gimple user = get_single_immediate_use (negate);
3628
3629 if (!user || !is_gimple_assign (user))
3630 continue;
3631
3632 /* The negate operand can be either operand of a PLUS_EXPR
3633 (it can be the LHS if the RHS is a constant for example).
3634
3635 Force the negate operand to the RHS of the PLUS_EXPR, then
3636 transform the PLUS_EXPR into a MINUS_EXPR. */
3637 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3638 {
3639 /* If the negated operand appears on the LHS of the
3640 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3641 to force the negated operand to the RHS of the PLUS_EXPR. */
3642 if (gimple_assign_rhs1 (user) == negate)
3643 {
3644 swap_tree_operands (user,
3645 gimple_assign_rhs1_ptr (user),
3646 gimple_assign_rhs2_ptr (user));
3647 }
3648
3649 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3650 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3651 if (gimple_assign_rhs2 (user) == negate)
3652 {
3653 tree rhs1 = gimple_assign_rhs1 (user);
3654 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3655 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3656 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3657 update_stmt (user);
3658 }
3659 }
3660 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3661 {
3662 if (gimple_assign_rhs1 (user) == negate)
3663 {
3664 /* We have
3665 x = -a
3666 y = x - b
3667 which we transform into
3668 x = a + b
3669 y = -x .
3670 This pushes down the negate which we possibly can merge
3671 into some other operation, hence insert it into the
3672 plus_negates vector. */
3673 gimple feed = SSA_NAME_DEF_STMT (negate);
3674 tree a = gimple_assign_rhs1 (feed);
3675 tree rhs2 = gimple_assign_rhs2 (user);
3676 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3677 gimple_replace_lhs (feed, negate);
3678 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3679 update_stmt (gsi_stmt (gsi));
3680 gsi2 = gsi_for_stmt (user);
3681 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3682 update_stmt (gsi_stmt (gsi2));
3683 gsi_move_before (&gsi, &gsi2);
3684 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3685 }
3686 else
3687 {
3688 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3689 rid of one operation. */
3690 gimple feed = SSA_NAME_DEF_STMT (negate);
3691 tree a = gimple_assign_rhs1 (feed);
3692 tree rhs1 = gimple_assign_rhs1 (user);
3693 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3694 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3695 update_stmt (gsi_stmt (gsi));
3696 }
3697 }
3698 }
3699 }
3700
3701 /* Returns true if OP is of a type for which we can do reassociation.
3702 That is for integral or non-saturating fixed-point types, and for
3703 floating point types when associative-math is enabled. */
3704
3705 static bool
3706 can_reassociate_p (tree op)
3707 {
3708 tree type = TREE_TYPE (op);
3709 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3710 || NON_SAT_FIXED_POINT_TYPE_P (type)
3711 || (flag_associative_math && FLOAT_TYPE_P (type)))
3712 return true;
3713 return false;
3714 }
3715
3716 /* Break up subtract operations in block BB.
3717
3718 We do this top down because we don't know whether the subtract is
3719 part of a possible chain of reassociation except at the top.
3720
3721 IE given
3722 d = f + g
3723 c = a + e
3724 b = c - d
3725 q = b - r
3726 k = t - q
3727
3728 we want to break up k = t - q, but we won't until we've transformed q
3729 = b - r, which won't be broken up until we transform b = c - d.
3730
3731 En passant, clear the GIMPLE visited flag on every statement. */
3732
3733 static void
3734 break_up_subtract_bb (basic_block bb)
3735 {
3736 gimple_stmt_iterator gsi;
3737 basic_block son;
3738
3739 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3740 {
3741 gimple stmt = gsi_stmt (gsi);
3742 gimple_set_visited (stmt, false);
3743
3744 if (!is_gimple_assign (stmt)
3745 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3746 continue;
3747
3748 /* Look for simple gimple subtract operations. */
3749 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3750 {
3751 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3752 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3753 continue;
3754
3755 /* Check for a subtract used only in an addition. If this
3756 is the case, transform it into add of a negate for better
3757 reassociation. IE transform C = A-B into C = A + -B if C
3758 is only used in an addition. */
3759 if (should_break_up_subtract (stmt))
3760 break_up_subtract (stmt, &gsi);
3761 }
3762 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3763 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3764 plus_negates.safe_push (gimple_assign_lhs (stmt));
3765 }
3766 for (son = first_dom_son (CDI_DOMINATORS, bb);
3767 son;
3768 son = next_dom_son (CDI_DOMINATORS, son))
3769 break_up_subtract_bb (son);
3770 }
3771
3772 /* Used for repeated factor analysis. */
3773 struct repeat_factor_d
3774 {
3775 /* An SSA name that occurs in a multiply chain. */
3776 tree factor;
3777
3778 /* Cached rank of the factor. */
3779 unsigned rank;
3780
3781 /* Number of occurrences of the factor in the chain. */
3782 HOST_WIDE_INT count;
3783
3784 /* An SSA name representing the product of this factor and
3785 all factors appearing later in the repeated factor vector. */
3786 tree repr;
3787 };
3788
3789 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3790 typedef const struct repeat_factor_d *const_repeat_factor_t;
3791
3792
3793 static vec<repeat_factor> repeat_factor_vec;
3794
3795 /* Used for sorting the repeat factor vector. Sort primarily by
3796 ascending occurrence count, secondarily by descending rank. */
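/* For example, for x * x * y * y * y * z * z * z * z the vector holds
   { x:2, y:3, z:4 } and sorts to x, y, z; entries with equal counts
   would be ordered highest rank first.  */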
3797
3798 static int
3799 compare_repeat_factors (const void *x1, const void *x2)
3800 {
3801 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3802 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3803
3804 if (rf1->count != rf2->count)
3805 return rf1->count - rf2->count;
3806
3807 return rf2->rank - rf1->rank;
3808 }
3809
3810 /* Look for repeated operands in OPS in the multiply tree rooted at
3811 STMT. Replace them with an optimal sequence of multiplies and powi
3812 builtin calls, and remove the used operands from OPS. Return an
3813 SSA name representing the value of the replacement sequence. */
3814
3815 static tree
3816 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3817 {
3818 unsigned i, j, vec_len;
3819 int ii;
3820 operand_entry_t oe;
3821 repeat_factor_t rf1, rf2;
3822 repeat_factor rfnew;
3823 tree result = NULL_TREE;
3824 tree target_ssa, iter_result;
3825 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3826 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3827 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3828 gimple mul_stmt, pow_stmt;
3829
3830 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3831 target. */
3832 if (!powi_fndecl)
3833 return NULL_TREE;
3834
3835 /* Allocate the repeated factor vector. */
3836 repeat_factor_vec.create (10);
3837
3838 /* Scan the OPS vector for all SSA names in the product and build
3839 up a vector of occurrence counts for each factor. */
3840 FOR_EACH_VEC_ELT (*ops, i, oe)
3841 {
3842 if (TREE_CODE (oe->op) == SSA_NAME)
3843 {
3844 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3845 {
3846 if (rf1->factor == oe->op)
3847 {
3848 rf1->count += oe->count;
3849 break;
3850 }
3851 }
3852
3853 if (j >= repeat_factor_vec.length ())
3854 {
3855 rfnew.factor = oe->op;
3856 rfnew.rank = oe->rank;
3857 rfnew.count = oe->count;
3858 rfnew.repr = NULL_TREE;
3859 repeat_factor_vec.safe_push (rfnew);
3860 }
3861 }
3862 }
3863
3864 /* Sort the repeated factor vector by (a) increasing occurrence count,
3865 and (b) decreasing rank. */
3866 repeat_factor_vec.qsort (compare_repeat_factors);
3867
3868 /* It is generally best to combine as many base factors as possible
3869 into a product before applying __builtin_powi to the result.
3870 However, the sort order chosen for the repeated factor vector
3871 allows us to cache partial results for the product of the base
3872 factors for subsequent use. When we already have a cached partial
3873 result from a previous iteration, it is best to make use of it
3874 before looking for another __builtin_powi opportunity.
3875
3876 As an example, consider x * x * y * y * y * z * z * z * z.
3877 We want to first compose the product x * y * z, raise it to the
3878 second power, then multiply this by y * z, and finally multiply
3879 by z. This can be done in 5 multiplies provided we cache y * z
3880 for use in both expressions:
3881
3882 t1 = y * z
3883 t2 = t1 * x
3884 t3 = t2 * t2
3885 t4 = t1 * t3
3886 result = t4 * z
3887
3888 If we instead ignored the cached y * z and first multiplied by
3889 the __builtin_powi opportunity z * z, we would get the inferior:
3890
3891 t1 = y * z
3892 t2 = t1 * x
3893 t3 = t2 * t2
3894 t4 = z * z
3895 t5 = t3 * t4
3896 result = t5 * y */
3897
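  /* Concretely, for x * x * y * y * y * z * z * z * z the loop below
     emits roughly the following (the temporary names are illustrative
     only):

       reassocpow_1 = y * z;
       reassocpow_2 = x * reassocpow_1;
       reassocpow_3 = __builtin_powi (reassocpow_2, 2);
       reassocpow_4 = reassocpow_3 * reassocpow_1;
       reassocpow_5 = reassocpow_4 * z;

     and returns reassocpow_5; a later pass expands the powi call into
     the single multiply reassocpow_2 * reassocpow_2.  */
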
3898 vec_len = repeat_factor_vec.length ();
3899
3900 /* Repeatedly look for opportunities to create a builtin_powi call. */
3901 while (true)
3902 {
3903 HOST_WIDE_INT power;
3904
3905 /* First look for the largest cached product of factors from
3906 preceding iterations. If found, create a builtin_powi for
3907 it if the minimum occurrence count for its factors is at
3908 least 2, or just use this cached product as our next
3909 multiplicand if the minimum occurrence count is 1. */
3910 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3911 {
3912 if (rf1->repr && rf1->count > 0)
3913 break;
3914 }
3915
3916 if (j < vec_len)
3917 {
3918 power = rf1->count;
3919
3920 if (power == 1)
3921 {
3922 iter_result = rf1->repr;
3923
3924 if (dump_file && (dump_flags & TDF_DETAILS))
3925 {
3926 unsigned elt;
3927 repeat_factor_t rf;
3928 fputs ("Multiplying by cached product ", dump_file);
3929 for (elt = j; elt < vec_len; elt++)
3930 {
3931 rf = &repeat_factor_vec[elt];
3932 print_generic_expr (dump_file, rf->factor, 0);
3933 if (elt < vec_len - 1)
3934 fputs (" * ", dump_file);
3935 }
3936 fputs ("\n", dump_file);
3937 }
3938 }
3939 else
3940 {
3941 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3942 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3943 build_int_cst (integer_type_node,
3944 power));
3945 gimple_call_set_lhs (pow_stmt, iter_result);
3946 gimple_set_location (pow_stmt, gimple_location (stmt));
3947 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3948
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3950 {
3951 unsigned elt;
3952 repeat_factor_t rf;
3953 fputs ("Building __builtin_pow call for cached product (",
3954 dump_file);
3955 for (elt = j; elt < vec_len; elt++)
3956 {
3957 rf = &repeat_factor_vec[elt];
3958 print_generic_expr (dump_file, rf->factor, 0);
3959 if (elt < vec_len - 1)
3960 fputs (" * ", dump_file);
3961 }
3962 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3963 power);
3964 }
3965 }
3966 }
3967 else
3968 {
3969 /* Otherwise, find the first factor in the repeated factor
3970 vector whose occurrence count is at least 2. If no such
3971 factor exists, there are no builtin_powi opportunities
3972 remaining. */
3973 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3974 {
3975 if (rf1->count >= 2)
3976 break;
3977 }
3978
3979 if (j >= vec_len)
3980 break;
3981
3982 power = rf1->count;
3983
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3985 {
3986 unsigned elt;
3987 repeat_factor_t rf;
3988 fputs ("Building __builtin_pow call for (", dump_file);
3989 for (elt = j; elt < vec_len; elt++)
3990 {
3991 rf = &repeat_factor_vec[elt];
3992 print_generic_expr (dump_file, rf->factor, 0);
3993 if (elt < vec_len - 1)
3994 fputs (" * ", dump_file);
3995 }
3996 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3997 }
3998
3999 reassociate_stats.pows_created++;
4000
4001 /* Visit each element of the vector in reverse order (so that
4002 high-occurrence elements are visited first, and within the
4003 same occurrence count, lower-ranked elements are visited
4004 first). Form a linear product of all elements in this order
4005 whose occurrence count is at least that of element J.
4006 Record the SSA name representing the product of each element
4007 with all subsequent elements in the vector. */
4008 if (j == vec_len - 1)
4009 rf1->repr = rf1->factor;
4010 else
4011 {
4012 for (ii = vec_len - 2; ii >= (int)j; ii--)
4013 {
4014 tree op1, op2;
4015
4016 rf1 = &repeat_factor_vec[ii];
4017 rf2 = &repeat_factor_vec[ii + 1];
4018
4019 /* Init the last factor's representative to be itself. */
4020 if (!rf2->repr)
4021 rf2->repr = rf2->factor;
4022
4023 op1 = rf1->factor;
4024 op2 = rf2->repr;
4025
4026 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4027 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4028 target_ssa,
4029 op1, op2);
4030 gimple_set_location (mul_stmt, gimple_location (stmt));
4031 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4032 rf1->repr = target_ssa;
4033
4034 /* Don't reprocess the multiply we just introduced. */
4035 gimple_set_visited (mul_stmt, true);
4036 }
4037 }
4038
4039 /* Form a call to __builtin_powi for the maximum product
4040 just formed, raised to the power obtained earlier. */
4041 rf1 = &repeat_factor_vec[j];
4042 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4043 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4044 build_int_cst (integer_type_node,
4045 power));
4046 gimple_call_set_lhs (pow_stmt, iter_result);
4047 gimple_set_location (pow_stmt, gimple_location (stmt));
4048 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4049 }
4050
4051 /* If we previously formed at least one other builtin_powi call,
4052 form the product of this one and those others. */
4053 if (result)
4054 {
4055 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4056 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4057 result, iter_result);
4058 gimple_set_location (mul_stmt, gimple_location (stmt));
4059 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4060 gimple_set_visited (mul_stmt, true);
4061 result = new_result;
4062 }
4063 else
4064 result = iter_result;
4065
4066 /* Decrement the occurrence count of each element in the product
4067 by the count found above, and remove this many copies of each
4068 factor from OPS. */
4069 for (i = j; i < vec_len; i++)
4070 {
4071 unsigned k = power;
4072 unsigned n;
4073
4074 rf1 = &repeat_factor_vec[i];
4075 rf1->count -= power;
4076
4077 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4078 {
4079 if (oe->op == rf1->factor)
4080 {
4081 if (oe->count <= k)
4082 {
4083 ops->ordered_remove (n);
4084 k -= oe->count;
4085
4086 if (k == 0)
4087 break;
4088 }
4089 else
4090 {
4091 oe->count -= k;
4092 break;
4093 }
4094 }
4095 }
4096 }
4097 }
4098
4099 /* At this point all elements in the repeated factor vector have a
4100 remaining occurrence count of 0 or 1, and those with a count of 1
4101 don't have cached representatives. Re-sort the ops vector and
4102 clean up. */
4103 ops->qsort (sort_by_operand_rank);
4104 repeat_factor_vec.release ();
4105
4106 /* Return the final product computed herein. Note that there may
4107 still be some elements with single occurrence count left in OPS;
4108 those will be handled by the normal reassociation logic. */
4109 return result;
4110 }
4111
4112 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4113
4114 static void
4115 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4116 {
4117 tree rhs1;
4118
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4120 {
4121 fprintf (dump_file, "Transforming ");
4122 print_gimple_stmt (dump_file, stmt, 0, 0);
4123 }
4124
4125 rhs1 = gimple_assign_rhs1 (stmt);
4126 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4127 update_stmt (stmt);
4128 remove_visited_stmt_chain (rhs1);
4129
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4131 {
4132 fprintf (dump_file, " into ");
4133 print_gimple_stmt (dump_file, stmt, 0, 0);
4134 }
4135 }
4136
4137 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4138
4139 static void
4140 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4141 tree rhs1, tree rhs2)
4142 {
4143 if (dump_file && (dump_flags & TDF_DETAILS))
4144 {
4145 fprintf (dump_file, "Transforming ");
4146 print_gimple_stmt (dump_file, stmt, 0, 0);
4147 }
4148
4149 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4150 update_stmt (gsi_stmt (*gsi));
4151 remove_visited_stmt_chain (rhs1);
4152
4153 if (dump_file && (dump_flags & TDF_DETAILS))
4154 {
4155 fprintf (dump_file, " into ");
4156 print_gimple_stmt (dump_file, stmt, 0, 0);
4157 }
4158 }
4159
4160 /* Reassociate expressions in basic block BB and, recursively, in its
4161 children in the post-dominator tree. */
4162
4163 static void
4164 reassociate_bb (basic_block bb)
4165 {
4166 gimple_stmt_iterator gsi;
4167 basic_block son;
4168 gimple stmt = last_stmt (bb);
4169
4170 if (stmt && !gimple_visited_p (stmt))
4171 maybe_optimize_range_tests (stmt);
4172
4173 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4174 {
4175 stmt = gsi_stmt (gsi);
4176
4177 if (is_gimple_assign (stmt)
4178 && !stmt_could_throw_p (stmt))
4179 {
4180 tree lhs, rhs1, rhs2;
4181 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4182
4183 /* If this is not a gimple binary expression, there is
4184 nothing for us to do with it. */
4185 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4186 continue;
4187
4188 /* If this was part of an already processed statement,
4189 we don't need to touch it again. */
4190 if (gimple_visited_p (stmt))
4191 {
4192 /* This statement might have become dead because of previous
4193 reassociations. */
4194 if (has_zero_uses (gimple_get_lhs (stmt)))
4195 {
4196 gsi_remove (&gsi, true);
4197 release_defs (stmt);
4198 /* Removing that stmt may have left the
4199 iterator at the end of the sequence.
4200 Reset it to the last stmt of the block;
4201 if the block is now empty, the iterator
4202 is still at the end, in which case we
4203 need to bail out. */
4204 if (gsi_end_p (gsi))
4205 {
4206 gsi = gsi_last_bb (bb);
4207 if (gsi_end_p (gsi))
4208 break;
4209 }
4210 }
4211 continue;
4212 }
4213
4214 lhs = gimple_assign_lhs (stmt);
4215 rhs1 = gimple_assign_rhs1 (stmt);
4216 rhs2 = gimple_assign_rhs2 (stmt);
4217
4218 /* For operations other than bitwise AND/OR/XOR and MIN/MAX
4219 we can't reassociate all types. Verify that here. */
4220 if (rhs_code != BIT_IOR_EXPR
4221 && rhs_code != BIT_AND_EXPR
4222 && rhs_code != BIT_XOR_EXPR
4223 && rhs_code != MIN_EXPR
4224 && rhs_code != MAX_EXPR
4225 && (!can_reassociate_p (lhs)
4226 || !can_reassociate_p (rhs1)
4227 || !can_reassociate_p (rhs2)))
4228 continue;
4229
4230 if (associative_tree_code (rhs_code))
4231 {
4232 vec<operand_entry_t> ops = vNULL;
4233 tree powi_result = NULL_TREE;
4234
4235 /* There may be no immediate uses left by the time we
4236 get here because we may have eliminated them all. */
4237 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4238 continue;
4239
4240 gimple_set_visited (stmt, true);
4241 linearize_expr_tree (&ops, stmt, true, true);
4242 ops.qsort (sort_by_operand_rank);
4243 optimize_ops_list (rhs_code, &ops);
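/* Undistribution, e.g. rewriting a * b + a * c as a * (b + c), may
   expose further opportunities, so re-sort and re-optimize the
   operand list whenever it changed something.  */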
4244 if (undistribute_ops_list (rhs_code, &ops,
4245 loop_containing_stmt (stmt)))
4246 {
4247 ops.qsort (sort_by_operand_rank);
4248 optimize_ops_list (rhs_code, &ops);
4249 }
4250
4251 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4252 optimize_range_tests (rhs_code, &ops);
4253
4254 if (first_pass_instance
4255 && rhs_code == MULT_EXPR
4256 && flag_unsafe_math_optimizations)
4257 powi_result = attempt_builtin_powi (stmt, &ops);
4258
4259 /* If the operand vector is now empty, all operands were
4260 consumed by the __builtin_powi optimization. */
4261 if (ops.length () == 0)
4262 transform_stmt_to_copy (&gsi, stmt, powi_result);
4263 else if (ops.length () == 1)
4264 {
4265 tree last_op = ops.last ()->op;
4266
4267 if (powi_result)
4268 transform_stmt_to_multiply (&gsi, stmt, last_op,
4269 powi_result);
4270 else
4271 transform_stmt_to_copy (&gsi, stmt, last_op);
4272 }
4273 else
4274 {
4275 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4276 int ops_num = ops.length ();
4277 int width = get_reassociation_width (ops_num, rhs_code, mode);
4278
4279 if (dump_file && (dump_flags & TDF_DETAILS))
4280 fprintf (dump_file,
4281 "Width = %d was chosen for reassociation\n", width);
4282
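/* For instance, with four operands a, b, c, d and width 2 the
   parallel rewrite yields (a + b) + (c + d), exposing two
   independent additions, while the serial rewrite yields
   ((a + b) + c) + d.  */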
4283 if (width > 1
4284 && ops.length () > 3)
4285 rewrite_expr_tree_parallel (stmt, width, ops);
4286 else
4287 {
4288 /* When there are three operands left, we want
4289 to make sure the ones that get the double
4290 binary op are chosen wisely. */
4291 int len = ops.length ();
4292 if (len >= 3)
4293 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4294
4295 rewrite_expr_tree (stmt, 0, ops, false);
4296 }
4297
4298 /* If we combined some repeated factors into a
4299 __builtin_powi call, multiply that result by the
4300 reassociated operands. */
4301 if (powi_result)
4302 {
4303 gimple mul_stmt;
4304 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4305 tree target_ssa = make_temp_ssa_name (type, NULL,
4306 "reassocpow");
4307 gimple_set_lhs (stmt, target_ssa);
4308 update_stmt (stmt);
4309 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4310 powi_result,
4311 target_ssa);
4312 gimple_set_location (mul_stmt, gimple_location (stmt));
4313 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4314 }
4315 }
4316
4317 ops.release ();
4318 }
4319 }
4320 }
4321 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4322 son;
4323 son = next_dom_son (CDI_POST_DOMINATORS, son))
4324 reassociate_bb (son);
4325 }
4326
4327 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4328 void debug_ops_vector (vec<operand_entry_t> ops);
4329
4330 /* Dump the operand entry vector OPS to FILE. */
4331
4332 void
4333 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4334 {
4335 operand_entry_t oe;
4336 unsigned int i;
4337
4338 FOR_EACH_VEC_ELT (ops, i, oe)
4339 {
4340 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4341 print_generic_expr (file, oe->op, 0);
4342 }
4343 }
4344
4345 /* Dump the operand entry vector OPS to STDERR. */
4346
4347 DEBUG_FUNCTION void
4348 debug_ops_vector (vec<operand_entry_t> ops)
4349 {
4350 dump_ops_vector (stderr, ops);
4351 }
4352
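/* Break up subtracts while walking the dominator tree from the entry
   block, then reassociate expression trees while walking the
   post-dominator tree from the exit block.  */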
4353 static void
4354 do_reassoc (void)
4355 {
4356 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4357 renumber_gimple_stmt_uids ();
4358 reassociate_bb (EXIT_BLOCK_PTR);
4359 }
4360
4361 /* Initialize the reassociation pass. */
4362
4363 static void
4364 init_reassoc (void)
4365 {
4366 int i;
4367 long rank = 2;
4368 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4369
4370 /* Find the loops, so that we can prevent moving calculations in
4371 them. */
4372 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4373
4374 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4375
4376 operand_entry_pool = create_alloc_pool ("operand entry pool",
4377 sizeof (struct operand_entry), 30);
4378 next_operand_entry_id = 0;
4379
4380 /* Reverse Post Order (RPO) will give us something where
4381 deeper loops come later. */
4382 pre_and_rev_post_order_compute (NULL, bbs, false);
4383 bb_rank = XCNEWVEC (long, last_basic_block);
4384 operand_rank = pointer_map_create ();
4385
4386 /* Give each default definition a distinct rank. This includes
4387 parameters and the static chain. Walk backwards over all
4388 SSA names so that we get proper rank ordering according
4389 to tree_swap_operands_p. */
4390 for (i = num_ssa_names - 1; i > 0; --i)
4391 {
4392 tree name = ssa_name (i);
4393 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4394 insert_operand_rank (name, ++rank);
4395 }
4396
4397 /* Set up rank for each BB.  */
4398 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4399 bb_rank[bbs[i]] = ++rank << 16;
4400
4401 free (bbs);
4402 calculate_dominance_info (CDI_POST_DOMINATORS);
4403 plus_negates = vNULL;
4404 }
4405
4406 /* Cleanup after the reassociation pass, and print stats if
4407 requested. */
4408
4409 static void
4410 fini_reassoc (void)
4411 {
4412 statistics_counter_event (cfun, "Linearized",
4413 reassociate_stats.linearized);
4414 statistics_counter_event (cfun, "Constants eliminated",
4415 reassociate_stats.constants_eliminated);
4416 statistics_counter_event (cfun, "Ops eliminated",
4417 reassociate_stats.ops_eliminated);
4418 statistics_counter_event (cfun, "Statements rewritten",
4419 reassociate_stats.rewritten);
4420 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4421 reassociate_stats.pows_encountered);
4422 statistics_counter_event (cfun, "Built-in powi calls created",
4423 reassociate_stats.pows_created);
4424
4425 pointer_map_destroy (operand_rank);
4426 free_alloc_pool (operand_entry_pool);
4427 free (bb_rank);
4428 plus_negates.release ();
4429 free_dominance_info (CDI_POST_DOMINATORS);
4430 loop_optimizer_finalize ();
4431 }
4432
4433 /* Gate and execute functions for Reassociation. */
4434
4435 static unsigned int
4436 execute_reassoc (void)
4437 {
4438 init_reassoc ();
4439
4440 do_reassoc ();
4441 repropagate_negates ();
4442
4443 fini_reassoc ();
4444 return 0;
4445 }
4446
4447 static bool
4448 gate_tree_ssa_reassoc (void)
4449 {
4450 return flag_tree_reassoc != 0;
4451 }
4452
4453 namespace {
4454
4455 const pass_data pass_data_reassoc =
4456 {
4457 GIMPLE_PASS, /* type */
4458 "reassoc", /* name */
4459 OPTGROUP_NONE, /* optinfo_flags */
4460 true, /* has_gate */
4461 true, /* has_execute */
4462 TV_TREE_REASSOC, /* tv_id */
4463 ( PROP_cfg | PROP_ssa ), /* properties_required */
4464 0, /* properties_provided */
4465 0, /* properties_destroyed */
4466 0, /* todo_flags_start */
4467 ( TODO_verify_ssa
4468 | TODO_update_ssa_only_virtuals
4469 | TODO_verify_flow ), /* todo_flags_finish */
4470 };
4471
4472 class pass_reassoc : public gimple_opt_pass
4473 {
4474 public:
4475 pass_reassoc(gcc::context *ctxt)
4476 : gimple_opt_pass(pass_data_reassoc, ctxt)
4477 {}
4478
4479 /* opt_pass methods: */
4480 opt_pass * clone () { return new pass_reassoc (ctxt_); }
4481 bool gate () { return gate_tree_ssa_reassoc (); }
4482 unsigned int execute () { return execute_reassoc (); }
4483
4484 }; // class pass_reassoc
4485
4486 } // anon namespace
4487
4488 gimple_opt_pass *
4489 make_pass_reassoc (gcc::context *ctxt)
4490 {
4491 return new pass_reassoc (ctxt);
4492 }