gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (they do some things more/less
66 than we do, in different orders, etc.).
67
68 It consists of five steps:
69
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
72
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
77
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
80
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
84
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
87
88 5. Repropagate negates, as nothing else will clean it up ATM.
89
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
92
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
95
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
100
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
105
106 IE if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
108
109 a (1), b (1), c (1), d (2), e (2)
110
111
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
114
115 You want to first merge all leaves of the same rank, as much as
116 possible.
117
118 So first build a binary op of
119
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
121
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
124
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
129
130 Then build a binary op of d + e
131 mergetmp2 = d + e
132
133 and put mergetmp2 on the merge worklist.
134
135 so merge worklist = {mergetmp, c, mergetmp2}
136
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
139
140 So we have
141
142 build binary op
143 mergetmp3 = mergetmp + c
144
145 worklist = {mergetmp2, mergetmp3}
146
147 mergetmp4 = mergetmp2 + mergetmp3
148
149 worklist = {mergetmp4}
150
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
153
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
156
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
159
160 So why don't we do this?
161
162 Because it's expensive, and will rarely help. Most trees we are
163 reassociating have 3 or fewer ops. If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
167
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
170
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
174
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
178
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
184
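/* Illustrative sketch, not part of the original pass: at the source level,
   steps 2-4 above mean that two sums over the same operands are linearized
   and rewritten into the same rank-ordered chain, so a later redundancy
   elimination pass (FRE/DOM) can see that they compute the same value.
   The function and variable names below are hypothetical.  */

static int
reassoc_rank_order_example (int a, int b, int c, int d)
{
  int x = (a + c) + (d + b);	/* linearized, then rewritten in rank order */
  int y = (d + a) + (b + c);	/* rewritten into the same chain as x */
  return x - y;			/* later passes can fold this to 0 */
}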
185
186 /* Statistics */
187 static struct
188 {
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
196
197 /* Operator, rank pair. */
198 typedef struct operand_entry
199 {
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
205
206 static alloc_pool operand_entry_pool;
207
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
211
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
216
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
219
220 /* Forward decls. */
221 static long get_rank (tree);
222 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
223
224
225 /* Bias amount for loop-carried phis. We want this to be larger than
226 the depth of any reassociation tree we can see, but not larger than
227 the rank difference between two blocks. */
228 #define PHI_LOOP_BIAS (1 << 15)
229
230 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
231 an innermost loop, and the phi has only a single use which is inside
232 the loop, then the rank is the block rank of the loop latch plus an
233 extra bias for the loop-carried dependence. This causes expressions
234 calculated into an accumulator variable to be independent for each
235 iteration of the loop. If STMT is some other phi, the rank is the
236 block rank of its containing block. */
237 static long
238 phi_rank (gimple stmt)
239 {
240 basic_block bb = gimple_bb (stmt);
241 struct loop *father = bb->loop_father;
242 tree res;
243 unsigned i;
244 use_operand_p use;
245 gimple use_stmt;
246
247 /* We only care about real loops (those with a latch). */
248 if (!father->latch)
249 return bb_rank[bb->index];
250
251 /* Interesting phis must be in headers of innermost loops. */
252 if (bb != father->header
253 || father->inner)
254 return bb_rank[bb->index];
255
256 /* Ignore virtual SSA_NAMEs. */
257 res = gimple_phi_result (stmt);
258 if (virtual_operand_p (res))
259 return bb_rank[bb->index];
260
261 /* The phi definition must have a single use, and that use must be
262 within the loop. Otherwise this isn't an accumulator pattern. */
263 if (!single_imm_use (res, &use, &use_stmt)
264 || gimple_bb (use_stmt)->loop_father != father)
265 return bb_rank[bb->index];
266
267 /* Look for phi arguments from within the loop. If found, bias this phi. */
268 for (i = 0; i < gimple_phi_num_args (stmt); i++)
269 {
270 tree arg = gimple_phi_arg_def (stmt, i);
271 if (TREE_CODE (arg) == SSA_NAME
272 && !SSA_NAME_IS_DEFAULT_DEF (arg))
273 {
274 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
275 if (gimple_bb (def_stmt)->loop_father == father)
276 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
277 }
278 }
279
280 /* Must be an uninteresting phi. */
281 return bb_rank[bb->index];
282 }
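
/* Illustrative sketch, not part of the original pass: a single-block
   accumulator loop of the kind phi_rank biases.  The accumulator's phi is
   given the latch's block rank plus PHI_LOOP_BIAS, so when the sum below
   is rewritten the loop-carried value is added last and the per-iteration
   work no longer depends on the previous iteration.  Names are
   hypothetical; for floating-point types this additionally requires
   -fassociative-math.  */

static int
accumulator_example (const int *a, const int *d, const int *e, int n)
{
  int x = 0;			/* becomes x_1 = PHI <0, x_2> in the loop */
  int i;
  for (i = 0; i < n; i++)
    x = x + a[i] + d[i] + e[i];	/* rewritten as (a[i] + d[i] + e[i]) + x */
  return x;
}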
283
284 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
285 loop-carried dependence of an innermost loop, return TRUE; else
286 return FALSE. */
287 static bool
288 loop_carried_phi (tree exp)
289 {
290 gimple phi_stmt;
291 long block_rank;
292
293 if (TREE_CODE (exp) != SSA_NAME
294 || SSA_NAME_IS_DEFAULT_DEF (exp))
295 return false;
296
297 phi_stmt = SSA_NAME_DEF_STMT (exp);
298
299 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
300 return false;
301
302 /* Non-loop-carried phis have block rank. Loop-carried phis have
303 an additional bias added in. If this phi doesn't have block rank,
304 it's biased and should not be propagated. */
305 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
306
307 if (phi_rank (phi_stmt) != block_rank)
308 return true;
309
310 return false;
311 }
312
313 /* Return the maximum of RANK and the rank that should be propagated
314 from expression OP. For most operands, this is just the rank of OP.
315 For loop-carried phis, the value is zero to avoid undoing the bias
316 in favor of the phi. */
317 static long
318 propagate_rank (long rank, tree op)
319 {
320 long op_rank;
321
322 if (loop_carried_phi (op))
323 return rank;
324
325 op_rank = get_rank (op);
326
327 return MAX (rank, op_rank);
328 }
329
330 /* Look up the operand rank structure for expression E. */
331
332 static inline long
333 find_operand_rank (tree e)
334 {
335 void **slot = pointer_map_contains (operand_rank, e);
336 return slot ? (long) (intptr_t) *slot : -1;
337 }
338
339 /* Insert {E,RANK} into the operand rank hashtable. */
340
341 static inline void
342 insert_operand_rank (tree e, long rank)
343 {
344 void **slot;
345 gcc_assert (rank > 0);
346 slot = pointer_map_insert (operand_rank, e);
347 gcc_assert (!*slot);
348 *slot = (void *) (intptr_t) rank;
349 }
350
351 /* Given an expression E, return the rank of the expression. */
352
353 static long
354 get_rank (tree e)
355 {
356 /* Constants have rank 0. */
357 if (is_gimple_min_invariant (e))
358 return 0;
359
360 /* SSA_NAMEs have the rank of the expression they are the result
361 of.
362 For globals and uninitialized values, the rank is 0.
363 For function arguments, use the pre-setup rank.
364 For PHI nodes, stores, asm statements, etc, we use the rank of
365 the BB.
366 For simple operations, the rank is the maximum rank of any of
367 its operands, or the bb_rank, whichever is less.
368 I make no claims that this is optimal; however, it gives good
369 results. */
370
371 /* We make an exception to the normal ranking system to break
372 dependences of accumulator variables in loops. Suppose we
373 have a simple one-block loop containing:
374
375 x_1 = phi(x_0, x_2)
376 b = a + x_1
377 c = b + d
378 x_2 = c + e
379
380 As shown, each iteration of the calculation into x is fully
381 dependent upon the iteration before it. We would prefer to
382 see this in the form:
383
384 x_1 = phi(x_0, x_2)
385 b = a + d
386 c = b + e
387 x_2 = c + x_1
388
389 If the loop is unrolled, the calculations of b and c from
390 different iterations can be interleaved.
391
392 To obtain this result during reassociation, we bias the rank
393 of the phi definition x_1 upward, when it is recognized as an
394 accumulator pattern. The artificial rank causes it to be
395 added last, providing the desired independence. */
396
397 if (TREE_CODE (e) == SSA_NAME)
398 {
399 gimple stmt;
400 long rank;
401 int i, n;
402 tree op;
403
404 if (SSA_NAME_IS_DEFAULT_DEF (e))
405 return find_operand_rank (e);
406
407 stmt = SSA_NAME_DEF_STMT (e);
408 if (gimple_code (stmt) == GIMPLE_PHI)
409 return phi_rank (stmt);
410
411 if (!is_gimple_assign (stmt)
412 || gimple_vdef (stmt))
413 return bb_rank[gimple_bb (stmt)->index];
414
415 /* If we already have a rank for this expression, use that. */
416 rank = find_operand_rank (e);
417 if (rank != -1)
418 return rank;
419
420 /* Otherwise, find the maximum rank for the operands. As an
421 exception, remove the bias from loop-carried phis when propagating
422 the rank so that dependent operations are not also biased. */
423 rank = 0;
424 if (gimple_assign_single_p (stmt))
425 {
426 tree rhs = gimple_assign_rhs1 (stmt);
427 n = TREE_OPERAND_LENGTH (rhs);
428 if (n == 0)
429 rank = propagate_rank (rank, rhs);
430 else
431 {
432 for (i = 0; i < n; i++)
433 {
434 op = TREE_OPERAND (rhs, i);
435
436 if (op != NULL_TREE)
437 rank = propagate_rank (rank, op);
438 }
439 }
440 }
441 else
442 {
443 n = gimple_num_ops (stmt);
444 for (i = 1; i < n; i++)
445 {
446 op = gimple_op (stmt, i);
447 gcc_assert (op);
448 rank = propagate_rank (rank, op);
449 }
450 }
451
452 if (dump_file && (dump_flags & TDF_DETAILS))
453 {
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
457 }
458
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
462 }
463
464 /* Globals, etc, are rank 0 */
465 return 0;
466 }
467
468
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
474
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
479 {
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
486 }
487
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
492 {
493 const operand_entry_t oea = *(const operand_entry_t *)pa;
494 const operand_entry_t oeb = *(const operand_entry_t *)pb;
495
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied/whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
500 {
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
507 }
508
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
514 {
515 /* SSA_NAME_VERSION is assigned pretty randomly (we reuse versions
516 of removed SSA_NAMEs), so if possible, prefer to sort
517 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
522 {
523 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
528 {
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
531 }
532 else
533 {
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
538 }
539 }
540
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
545 }
546
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
551 }
552
553 /* Add an operand entry to *OPS for the tree operand OP. */
554
555 static void
556 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
557 {
558 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
559
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
565 }
566
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
569
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
572 HOST_WIDE_INT repeat)
573 {
574 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
575
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
581
582 reassociate_stats.pows_encountered++;
583 }
584
585 /* Return true if STMT is a reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
587
588 static bool
589 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
590 {
591 basic_block bb = gimple_bb (stmt);
592
593 if (gimple_bb (stmt) == NULL)
594 return false;
595
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
598
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
603
604 return false;
605 }
606
607
608 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
609 operand of that unary operation. Otherwise, return NULL_TREE. */
610
611 static tree
612 get_unary_op (tree name, enum tree_code opcode)
613 {
614 gimple stmt = SSA_NAME_DEF_STMT (name);
615
616 if (!is_gimple_assign (stmt))
617 return NULL_TREE;
618
619 if (gimple_assign_rhs_code (stmt) == opcode)
620 return gimple_assign_rhs1 (stmt);
621 return NULL_TREE;
622 }
623
624 /* If CURR and LAST are a pair of ops that OPCODE allows us to
625 eliminate through equivalences, do so, remove them from OPS, and
626 return true. Otherwise, return false. */
627
628 static bool
629 eliminate_duplicate_pair (enum tree_code opcode,
630 vec<operand_entry_t> *ops,
631 bool *all_done,
632 unsigned int i,
633 operand_entry_t curr,
634 operand_entry_t last)
635 {
636
637 /* If we have two of the same op, and the opcode is &, |, min, or max,
638 we can eliminate one of them.
639 If we have two of the same op, and the opcode is ^, we can
640 eliminate both of them. */
641
642 if (last && last->op == curr->op)
643 {
644 switch (opcode)
645 {
646 case MAX_EXPR:
647 case MIN_EXPR:
648 case BIT_IOR_EXPR:
649 case BIT_AND_EXPR:
650 if (dump_file && (dump_flags & TDF_DETAILS))
651 {
652 fprintf (dump_file, "Equivalence: ");
653 print_generic_expr (dump_file, curr->op, 0);
654 fprintf (dump_file, " [&|minmax] ");
655 print_generic_expr (dump_file, last->op, 0);
656 fprintf (dump_file, " -> ");
657 print_generic_stmt (dump_file, last->op, 0);
658 }
659
660 ops->ordered_remove (i);
661 reassociate_stats.ops_eliminated ++;
662
663 return true;
664
665 case BIT_XOR_EXPR:
666 if (dump_file && (dump_flags & TDF_DETAILS))
667 {
668 fprintf (dump_file, "Equivalence: ");
669 print_generic_expr (dump_file, curr->op, 0);
670 fprintf (dump_file, " ^ ");
671 print_generic_expr (dump_file, last->op, 0);
672 fprintf (dump_file, " -> nothing\n");
673 }
674
675 reassociate_stats.ops_eliminated += 2;
676
677 if (ops->length () == 2)
678 {
679 ops->create (0);
680 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
681 *all_done = true;
682 }
683 else
684 {
685 ops->ordered_remove (i-1);
686 ops->ordered_remove (i-1);
687 }
688
689 return true;
690
691 default:
692 break;
693 }
694 }
695 return false;
696 }
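
/* Illustrative sketch, not part of the original pass: the duplicate-pair
   equivalences above keep a single copy of a & a (or a | a, min, max) and
   fold a ^ a to zero.  Names are hypothetical.  */

static int
duplicate_pair_example (int a)
{
  return (a & a) | (a ^ a);	/* a & a -> a, a ^ a -> 0, result is a */
}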
697
698 static vec<tree> plus_negates;
699
700 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
701 expression, look in OPS for a corresponding positive operation to cancel
702 it out. If we find one, remove the other from OPS, replace
703 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
704 return false. */
705
706 static bool
707 eliminate_plus_minus_pair (enum tree_code opcode,
708 vec<operand_entry_t> *ops,
709 unsigned int currindex,
710 operand_entry_t curr)
711 {
712 tree negateop;
713 tree notop;
714 unsigned int i;
715 operand_entry_t oe;
716
717 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
718 return false;
719
720 negateop = get_unary_op (curr->op, NEGATE_EXPR);
721 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
722 if (negateop == NULL_TREE && notop == NULL_TREE)
723 return false;
724
725 /* Any non-negated version will have a rank that is one less than
726 the current rank. So once we hit those ranks, if we don't find
727 one, we can stop. */
728
729 for (i = currindex + 1;
730 ops->iterate (i, &oe)
731 && oe->rank >= curr->rank - 1 ;
732 i++)
733 {
734 if (oe->op == negateop)
735 {
736
737 if (dump_file && (dump_flags & TDF_DETAILS))
738 {
739 fprintf (dump_file, "Equivalence: ");
740 print_generic_expr (dump_file, negateop, 0);
741 fprintf (dump_file, " + -");
742 print_generic_expr (dump_file, oe->op, 0);
743 fprintf (dump_file, " -> 0\n");
744 }
745
746 ops->ordered_remove (i);
747 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
748 ops->ordered_remove (currindex);
749 reassociate_stats.ops_eliminated ++;
750
751 return true;
752 }
753 else if (oe->op == notop)
754 {
755 tree op_type = TREE_TYPE (oe->op);
756
757 if (dump_file && (dump_flags & TDF_DETAILS))
758 {
759 fprintf (dump_file, "Equivalence: ");
760 print_generic_expr (dump_file, notop, 0);
761 fprintf (dump_file, " + ~");
762 print_generic_expr (dump_file, oe->op, 0);
763 fprintf (dump_file, " -> -1\n");
764 }
765
766 ops->ordered_remove (i);
767 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
768 ops->ordered_remove (currindex);
769 reassociate_stats.ops_eliminated ++;
770
771 return true;
772 }
773 }
774
775 /* CURR->OP is a negate expr in a plus expr: save it for later
776 inspection in repropagate_negates(). */
777 if (negateop != NULL_TREE)
778 plus_negates.safe_push (curr->op);
779
780 return false;
781 }
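
/* Illustrative sketch, not part of the original pass: the plus/minus and
   plus/not equivalences above cancel a with -a to 0 and fold a + ~a to -1.
   Names are hypothetical.  */

static int
plus_minus_pair_example (int a, int b)
{
  int t1 = b + -a + a;		/* -a and a cancel, leaving b */
  int t2 = a + ~a;		/* folds to -1 */
  return t1 + t2;
}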
782
783 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
784 bitwise not expression, look in OPS for a corresponding operand to
785 cancel it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0, and return true. Otherwise, return
787 false. */
788
789 static bool
790 eliminate_not_pairs (enum tree_code opcode,
791 vec<operand_entry_t> *ops,
792 unsigned int currindex,
793 operand_entry_t curr)
794 {
795 tree notop;
796 unsigned int i;
797 operand_entry_t oe;
798
799 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
800 || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
802
803 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
804 if (notop == NULL_TREE)
805 return false;
806
807 /* Any non-not version will have a rank that is one less than
808 the current rank. So once we hit those ranks, if we don't find
809 one, we can stop. */
810
811 for (i = currindex + 1;
812 ops->iterate (i, &oe)
813 && oe->rank >= curr->rank - 1;
814 i++)
815 {
816 if (oe->op == notop)
817 {
818 if (dump_file && (dump_flags & TDF_DETAILS))
819 {
820 fprintf (dump_file, "Equivalence: ");
821 print_generic_expr (dump_file, notop, 0);
822 if (opcode == BIT_AND_EXPR)
823 fprintf (dump_file, " & ~");
824 else if (opcode == BIT_IOR_EXPR)
825 fprintf (dump_file, " | ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 if (opcode == BIT_AND_EXPR)
828 fprintf (dump_file, " -> 0\n");
829 else if (opcode == BIT_IOR_EXPR)
830 fprintf (dump_file, " -> -1\n");
831 }
832
833 if (opcode == BIT_AND_EXPR)
834 oe->op = build_zero_cst (TREE_TYPE (oe->op));
835 else if (opcode == BIT_IOR_EXPR)
836 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
837
838 reassociate_stats.ops_eliminated += ops->length () - 1;
839 ops->truncate (0);
840 ops->quick_push (oe);
841 return true;
842 }
843 }
844
845 return false;
846 }
847
848 /* Use constant value that may be present in OPS to try to eliminate
849 operands. Note that this function is only really used when we've
850 eliminated ops for other reasons, or merged constants. Across
851 single statements, fold already does all of this, plus more. There
852 is little point in duplicating logic, so I've only included the
853 identities that I could ever construct testcases to trigger. */
854
855 static void
856 eliminate_using_constants (enum tree_code opcode,
857 vec<operand_entry_t> *ops)
858 {
859 operand_entry_t oelast = ops->last ();
860 tree type = TREE_TYPE (oelast->op);
861
862 if (oelast->rank == 0
863 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
864 {
865 switch (opcode)
866 {
867 case BIT_AND_EXPR:
868 if (integer_zerop (oelast->op))
869 {
870 if (ops->length () != 1)
871 {
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found & 0, removing all other ops\n");
874
875 reassociate_stats.ops_eliminated += ops->length () - 1;
876
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
880 }
881 }
882 else if (integer_all_onesp (oelast->op))
883 {
884 if (ops->length () != 1)
885 {
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found & -1, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
890 }
891 }
892 break;
893 case BIT_IOR_EXPR:
894 if (integer_all_onesp (oelast->op))
895 {
896 if (ops->length () != 1)
897 {
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found | -1, removing all other ops\n");
900
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902
903 ops->truncate (0);
904 ops->quick_push (oelast);
905 return;
906 }
907 }
908 else if (integer_zerop (oelast->op))
909 {
910 if (ops->length () != 1)
911 {
912 if (dump_file && (dump_flags & TDF_DETAILS))
913 fprintf (dump_file, "Found | 0, removing\n");
914 ops->pop ();
915 reassociate_stats.ops_eliminated++;
916 }
917 }
918 break;
919 case MULT_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && !HONOR_NANS (TYPE_MODE (type))
923 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
924 && real_zerop (oelast->op)))
925 {
926 if (ops->length () != 1)
927 {
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found * 0, removing all other ops\n");
930
931 reassociate_stats.ops_eliminated += ops->length () - 1;
932 ops->truncate (1);
933 ops->quick_push (oelast);
934 return;
935 }
936 }
937 else if (integer_onep (oelast->op)
938 || (FLOAT_TYPE_P (type)
939 && !HONOR_SNANS (TYPE_MODE (type))
940 && real_onep (oelast->op)))
941 {
942 if (ops->length () != 1)
943 {
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found * 1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
948 return;
949 }
950 }
951 break;
952 case BIT_XOR_EXPR:
953 case PLUS_EXPR:
954 case MINUS_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
958 && fold_real_zero_addition_p (type, oelast->op,
959 opcode == MINUS_EXPR)))
960 {
961 if (ops->length () != 1)
962 {
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found [|^+] 0, removing\n");
965 ops->pop ();
966 reassociate_stats.ops_eliminated++;
967 return;
968 }
969 }
970 break;
971 default:
972 break;
973 }
974 }
975 }
976
977
978 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
979 bool, bool);
980
981 /* Structure for tracking and counting operands. */
982 typedef struct oecount_s {
983 int cnt;
984 int id;
985 enum tree_code oecode;
986 tree op;
987 } oecount;
988
989
990 /* The heap for the oecount hashtable and the sorted list of operands. */
991 static vec<oecount> cvec;
992
993
994 /* Oecount hashtable helpers. */
995
996 struct oecount_hasher : typed_noop_remove <void>
997 {
998 /* Note that this hash table stores integers, not pointers.
999 So, observe the casting in the member functions. */
1000 typedef void value_type;
1001 typedef void compare_type;
1002 static inline hashval_t hash (const value_type *);
1003 static inline bool equal (const value_type *, const compare_type *);
1004 };
1005
1006 /* Hash function for oecount. */
1007
1008 inline hashval_t
1009 oecount_hasher::hash (const value_type *p)
1010 {
1011 const oecount *c = &cvec[(size_t)p - 42];
1012 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1013 }
1014
1015 /* Comparison function for oecount. */
1016
1017 inline bool
1018 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1019 {
1020 const oecount *c1 = &cvec[(size_t)p1 - 42];
1021 const oecount *c2 = &cvec[(size_t)p2 - 42];
1022 return (c1->oecode == c2->oecode
1023 && c1->op == c2->op);
1024 }
1025
1026 /* Comparison function for qsort sorting oecount elements by count. */
1027
1028 static int
1029 oecount_cmp (const void *p1, const void *p2)
1030 {
1031 const oecount *c1 = (const oecount *)p1;
1032 const oecount *c2 = (const oecount *)p2;
1033 if (c1->cnt != c2->cnt)
1034 return c1->cnt - c2->cnt;
1035 else
1036 /* If counts are identical, use unique IDs to stabilize qsort. */
1037 return c1->id - c2->id;
1038 }
1039
1040 /* Return TRUE iff STMT represents a builtin call that raises OP
1041 to some exponent. */
1042
1043 static bool
1044 stmt_is_power_of_op (gimple stmt, tree op)
1045 {
1046 tree fndecl;
1047
1048 if (!is_gimple_call (stmt))
1049 return false;
1050
1051 fndecl = gimple_call_fndecl (stmt);
1052
1053 if (!fndecl
1054 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1055 return false;
1056
1057 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1058 {
1059 CASE_FLT_FN (BUILT_IN_POW):
1060 CASE_FLT_FN (BUILT_IN_POWI):
1061 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1062
1063 default:
1064 return false;
1065 }
1066 }
1067
1068 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1069 in place and return the result. Assumes that stmt_is_power_of_op
1070 was previously called for STMT and returned TRUE. */
1071
1072 static HOST_WIDE_INT
1073 decrement_power (gimple stmt)
1074 {
1075 REAL_VALUE_TYPE c, cint;
1076 HOST_WIDE_INT power;
1077 tree arg1;
1078
1079 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1080 {
1081 CASE_FLT_FN (BUILT_IN_POW):
1082 arg1 = gimple_call_arg (stmt, 1);
1083 c = TREE_REAL_CST (arg1);
1084 power = real_to_integer (&c) - 1;
1085 real_from_integer (&cint, VOIDmode, power, 0, 0);
1086 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1087 return power;
1088
1089 CASE_FLT_FN (BUILT_IN_POWI):
1090 arg1 = gimple_call_arg (stmt, 1);
1091 power = TREE_INT_CST_LOW (arg1) - 1;
1092 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1093 return power;
1094
1095 default:
1096 gcc_unreachable ();
1097 }
1098 }
1099
1100 /* Find the single immediate use of STMT's LHS, and replace it
1101 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1102 replace *DEF with OP as well. */
1103
1104 static void
1105 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1106 {
1107 tree lhs;
1108 gimple use_stmt;
1109 use_operand_p use;
1110 gimple_stmt_iterator gsi;
1111
1112 if (is_gimple_call (stmt))
1113 lhs = gimple_call_lhs (stmt);
1114 else
1115 lhs = gimple_assign_lhs (stmt);
1116
1117 gcc_assert (has_single_use (lhs));
1118 single_imm_use (lhs, &use, &use_stmt);
1119 if (lhs == *def)
1120 *def = op;
1121 SET_USE (use, op);
1122 if (TREE_CODE (op) != SSA_NAME)
1123 update_stmt (use_stmt);
1124 gsi = gsi_for_stmt (stmt);
1125 unlink_stmt_vdef (stmt);
1126 gsi_remove (&gsi, true);
1127 release_defs (stmt);
1128 }
1129
1130 /* Walks the linear chain with result *DEF searching for an operation
1131 with operand OP and code OPCODE, removing that from the chain. *DEF
1132 is updated if there is only one operand but no operation left. */
1133
1134 static void
1135 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1136 {
1137 gimple stmt = SSA_NAME_DEF_STMT (*def);
1138
1139 do
1140 {
1141 tree name;
1142
1143 if (opcode == MULT_EXPR
1144 && stmt_is_power_of_op (stmt, op))
1145 {
1146 if (decrement_power (stmt) == 1)
1147 propagate_op_to_single_use (op, stmt, def);
1148 return;
1149 }
1150
1151 name = gimple_assign_rhs1 (stmt);
1152
1153 /* If this is the operation we look for and one of the operands
1154 is ours, simply propagate the other operand into the stmt's
1155 single use. */
1156 if (gimple_assign_rhs_code (stmt) == opcode
1157 && (name == op
1158 || gimple_assign_rhs2 (stmt) == op))
1159 {
1160 if (name == op)
1161 name = gimple_assign_rhs2 (stmt);
1162 propagate_op_to_single_use (name, stmt, def);
1163 return;
1164 }
1165
1166 /* We might have a multiply of two __builtin_pow* calls, and
1167 the operand might be hiding in the rightmost one. */
1168 if (opcode == MULT_EXPR
1169 && gimple_assign_rhs_code (stmt) == opcode
1170 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1171 && has_single_use (gimple_assign_rhs2 (stmt)))
1172 {
1173 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1174 if (stmt_is_power_of_op (stmt2, op))
1175 {
1176 if (decrement_power (stmt2) == 1)
1177 propagate_op_to_single_use (op, stmt2, def);
1178 return;
1179 }
1180 }
1181
1182 /* Continue walking the chain. */
1183 gcc_assert (name != op
1184 && TREE_CODE (name) == SSA_NAME);
1185 stmt = SSA_NAME_DEF_STMT (name);
1186 }
1187 while (1);
1188 }
1189
1190 /* Returns true if statement S1 dominates statement S2. Like
1191 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1192
1193 static bool
1194 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1195 {
1196 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1197
1198 /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1199 definition (D) SSA_NAME. Assume it lives at the beginning of the
1200 function and thus dominates everything. */
1201 if (!bb1 || s1 == s2)
1202 return true;
1203
1204 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1205 if (!bb2)
1206 return false;
1207
1208 if (bb1 == bb2)
1209 {
1210 /* PHIs in the same basic block are assumed to be
1211 executed all in parallel, if only one stmt is a PHI,
1212 it dominates the other stmt in the same basic block. */
1213 if (gimple_code (s1) == GIMPLE_PHI)
1214 return true;
1215
1216 if (gimple_code (s2) == GIMPLE_PHI)
1217 return false;
1218
1219 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1220
1221 if (gimple_uid (s1) < gimple_uid (s2))
1222 return true;
1223
1224 if (gimple_uid (s1) > gimple_uid (s2))
1225 return false;
1226
1227 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1228 unsigned int uid = gimple_uid (s1);
1229 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1230 {
1231 gimple s = gsi_stmt (gsi);
1232 if (gimple_uid (s) != uid)
1233 break;
1234 if (s == s2)
1235 return true;
1236 }
1237
1238 return false;
1239 }
1240
1241 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1242 }
1243
1244 /* Insert STMT after INSERT_POINT. */
1245
1246 static void
1247 insert_stmt_after (gimple stmt, gimple insert_point)
1248 {
1249 gimple_stmt_iterator gsi;
1250 basic_block bb;
1251
1252 if (gimple_code (insert_point) == GIMPLE_PHI)
1253 bb = gimple_bb (insert_point);
1254 else if (!stmt_ends_bb_p (insert_point))
1255 {
1256 gsi = gsi_for_stmt (insert_point);
1257 gimple_set_uid (stmt, gimple_uid (insert_point));
1258 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1259 return;
1260 }
1261 else
1262 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1263 thus if it must end a basic block, it should be a call that can
1264 throw, or some assignment that can throw. If it throws, the LHS
1265 of it will not be initialized though, so only valid places using
1266 the SSA_NAME should be dominated by the fallthru edge. */
1267 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1268 gsi = gsi_after_labels (bb);
1269 if (gsi_end_p (gsi))
1270 {
1271 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1272 gimple_set_uid (stmt,
1273 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1274 }
1275 else
1276 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1277 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1278 }
1279
1280 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1281 the result. Places the statement after the definition of either
1282 OP1 or OP2. Returns the new statement. */
1283
1284 static gimple
1285 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1286 {
1287 gimple op1def = NULL, op2def = NULL;
1288 gimple_stmt_iterator gsi;
1289 tree op;
1290 gimple sum;
1291
1292 /* Create the addition statement. */
1293 op = make_ssa_name (type, NULL);
1294 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1295
1296 /* Find an insertion place and insert. */
1297 if (TREE_CODE (op1) == SSA_NAME)
1298 op1def = SSA_NAME_DEF_STMT (op1);
1299 if (TREE_CODE (op2) == SSA_NAME)
1300 op2def = SSA_NAME_DEF_STMT (op2);
1301 if ((!op1def || gimple_nop_p (op1def))
1302 && (!op2def || gimple_nop_p (op2def)))
1303 {
1304 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1305 if (gsi_end_p (gsi))
1306 {
1307 gimple_stmt_iterator gsi2
1308 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1309 gimple_set_uid (sum,
1310 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1311 }
1312 else
1313 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1314 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1315 }
1316 else
1317 {
1318 gimple insert_point;
1319 if ((!op1def || gimple_nop_p (op1def))
1320 || (op2def && !gimple_nop_p (op2def)
1321 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1322 insert_point = op2def;
1323 else
1324 insert_point = op1def;
1325 insert_stmt_after (sum, insert_point);
1326 }
1327 update_stmt (sum);
1328
1329 return sum;
1330 }
1331
1332 /* Perform un-distribution of divisions and multiplications.
1333 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1334 to (A + B) / X for real X.
1335
1336 The algorithm is organized as follows.
1337
1338 - First we walk the addition chain *OPS looking for summands that
1339 are defined by a multiplication or a real division. This results
1340 in the candidates bitmap with relevant indices into *OPS.
1341
1342 - Second we build the chains of multiplications or divisions for
1343 these candidates, counting the number of occurrences of (operand, code)
1344 pairs in all of the candidates chains.
1345
1346 - Third we sort the (operand, code) pairs by number of occurrence and
1347 process them starting with the pair with the most uses.
1348
1349 * For each such pair we walk the candidates again to build a
1350 second candidate bitmap noting all multiplication/division chains
1351 that have at least one occurrence of (operand, code).
1352
1353 * We build an alternate addition chain only covering these
1354 candidates with one (operand, code) operation removed from their
1355 multiplication/division chain.
1356
1357 * The first candidate gets replaced by the alternate addition chain
1358 multiplied/divided by the operand.
1359
1360 * All candidate chains get disabled for further processing and
1361 processing of (operand, code) pairs continues.
1362
1363 The alternate addition chains built are re-processed by the main
1364 reassociation algorithm which allows optimizing a * x * y + b * y * x
1365 to (a + b) * x * y in one invocation of the reassociation pass. */
1366
1367 static bool
1368 undistribute_ops_list (enum tree_code opcode,
1369 vec<operand_entry_t> *ops, struct loop *loop)
1370 {
1371 unsigned int length = ops->length ();
1372 operand_entry_t oe1;
1373 unsigned i, j;
1374 sbitmap candidates, candidates2;
1375 unsigned nr_candidates, nr_candidates2;
1376 sbitmap_iterator sbi0;
1377 vec<operand_entry_t> *subops;
1378 hash_table <oecount_hasher> ctable;
1379 bool changed = false;
1380 int next_oecount_id = 0;
1381
1382 if (length <= 1
1383 || opcode != PLUS_EXPR)
1384 return false;
1385
1386 /* Build a list of candidates to process. */
1387 candidates = sbitmap_alloc (length);
1388 bitmap_clear (candidates);
1389 nr_candidates = 0;
1390 FOR_EACH_VEC_ELT (*ops, i, oe1)
1391 {
1392 enum tree_code dcode;
1393 gimple oe1def;
1394
1395 if (TREE_CODE (oe1->op) != SSA_NAME)
1396 continue;
1397 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1398 if (!is_gimple_assign (oe1def))
1399 continue;
1400 dcode = gimple_assign_rhs_code (oe1def);
1401 if ((dcode != MULT_EXPR
1402 && dcode != RDIV_EXPR)
1403 || !is_reassociable_op (oe1def, dcode, loop))
1404 continue;
1405
1406 bitmap_set_bit (candidates, i);
1407 nr_candidates++;
1408 }
1409
1410 if (nr_candidates < 2)
1411 {
1412 sbitmap_free (candidates);
1413 return false;
1414 }
1415
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1417 {
1418 fprintf (dump_file, "searching for un-distribute opportunities ");
1419 print_generic_expr (dump_file,
1420 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1421 fprintf (dump_file, " %d\n", nr_candidates);
1422 }
1423
1424 /* Build linearized sub-operand lists and the counting table. */
1425 cvec.create (0);
1426 ctable.create (15);
1427 /* ??? Macro arguments cannot have multi-argument template types in
1428 them. This typedef is needed to workaround that limitation. */
1429 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1430 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1431 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1432 {
1433 gimple oedef;
1434 enum tree_code oecode;
1435 unsigned j;
1436
1437 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1438 oecode = gimple_assign_rhs_code (oedef);
1439 linearize_expr_tree (&subops[i], oedef,
1440 associative_tree_code (oecode), false);
1441
1442 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1443 {
1444 oecount c;
1445 void **slot;
1446 size_t idx;
1447 c.oecode = oecode;
1448 c.cnt = 1;
1449 c.id = next_oecount_id++;
1450 c.op = oe1->op;
1451 cvec.safe_push (c);
1452 idx = cvec.length () + 41;
1453 slot = ctable.find_slot ((void *)idx, INSERT);
1454 if (!*slot)
1455 {
1456 *slot = (void *)idx;
1457 }
1458 else
1459 {
1460 cvec.pop ();
1461 cvec[(size_t)*slot - 42].cnt++;
1462 }
1463 }
1464 }
1465 ctable.dispose ();
1466
1467 /* Sort the counting table. */
1468 cvec.qsort (oecount_cmp);
1469
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1471 {
1472 oecount *c;
1473 fprintf (dump_file, "Candidates:\n");
1474 FOR_EACH_VEC_ELT (cvec, j, c)
1475 {
1476 fprintf (dump_file, " %u %s: ", c->cnt,
1477 c->oecode == MULT_EXPR
1478 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1479 print_generic_expr (dump_file, c->op, 0);
1480 fprintf (dump_file, "\n");
1481 }
1482 }
1483
1484 /* Process the (operand, code) pairs in order of most occurrence. */
1485 candidates2 = sbitmap_alloc (length);
1486 while (!cvec.is_empty ())
1487 {
1488 oecount *c = &cvec.last ();
1489 if (c->cnt < 2)
1490 break;
1491
1492 /* Now collect the operands in the outer chain that contain
1493 the common operand in their inner chain. */
1494 bitmap_clear (candidates2);
1495 nr_candidates2 = 0;
1496 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1497 {
1498 gimple oedef;
1499 enum tree_code oecode;
1500 unsigned j;
1501 tree op = (*ops)[i]->op;
1502
1503 /* If we undistributed in this chain already this may be
1504 a constant. */
1505 if (TREE_CODE (op) != SSA_NAME)
1506 continue;
1507
1508 oedef = SSA_NAME_DEF_STMT (op);
1509 oecode = gimple_assign_rhs_code (oedef);
1510 if (oecode != c->oecode)
1511 continue;
1512
1513 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1514 {
1515 if (oe1->op == c->op)
1516 {
1517 bitmap_set_bit (candidates2, i);
1518 ++nr_candidates2;
1519 break;
1520 }
1521 }
1522 }
1523
1524 if (nr_candidates2 >= 2)
1525 {
1526 operand_entry_t oe1, oe2;
1527 gimple prod;
1528 int first = bitmap_first_set_bit (candidates2);
1529
1530 /* Build the new addition chain. */
1531 oe1 = (*ops)[first];
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1533 {
1534 fprintf (dump_file, "Building (");
1535 print_generic_expr (dump_file, oe1->op, 0);
1536 }
1537 zero_one_operation (&oe1->op, c->oecode, c->op);
1538 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1539 {
1540 gimple sum;
1541 oe2 = (*ops)[i];
1542 if (dump_file && (dump_flags & TDF_DETAILS))
1543 {
1544 fprintf (dump_file, " + ");
1545 print_generic_expr (dump_file, oe2->op, 0);
1546 }
1547 zero_one_operation (&oe2->op, c->oecode, c->op);
1548 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1549 oe1->op, oe2->op, opcode);
1550 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1551 oe2->rank = 0;
1552 oe1->op = gimple_get_lhs (sum);
1553 }
1554
1555 /* Apply the multiplication/division. */
1556 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1557 oe1->op, c->op, c->oecode);
1558 if (dump_file && (dump_flags & TDF_DETAILS))
1559 {
1560 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1561 print_generic_expr (dump_file, c->op, 0);
1562 fprintf (dump_file, "\n");
1563 }
1564
1565 /* Record it in the addition chain and disable further
1566 undistribution with this op. */
1567 oe1->op = gimple_assign_lhs (prod);
1568 oe1->rank = get_rank (oe1->op);
1569 subops[first].release ();
1570
1571 changed = true;
1572 }
1573
1574 cvec.pop ();
1575 }
1576
1577 for (i = 0; i < ops->length (); ++i)
1578 subops[i].release ();
1579 free (subops);
1580 cvec.release ();
1581 sbitmap_free (candidates);
1582 sbitmap_free (candidates2);
1583
1584 return changed;
1585 }
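
/* Illustrative sketch, not part of the original pass: the un-distribution
   above rewrites a sum of products (or real divisions) that share a common
   operand, pulling the shared operand out of the addition chain.  Names
   are hypothetical.  */

static int
undistribute_example (int a, int b, int x, int y)
{
  return a * x * y + b * y * x;	/* becomes (a + b) * x * y */
}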
1586
1587 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1588 expression, examine the other OPS to see if any of them are comparisons
1589 of the same values, which we may be able to combine or eliminate.
1590 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1591
1592 static bool
1593 eliminate_redundant_comparison (enum tree_code opcode,
1594 vec<operand_entry_t> *ops,
1595 unsigned int currindex,
1596 operand_entry_t curr)
1597 {
1598 tree op1, op2;
1599 enum tree_code lcode, rcode;
1600 gimple def1, def2;
1601 int i;
1602 operand_entry_t oe;
1603
1604 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1605 return false;
1606
1607 /* Check that CURR is a comparison. */
1608 if (TREE_CODE (curr->op) != SSA_NAME)
1609 return false;
1610 def1 = SSA_NAME_DEF_STMT (curr->op);
1611 if (!is_gimple_assign (def1))
1612 return false;
1613 lcode = gimple_assign_rhs_code (def1);
1614 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1615 return false;
1616 op1 = gimple_assign_rhs1 (def1);
1617 op2 = gimple_assign_rhs2 (def1);
1618
1619 /* Now look for a similar comparison in the remaining OPS. */
1620 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1621 {
1622 tree t;
1623
1624 if (TREE_CODE (oe->op) != SSA_NAME)
1625 continue;
1626 def2 = SSA_NAME_DEF_STMT (oe->op);
1627 if (!is_gimple_assign (def2))
1628 continue;
1629 rcode = gimple_assign_rhs_code (def2);
1630 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1631 continue;
1632
1633 /* If we got here, we have a match. See if we can combine the
1634 two comparisons. */
1635 if (opcode == BIT_IOR_EXPR)
1636 t = maybe_fold_or_comparisons (lcode, op1, op2,
1637 rcode, gimple_assign_rhs1 (def2),
1638 gimple_assign_rhs2 (def2));
1639 else
1640 t = maybe_fold_and_comparisons (lcode, op1, op2,
1641 rcode, gimple_assign_rhs1 (def2),
1642 gimple_assign_rhs2 (def2));
1643 if (!t)
1644 continue;
1645
1646 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1647 always give us a boolean_type_node value back. If the original
1648 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1649 we need to convert. */
1650 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1651 t = fold_convert (TREE_TYPE (curr->op), t);
1652
1653 if (TREE_CODE (t) != INTEGER_CST
1654 && !operand_equal_p (t, curr->op, 0))
1655 {
1656 enum tree_code subcode;
1657 tree newop1, newop2;
1658 if (!COMPARISON_CLASS_P (t))
1659 continue;
1660 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1661 STRIP_USELESS_TYPE_CONVERSION (newop1);
1662 STRIP_USELESS_TYPE_CONVERSION (newop2);
1663 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1664 continue;
1665 }
1666
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1668 {
1669 fprintf (dump_file, "Equivalence: ");
1670 print_generic_expr (dump_file, curr->op, 0);
1671 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1672 print_generic_expr (dump_file, oe->op, 0);
1673 fprintf (dump_file, " -> ");
1674 print_generic_expr (dump_file, t, 0);
1675 fprintf (dump_file, "\n");
1676 }
1677
1678 /* Now we can delete oe, as it has been subsumed by the new combined
1679 expression t. */
1680 ops->ordered_remove (i);
1681 reassociate_stats.ops_eliminated ++;
1682
1683 /* If t is the same as curr->op, we're done. Otherwise we must
1684 replace curr->op with t. Special case is if we got a constant
1685 back, in which case we add it to the end instead of in place of
1686 the current entry. */
1687 if (TREE_CODE (t) == INTEGER_CST)
1688 {
1689 ops->ordered_remove (currindex);
1690 add_to_ops_vec (ops, t);
1691 }
1692 else if (!operand_equal_p (t, curr->op, 0))
1693 {
1694 gimple sum;
1695 enum tree_code subcode;
1696 tree newop1;
1697 tree newop2;
1698 gcc_assert (COMPARISON_CLASS_P (t));
1699 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1700 STRIP_USELESS_TYPE_CONVERSION (newop1);
1701 STRIP_USELESS_TYPE_CONVERSION (newop2);
1702 gcc_checking_assert (is_gimple_val (newop1)
1703 && is_gimple_val (newop2));
1704 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1705 curr->op = gimple_get_lhs (sum);
1706 }
1707 return true;
1708 }
1709
1710 return false;
1711 }
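
/* Illustrative sketch, not part of the original pass: the redundant
   comparison elimination above merges two comparisons of the same operands
   that feed one bitwise operation.  Names are hypothetical.  */

static int
redundant_comparison_example (int a, int b)
{
  return (a < b) | (a == b);	/* combined into a <= b */
}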
1712
1713 /* Perform various identities and other optimizations on the list of
1714 operand entries, stored in OPS. The tree code for the binary
1715 operation between all the operands is OPCODE. */
1716
1717 static void
1718 optimize_ops_list (enum tree_code opcode,
1719 vec<operand_entry_t> *ops)
1720 {
1721 unsigned int length = ops->length ();
1722 unsigned int i;
1723 operand_entry_t oe;
1724 operand_entry_t oelast = NULL;
1725 bool iterate = false;
1726
1727 if (length == 1)
1728 return;
1729
1730 oelast = ops->last ();
1731
1732 /* If the last two are constants, pop the constants off, merge them
1733 and try the next two. */
1734 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1735 {
1736 operand_entry_t oelm1 = (*ops)[length - 2];
1737
1738 if (oelm1->rank == 0
1739 && is_gimple_min_invariant (oelm1->op)
1740 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1741 TREE_TYPE (oelast->op)))
1742 {
1743 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1744 oelm1->op, oelast->op);
1745
1746 if (folded && is_gimple_min_invariant (folded))
1747 {
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, "Merging constants\n");
1750
1751 ops->pop ();
1752 ops->pop ();
1753
1754 add_to_ops_vec (ops, folded);
1755 reassociate_stats.constants_eliminated++;
1756
1757 optimize_ops_list (opcode, ops);
1758 return;
1759 }
1760 }
1761 }
1762
1763 eliminate_using_constants (opcode, ops);
1764 oelast = NULL;
1765
1766 for (i = 0; ops->iterate (i, &oe);)
1767 {
1768 bool done = false;
1769
1770 if (eliminate_not_pairs (opcode, ops, i, oe))
1771 return;
1772 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1773 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1774 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1775 {
1776 if (done)
1777 return;
1778 iterate = true;
1779 oelast = NULL;
1780 continue;
1781 }
1782 oelast = oe;
1783 i++;
1784 }
1785
1786 length = ops->length ();
1787 oelast = ops->last ();
1788
1789 if (iterate)
1790 optimize_ops_list (opcode, ops);
1791 }
1792
1793 /* The following functions are subroutines to optimize_range_tests and allow
1794 it to try to change a logical combination of comparisons into a range
1795 test.
1796
1797 For example, both
1798 X == 2 || X == 5 || X == 3 || X == 4
1799 and
1800 X >= 2 && X <= 5
1801 are converted to
1802 (unsigned) (X - 2) <= 3
1803
1804 For more information see the comments above fold_range_test in fold-const.c;
1805 this implementation is for GIMPLE. */
1806
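/* Illustrative sketch, not part of the original pass: the kind of source
   the range-test machinery below targets.  Names are hypothetical.  */

static int
range_test_example (unsigned int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;	/* becomes (x - 2) <= 3 */
}
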
1807 struct range_entry
1808 {
1809 tree exp;
1810 tree low;
1811 tree high;
1812 bool in_p;
1813 bool strict_overflow_p;
1814 unsigned int idx, next;
1815 };
1816
1817 /* This is similar to make_range in fold-const.c, but on top of
1818 GIMPLE instead of trees. If EXP is non-NULL, it should be
1819 an SSA_NAME and STMT argument is ignored, otherwise STMT
1820 argument should be a GIMPLE_COND. */
1821
1822 static void
1823 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1824 {
1825 int in_p;
1826 tree low, high;
1827 bool is_bool, strict_overflow_p;
1828
1829 r->exp = NULL_TREE;
1830 r->in_p = false;
1831 r->strict_overflow_p = false;
1832 r->low = NULL_TREE;
1833 r->high = NULL_TREE;
1834 if (exp != NULL_TREE
1835 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1836 return;
1837
1838 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1839 and see if we can refine the range. Some of the cases below may not
1840 happen, but it doesn't seem worth worrying about this. We "continue"
1841 the outer loop when we've changed something; otherwise we "break"
1842 the switch, which will "break" the while. */
1843 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1844 high = low;
1845 in_p = 0;
1846 strict_overflow_p = false;
1847 is_bool = false;
1848 if (exp == NULL_TREE)
1849 is_bool = true;
1850 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1851 {
1852 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1853 is_bool = true;
1854 else
1855 return;
1856 }
1857 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1858 is_bool = true;
1859
1860 while (1)
1861 {
1862 enum tree_code code;
1863 tree arg0, arg1, exp_type;
1864 tree nexp;
1865 location_t loc;
1866
1867 if (exp != NULL_TREE)
1868 {
1869 if (TREE_CODE (exp) != SSA_NAME
1870 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1871 break;
1872
1873 stmt = SSA_NAME_DEF_STMT (exp);
1874 if (!is_gimple_assign (stmt))
1875 break;
1876
1877 code = gimple_assign_rhs_code (stmt);
1878 arg0 = gimple_assign_rhs1 (stmt);
1879 arg1 = gimple_assign_rhs2 (stmt);
1880 exp_type = TREE_TYPE (exp);
1881 }
1882 else
1883 {
1884 code = gimple_cond_code (stmt);
1885 arg0 = gimple_cond_lhs (stmt);
1886 arg1 = gimple_cond_rhs (stmt);
1887 exp_type = boolean_type_node;
1888 }
1889
1890 if (TREE_CODE (arg0) != SSA_NAME)
1891 break;
1892 loc = gimple_location (stmt);
1893 switch (code)
1894 {
1895 case BIT_NOT_EXPR:
1896 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1897 /* Ensure the range is either +[-,0], +[0,0],
1898 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1899 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1900 or similar expression of unconditional true or
1901 false, it should not be negated. */
1902 && ((high && integer_zerop (high))
1903 || (low && integer_onep (low))))
1904 {
1905 in_p = !in_p;
1906 exp = arg0;
1907 continue;
1908 }
1909 break;
1910 case SSA_NAME:
1911 exp = arg0;
1912 continue;
1913 CASE_CONVERT:
1914 if (is_bool)
1915 goto do_default;
1916 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1917 {
1918 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1919 is_bool = true;
1920 else
1921 return;
1922 }
1923 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1924 is_bool = true;
1925 goto do_default;
1926 case EQ_EXPR:
1927 case NE_EXPR:
1928 case LT_EXPR:
1929 case LE_EXPR:
1930 case GE_EXPR:
1931 case GT_EXPR:
1932 is_bool = true;
1933 /* FALLTHRU */
1934 default:
1935 if (!is_bool)
1936 return;
1937 do_default:
1938 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1939 &low, &high, &in_p,
1940 &strict_overflow_p);
1941 if (nexp != NULL_TREE)
1942 {
1943 exp = nexp;
1944 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1945 continue;
1946 }
1947 break;
1948 }
1949 break;
1950 }
1951 if (is_bool)
1952 {
1953 r->exp = exp;
1954 r->in_p = in_p;
1955 r->low = low;
1956 r->high = high;
1957 r->strict_overflow_p = strict_overflow_p;
1958 }
1959 }
1960
1961 /* Comparison function for qsort. Sort entries
1962 without SSA_NAME exp first, then with SSA_NAMEs sorted
1963 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1964 by increasing ->low and if ->low is the same, by increasing
1965 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1966 maximum. */
1967
1968 static int
1969 range_entry_cmp (const void *a, const void *b)
1970 {
1971 const struct range_entry *p = (const struct range_entry *) a;
1972 const struct range_entry *q = (const struct range_entry *) b;
1973
1974 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1975 {
1976 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1977 {
1978 /* Group range_entries for the same SSA_NAME together. */
1979 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1980 return -1;
1981 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1982 return 1;
1983 /* If ->low is different, NULL low goes first, then by
1984 ascending low. */
1985 if (p->low != NULL_TREE)
1986 {
1987 if (q->low != NULL_TREE)
1988 {
1989 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1990 p->low, q->low);
1991 if (tem && integer_onep (tem))
1992 return -1;
1993 tem = fold_binary (GT_EXPR, boolean_type_node,
1994 p->low, q->low);
1995 if (tem && integer_onep (tem))
1996 return 1;
1997 }
1998 else
1999 return 1;
2000 }
2001 else if (q->low != NULL_TREE)
2002 return -1;
2003 /* If ->high is different, NULL high goes last, before that by
2004 ascending high. */
2005 if (p->high != NULL_TREE)
2006 {
2007 if (q->high != NULL_TREE)
2008 {
2009 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2010 p->high, q->high);
2011 if (tem && integer_onep (tem))
2012 return -1;
2013 tem = fold_binary (GT_EXPR, boolean_type_node,
2014 p->high, q->high);
2015 if (tem && integer_onep (tem))
2016 return 1;
2017 }
2018 else
2019 return -1;
2020 }
2021 else if (p->high != NULL_TREE)
2022 return 1;
2023 /* If both ranges are the same, sort below by ascending idx. */
2024 }
2025 else
2026 return 1;
2027 }
2028 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2029 return -1;
2030
2031 if (p->idx < q->idx)
2032 return -1;
2033 else
2034 {
2035 gcc_checking_assert (p->idx > q->idx);
2036 return 1;
2037 }
2038 }
2039
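/* Illustrative note (assumption, made-up entries): after qsort with
   range_entry_cmp the vector looks roughly like

     { exp=NULL, ... }                     <- non-SSA_NAME entries first
     { exp=x_1, low=NULL, high=4, ... }    <- NULL low sorts as minimum
     { exp=x_1, low=7,    high=9, ... }
     { exp=x_1, low=7,    high=NULL, ... } <- NULL high sorts as maximum
     { exp=y_2, ... }                      <- larger SSA_NAME_VERSION later

   so ranges of the same SSA_NAME become adjacent and sorted, which is what
   lets the merge loop in optimize_range_tests scan consecutive entries.  */
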
2040 /* Helper routine of optimize_range_tests.
2041 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2042 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2043 OPCODE and OPS are arguments of optimize_range_tests. Return
2044 true if the range merge has been successful.
2045 If OPCODE is ERROR_MARK, this is called from within
2046 maybe_optimize_range_tests and is performing inter-bb range optimization.
2047 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2048 oe->rank. */
2049
2050 static bool
2051 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2052 unsigned int count, enum tree_code opcode,
2053 vec<operand_entry_t> *ops, tree exp, bool in_p,
2054 tree low, tree high, bool strict_overflow_p)
2055 {
2056 operand_entry_t oe = (*ops)[range->idx];
2057 tree op = oe->op;
2058 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2059 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2060 location_t loc = gimple_location (stmt);
2061 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2062 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2063 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2064 gimple_stmt_iterator gsi;
2065
2066 if (tem == NULL_TREE)
2067 return false;
2068
2069 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2070 warning_at (loc, OPT_Wstrict_overflow,
2071 "assuming signed overflow does not occur "
2072 "when simplifying range test");
2073
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 {
2076 struct range_entry *r;
2077 fprintf (dump_file, "Optimizing range tests ");
2078 print_generic_expr (dump_file, range->exp, 0);
2079 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2080 print_generic_expr (dump_file, range->low, 0);
2081 fprintf (dump_file, ", ");
2082 print_generic_expr (dump_file, range->high, 0);
2083 fprintf (dump_file, "]");
2084 for (r = otherrange; r < otherrange + count; r++)
2085 {
2086 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2087 print_generic_expr (dump_file, r->low, 0);
2088 fprintf (dump_file, ", ");
2089 print_generic_expr (dump_file, r->high, 0);
2090 fprintf (dump_file, "]");
2091 }
2092 fprintf (dump_file, "\n into ");
2093 print_generic_expr (dump_file, tem, 0);
2094 fprintf (dump_file, "\n");
2095 }
2096
2097 if (opcode == BIT_IOR_EXPR
2098 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2099 tem = invert_truthvalue_loc (loc, tem);
2100
2101 tem = fold_convert_loc (loc, optype, tem);
2102 gsi = gsi_for_stmt (stmt);
2103 /* In rare cases range->exp can be equal to lhs of stmt.
2104 In that case we have to insert after the stmt rather than before
2105 it. */
2106 if (op == range->exp)
2107 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2108 GSI_CONTINUE_LINKING);
2109 else
2110 {
2111 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2112 GSI_SAME_STMT);
2113 gsi_prev (&gsi);
2114 }
2115 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2116 if (gimple_uid (gsi_stmt (gsi)))
2117 break;
2118 else
2119 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2120
2121 oe->op = tem;
2122 range->exp = exp;
2123 range->low = low;
2124 range->high = high;
2125 range->in_p = in_p;
2126 range->strict_overflow_p = false;
2127
2128 for (range = otherrange; range < otherrange + count; range++)
2129 {
2130 oe = (*ops)[range->idx];
2131 /* Now change all the other range test immediate uses, so that
2132 those tests will be optimized away. */
2133 if (opcode == ERROR_MARK)
2134 {
2135 if (oe->op)
2136 oe->op = build_int_cst (TREE_TYPE (oe->op),
2137 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2138 else
2139 oe->op = (oe->rank == BIT_IOR_EXPR
2140 ? boolean_false_node : boolean_true_node);
2141 }
2142 else
2143 oe->op = error_mark_node;
2144 range->exp = NULL_TREE;
2145 }
2146 return true;
2147 }
2148
2149 /* Optimize X == CST1 || X == CST2
2150 if popcount (CST1 ^ CST2) == 1 into
2151 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2152 Similarly for ranges. E.g.
2153 X != 2 && X != 3 && X != 10 && X != 11
2154 will be transformed by the previous optimization into
2155 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2156 and this loop can transform that into
2157 !(((X & ~8) - 2U) <= 1U). */
2158
2159 static bool
2160 optimize_range_tests_xor (enum tree_code opcode, tree type,
2161 tree lowi, tree lowj, tree highi, tree highj,
2162 vec<operand_entry_t> *ops,
2163 struct range_entry *rangei,
2164 struct range_entry *rangej)
2165 {
2166 tree lowxor, highxor, tem, exp;
2167 /* Check highi ^ lowi == highj ^ lowj and
2168 popcount (highi ^ lowi) == 1. */
2169 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2170 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2171 return false;
2172 if (tree_log2 (lowxor) < 0)
2173 return false;
2174 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2175 if (!tree_int_cst_equal (lowxor, highxor))
2176 return false;
2177
2178 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2179 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2180 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2181 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2182 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2183 rangei->in_p, lowj, highj,
2184 rangei->strict_overflow_p
2185 || rangej->strict_overflow_p))
2186 return true;
2187 return false;
2188 }
2189
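/* Illustration only (not compiled into the pass): a standalone sketch of
   the XOR-based merge above with hypothetical constants.  43 ^ 47 == 4 has
   exactly one bit set, which is what makes the merge valid.  */
#if 0
static int
example_xor_merge (unsigned int x)
{
  /* Original: x == 43 || x == 47.  The two values differ in a single bit,
     so masking that bit away reduces both tests to one comparison.  */
  return (x & ~4u) == (43u & ~4u);
}
#endif
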
2190 /* Optimize X == CST1 || X == CST2
2191 if popcount (CST2 - CST1) == 1 into
2192 ((X - CST1) & ~(CST2 - CST1)) == 0.
2193 Similarly for ranges. E.g.
2194 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2195 || X == 75 || X == 45
2196 will be transformed by the previous optimization into
2197 (X - 43U) <= 3U || (X - 75U) <= 3U
2198 and this loop can transform that into
2199 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2200 static bool
2201 optimize_range_tests_diff (enum tree_code opcode, tree type,
2202 tree lowi, tree lowj, tree highi, tree highj,
2203 vec<operand_entry_t> *ops,
2204 struct range_entry *rangei,
2205 struct range_entry *rangej)
2206 {
2207 tree tem1, tem2, mask;
2208 /* Check highi - lowi == highj - lowj. */
2209 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2210 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2211 return false;
2212 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2213 if (!tree_int_cst_equal (tem1, tem2))
2214 return false;
2215 /* Check popcount (lowj - lowi) == 1. */
2216 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2217 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2218 return false;
2219 if (tree_log2 (tem1) < 0)
2220 return false;
2221
2222 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2223 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2224 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2225 lowj = build_int_cst (type, 0);
2226 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2227 rangei->in_p, lowj, tem2,
2228 rangei->strict_overflow_p
2229 || rangej->strict_overflow_p))
2230 return true;
2231 return false;
2232 }
2233
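/* Illustration only (not compiled into the pass): a standalone sketch of
   the difference-based merge above, using the constants from the comment.
   75U - 43U == 32U has exactly one bit set, so the two subranges collapse
   into a single masked range test.  */
#if 0
static int
example_diff_merge (unsigned int x)
{
  /* Original (after the previous step): (x - 43u) <= 3u || (x - 75u) <= 3u.  */
  return ((x - 43u) & ~32u) <= 3u;
}
#endif
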
2234 /* Perform the checks common to optimize_range_tests_xor and
2235 optimize_range_tests_diff on each candidate pair of ranges.
2236 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2237 Else it calls optimize_range_tests_diff. */
2238
2239 static bool
2240 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2241 bool optimize_xor, vec<operand_entry_t> *ops,
2242 struct range_entry *ranges)
2243 {
2244 int i, j;
2245 bool any_changes = false;
2246 for (i = first; i < length; i++)
2247 {
2248 tree lowi, highi, lowj, highj, type, tem;
2249
2250 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2251 continue;
2252 type = TREE_TYPE (ranges[i].exp);
2253 if (!INTEGRAL_TYPE_P (type))
2254 continue;
2255 lowi = ranges[i].low;
2256 if (lowi == NULL_TREE)
2257 lowi = TYPE_MIN_VALUE (type);
2258 highi = ranges[i].high;
2259 if (highi == NULL_TREE)
2260 continue;
2261 for (j = i + 1; j < length && j < i + 64; j++)
2262 {
2263 bool changes;
2264 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2265 continue;
2266 lowj = ranges[j].low;
2267 if (lowj == NULL_TREE)
2268 continue;
2269 highj = ranges[j].high;
2270 if (highj == NULL_TREE)
2271 highj = TYPE_MAX_VALUE (type);
2272 /* Check lowj > highi. */
2273 tem = fold_binary (GT_EXPR, boolean_type_node,
2274 lowj, highi);
2275 if (tem == NULL_TREE || !integer_onep (tem))
2276 continue;
2277 if (optimize_xor)
2278 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2279 highi, highj, ops,
2280 ranges + i, ranges + j);
2281 else
2282 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2283 highi, highj, ops,
2284 ranges + i, ranges + j);
2285 if (changes)
2286 {
2287 any_changes = true;
2288 break;
2289 }
2290 }
2291 }
2292 return any_changes;
2293 }
2294
2295 /* Optimize range tests, similarly to how fold_range_test optimizes
2296 it on trees. The tree code for the binary
2297 operation between all the operands is OPCODE.
2298 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2299 maybe_optimize_range_tests for inter-bb range optimization.
2300 In that case if oe->op is NULL, oe->id is the index of the bb whose
2301 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2302 the actual opcode. */
2303
2304 static bool
2305 optimize_range_tests (enum tree_code opcode,
2306 vec<operand_entry_t> *ops)
2307 {
2308 unsigned int length = ops->length (), i, j, first;
2309 operand_entry_t oe;
2310 struct range_entry *ranges;
2311 bool any_changes = false;
2312
2313 if (length == 1)
2314 return false;
2315
2316 ranges = XNEWVEC (struct range_entry, length);
2317 for (i = 0; i < length; i++)
2318 {
2319 oe = (*ops)[i];
2320 ranges[i].idx = i;
2321 init_range_entry (ranges + i, oe->op,
2322 oe->op ? NULL :
2323 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2324 /* For |, invert it now; we will invert it again before emitting
2325 the optimized expression. */
2326 if (opcode == BIT_IOR_EXPR
2327 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2328 ranges[i].in_p = !ranges[i].in_p;
2329 }
2330
2331 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2332 for (i = 0; i < length; i++)
2333 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2334 break;
2335
2336 /* Try to merge ranges. */
2337 for (first = i; i < length; i++)
2338 {
2339 tree low = ranges[i].low;
2340 tree high = ranges[i].high;
2341 int in_p = ranges[i].in_p;
2342 bool strict_overflow_p = ranges[i].strict_overflow_p;
2343 int update_fail_count = 0;
2344
2345 for (j = i + 1; j < length; j++)
2346 {
2347 if (ranges[i].exp != ranges[j].exp)
2348 break;
2349 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2350 ranges[j].in_p, ranges[j].low, ranges[j].high))
2351 break;
2352 strict_overflow_p |= ranges[j].strict_overflow_p;
2353 }
2354
2355 if (j == i + 1)
2356 continue;
2357
2358 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2359 ops, ranges[i].exp, in_p, low, high,
2360 strict_overflow_p))
2361 {
2362 i = j - 1;
2363 any_changes = true;
2364 }
2365 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2366 while update_range_test would fail. */
2367 else if (update_fail_count == 64)
2368 i = j - 1;
2369 else
2370 ++update_fail_count;
2371 }
2372
2373 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2374 ops, ranges);
2375
2376 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2377 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2378 ops, ranges);
2379
2380 if (any_changes && opcode != ERROR_MARK)
2381 {
2382 j = 0;
2383 FOR_EACH_VEC_ELT (*ops, i, oe)
2384 {
2385 if (oe->op == error_mark_node)
2386 continue;
2387 else if (i != j)
2388 (*ops)[j] = oe;
2389 j++;
2390 }
2391 ops->truncate (j);
2392 }
2393
2394 XDELETEVEC (ranges);
2395 return any_changes;
2396 }
2397
2398 /* Return true if STMT is a cast like:
2399 <bb N>:
2400 ...
2401 _123 = (int) _234;
2402
2403 <bb M>:
2404 # _345 = PHI <_123(N), 1(...), 1(...)>
2405 where _234 has bool type, _123 has single use and
2406 bb N has a single successor M. This is commonly used in
2407 the last block of a range test. */
2408
2409 static bool
2410 final_range_test_p (gimple stmt)
2411 {
2412 basic_block bb, rhs_bb;
2413 edge e;
2414 tree lhs, rhs;
2415 use_operand_p use_p;
2416 gimple use_stmt;
2417
2418 if (!gimple_assign_cast_p (stmt))
2419 return false;
2420 bb = gimple_bb (stmt);
2421 if (!single_succ_p (bb))
2422 return false;
2423 e = single_succ_edge (bb);
2424 if (e->flags & EDGE_COMPLEX)
2425 return false;
2426
2427 lhs = gimple_assign_lhs (stmt);
2428 rhs = gimple_assign_rhs1 (stmt);
2429 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2430 || TREE_CODE (rhs) != SSA_NAME
2431 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2432 return false;
2433
2434 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2435 if (!single_imm_use (lhs, &use_p, &use_stmt))
2436 return false;
2437
2438 if (gimple_code (use_stmt) != GIMPLE_PHI
2439 || gimple_bb (use_stmt) != e->dest)
2440 return false;
2441
2442 /* And that the rhs is defined in the same loop. */
2443 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2444 if (rhs_bb == NULL
2445 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2446 return false;
2447
2448 return true;
2449 }
2450
2451 /* Return true if BB is a suitable basic block for inter-bb range test
2452 optimization. If BACKWARD is true, BB should be the only predecessor
2453 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2454 or compared against to find the common basic block to which all the
2455 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2456 should be the only predecessor of BB. */
2457
2458 static bool
2459 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2460 bool backward)
2461 {
2462 edge_iterator ei, ei2;
2463 edge e, e2;
2464 gimple stmt;
2465 gimple_stmt_iterator gsi;
2466 bool other_edge_seen = false;
2467 bool is_cond;
2468
2469 if (test_bb == bb)
2470 return false;
2471 /* Check last stmt first. */
2472 stmt = last_stmt (bb);
2473 if (stmt == NULL
2474 || (gimple_code (stmt) != GIMPLE_COND
2475 && (backward || !final_range_test_p (stmt)))
2476 || gimple_visited_p (stmt)
2477 || stmt_could_throw_p (stmt)
2478 || *other_bb == bb)
2479 return false;
2480 is_cond = gimple_code (stmt) == GIMPLE_COND;
2481 if (is_cond)
2482 {
2483 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2484 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2485 to *OTHER_BB (if not set yet, try to find it out). */
2486 if (EDGE_COUNT (bb->succs) != 2)
2487 return false;
2488 FOR_EACH_EDGE (e, ei, bb->succs)
2489 {
2490 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2491 return false;
2492 if (e->dest == test_bb)
2493 {
2494 if (backward)
2495 continue;
2496 else
2497 return false;
2498 }
2499 if (e->dest == bb)
2500 return false;
2501 if (*other_bb == NULL)
2502 {
2503 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2504 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2505 return false;
2506 else if (e->dest == e2->dest)
2507 *other_bb = e->dest;
2508 if (*other_bb == NULL)
2509 return false;
2510 }
2511 if (e->dest == *other_bb)
2512 other_edge_seen = true;
2513 else if (backward)
2514 return false;
2515 }
2516 if (*other_bb == NULL || !other_edge_seen)
2517 return false;
2518 }
2519 else if (single_succ (bb) != *other_bb)
2520 return false;
2521
2522 /* Now check all PHIs of *OTHER_BB. */
2523 e = find_edge (bb, *other_bb);
2524 e2 = find_edge (test_bb, *other_bb);
2525 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2526 {
2527 gimple phi = gsi_stmt (gsi);
2528 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2529 corresponding to BB and TEST_BB predecessor must be the same. */
2530 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2531 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2532 {
2533 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2534 one of the PHIs should have the lhs of the last stmt in
2535 that block as PHI arg and that PHI should have 0 or 1
2536 corresponding to it in all other range test basic blocks
2537 considered. */
2538 if (!is_cond)
2539 {
2540 if (gimple_phi_arg_def (phi, e->dest_idx)
2541 == gimple_assign_lhs (stmt)
2542 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2543 || integer_onep (gimple_phi_arg_def (phi,
2544 e2->dest_idx))))
2545 continue;
2546 }
2547 else
2548 {
2549 gimple test_last = last_stmt (test_bb);
2550 if (gimple_code (test_last) != GIMPLE_COND
2551 && gimple_phi_arg_def (phi, e2->dest_idx)
2552 == gimple_assign_lhs (test_last)
2553 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2554 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2555 continue;
2556 }
2557
2558 return false;
2559 }
2560 }
2561 return true;
2562 }
2563
2564 /* Return true if BB doesn't have side-effects that would disallow
2565 range test optimization, all SSA_NAMEs set in the bb are consumed
2566 in the bb and there are no PHIs. */
2567
2568 static bool
2569 no_side_effect_bb (basic_block bb)
2570 {
2571 gimple_stmt_iterator gsi;
2572 gimple last;
2573
2574 if (!gimple_seq_empty_p (phi_nodes (bb)))
2575 return false;
2576 last = last_stmt (bb);
2577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2578 {
2579 gimple stmt = gsi_stmt (gsi);
2580 tree lhs;
2581 imm_use_iterator imm_iter;
2582 use_operand_p use_p;
2583
2584 if (is_gimple_debug (stmt))
2585 continue;
2586 if (gimple_has_side_effects (stmt))
2587 return false;
2588 if (stmt == last)
2589 return true;
2590 if (!is_gimple_assign (stmt))
2591 return false;
2592 lhs = gimple_assign_lhs (stmt);
2593 if (TREE_CODE (lhs) != SSA_NAME)
2594 return false;
2595 if (gimple_assign_rhs_could_trap_p (stmt))
2596 return false;
2597 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2598 {
2599 gimple use_stmt = USE_STMT (use_p);
2600 if (is_gimple_debug (use_stmt))
2601 continue;
2602 if (gimple_bb (use_stmt) != bb)
2603 return false;
2604 }
2605 }
2606 return false;
2607 }
2608
2609 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2610 return true and fill in *OPS recursively. */
2611
2612 static bool
2613 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2614 struct loop *loop)
2615 {
2616 gimple stmt = SSA_NAME_DEF_STMT (var);
2617 tree rhs[2];
2618 int i;
2619
2620 if (!is_reassociable_op (stmt, code, loop))
2621 return false;
2622
2623 rhs[0] = gimple_assign_rhs1 (stmt);
2624 rhs[1] = gimple_assign_rhs2 (stmt);
2625 gimple_set_visited (stmt, true);
2626 for (i = 0; i < 2; i++)
2627 if (TREE_CODE (rhs[i]) == SSA_NAME
2628 && !get_ops (rhs[i], code, ops, loop)
2629 && has_single_use (rhs[i]))
2630 {
2631 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2632
2633 oe->op = rhs[i];
2634 oe->rank = code;
2635 oe->id = 0;
2636 oe->count = 1;
2637 ops->safe_push (oe);
2638 }
2639 return true;
2640 }
2641
2642 /* Find the ops that were added by get_ops starting from VAR, see if
2643 they were changed during update_range_test and if yes, create new
2644 stmts. */
2645
2646 static tree
2647 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2648 unsigned int *pidx, struct loop *loop)
2649 {
2650 gimple stmt = SSA_NAME_DEF_STMT (var);
2651 tree rhs[4];
2652 int i;
2653
2654 if (!is_reassociable_op (stmt, code, loop))
2655 return NULL;
2656
2657 rhs[0] = gimple_assign_rhs1 (stmt);
2658 rhs[1] = gimple_assign_rhs2 (stmt);
2659 rhs[2] = rhs[0];
2660 rhs[3] = rhs[1];
2661 for (i = 0; i < 2; i++)
2662 if (TREE_CODE (rhs[i]) == SSA_NAME)
2663 {
2664 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2665 if (rhs[2 + i] == NULL_TREE)
2666 {
2667 if (has_single_use (rhs[i]))
2668 rhs[2 + i] = ops[(*pidx)++]->op;
2669 else
2670 rhs[2 + i] = rhs[i];
2671 }
2672 }
2673 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2674 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2675 {
2676 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2677 var = make_ssa_name (TREE_TYPE (var), NULL);
2678 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2679 var, rhs[2], rhs[3]);
2680 gimple_set_uid (g, gimple_uid (stmt));
2681 gimple_set_visited (g, true);
2682 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2683 }
2684 return var;
2685 }
2686
2687 /* Structure to track the initial value passed to get_ops and
2688 the range in the ops vector for each basic block. */
2689
2690 struct inter_bb_range_test_entry
2691 {
2692 tree op;
2693 unsigned int first_idx, last_idx;
2694 };
2695
2696 /* Inter-bb range test optimization. */
2697
2698 static void
2699 maybe_optimize_range_tests (gimple stmt)
2700 {
2701 basic_block first_bb = gimple_bb (stmt);
2702 basic_block last_bb = first_bb;
2703 basic_block other_bb = NULL;
2704 basic_block bb;
2705 edge_iterator ei;
2706 edge e;
2707 auto_vec<operand_entry_t> ops;
2708 auto_vec<inter_bb_range_test_entry> bbinfo;
2709 bool any_changes = false;
2710
2711 /* Consider only basic blocks that end with GIMPLE_COND or
2712 a cast statement satisfying final_range_test_p. All
2713 but the last bb in the first_bb .. last_bb range
2714 should end with GIMPLE_COND. */
2715 if (gimple_code (stmt) == GIMPLE_COND)
2716 {
2717 if (EDGE_COUNT (first_bb->succs) != 2)
2718 return;
2719 }
2720 else if (final_range_test_p (stmt))
2721 other_bb = single_succ (first_bb);
2722 else
2723 return;
2724
2725 if (stmt_could_throw_p (stmt))
2726 return;
2727
2728 /* As relative ordering of post-dominator sons isn't fixed,
2729 maybe_optimize_range_tests can be called first on any
2730 bb in the range we want to optimize. So, start by searching
2731 backwards, to see if first_bb can be moved to a predecessor. */
2732 while (single_pred_p (first_bb))
2733 {
2734 basic_block pred_bb = single_pred (first_bb);
2735 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2736 break;
2737 if (!no_side_effect_bb (first_bb))
2738 break;
2739 first_bb = pred_bb;
2740 }
2741 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2742 Before starting forward search in last_bb successors, find
2743 out the other_bb. */
2744 if (first_bb == last_bb)
2745 {
2746 other_bb = NULL;
2747 /* As a non-GIMPLE_COND last stmt always terminates the range,
2748 forward search won't discover anything, so just give up. */
2749 if (gimple_code (stmt) != GIMPLE_COND)
2750 return;
2751 /* Look at both successors. Either it ends with a GIMPLE_COND
2752 and satisfies suitable_cond_bb, or ends with a cast and
2753 other_bb is that cast's successor. */
2754 FOR_EACH_EDGE (e, ei, first_bb->succs)
2755 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2756 || e->dest == first_bb)
2757 return;
2758 else if (single_pred_p (e->dest))
2759 {
2760 stmt = last_stmt (e->dest);
2761 if (stmt
2762 && gimple_code (stmt) == GIMPLE_COND
2763 && EDGE_COUNT (e->dest->succs) == 2)
2764 {
2765 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2766 break;
2767 else
2768 other_bb = NULL;
2769 }
2770 else if (stmt
2771 && final_range_test_p (stmt)
2772 && find_edge (first_bb, single_succ (e->dest)))
2773 {
2774 other_bb = single_succ (e->dest);
2775 if (other_bb == first_bb)
2776 other_bb = NULL;
2777 }
2778 }
2779 if (other_bb == NULL)
2780 return;
2781 }
2782 /* Now do the forward search, moving last_bb to successor bbs
2783 that aren't other_bb. */
2784 while (EDGE_COUNT (last_bb->succs) == 2)
2785 {
2786 FOR_EACH_EDGE (e, ei, last_bb->succs)
2787 if (e->dest != other_bb)
2788 break;
2789 if (e == NULL)
2790 break;
2791 if (!single_pred_p (e->dest))
2792 break;
2793 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2794 break;
2795 if (!no_side_effect_bb (e->dest))
2796 break;
2797 last_bb = e->dest;
2798 }
2799 if (first_bb == last_bb)
2800 return;
2801 /* Here basic blocks first_bb through last_bb's predecessor
2802 end with GIMPLE_COND, all of them have one of the edges to
2803 other_bb and another to another block in the range,
2804 all blocks except first_bb don't have side-effects and
2805 last_bb ends with either GIMPLE_COND, or cast satisfying
2806 final_range_test_p. */
2807 for (bb = last_bb; ; bb = single_pred (bb))
2808 {
2809 enum tree_code code;
2810 tree lhs, rhs;
2811 inter_bb_range_test_entry bb_ent;
2812
2813 bb_ent.op = NULL_TREE;
2814 bb_ent.first_idx = ops.length ();
2815 bb_ent.last_idx = bb_ent.first_idx;
2816 e = find_edge (bb, other_bb);
2817 stmt = last_stmt (bb);
2818 gimple_set_visited (stmt, true);
2819 if (gimple_code (stmt) != GIMPLE_COND)
2820 {
2821 use_operand_p use_p;
2822 gimple phi;
2823 edge e2;
2824 unsigned int d;
2825
2826 lhs = gimple_assign_lhs (stmt);
2827 rhs = gimple_assign_rhs1 (stmt);
2828 gcc_assert (bb == last_bb);
2829
2830 /* stmt is
2831 _123 = (int) _234;
2832
2833 followed by:
2834 <bb M>:
2835 # _345 = PHI <_123(N), 1(...), 1(...)>
2836
2837 or 0 instead of 1. If it is 0, the _234
2838 range test is anded together with all the
2839 other range tests; if it is 1, it is ored with
2840 them. */
2841 single_imm_use (lhs, &use_p, &phi);
2842 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2843 e2 = find_edge (first_bb, other_bb);
2844 d = e2->dest_idx;
2845 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2846 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2847 code = BIT_AND_EXPR;
2848 else
2849 {
2850 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2851 code = BIT_IOR_EXPR;
2852 }
2853
2854 /* If _234 SSA_NAME_DEF_STMT is
2855 _234 = _567 | _789;
2856 (or &, corresponding to 1/0 in the phi arguments),
2857 push into ops the individual range test arguments
2858 of the bitwise or resp. and, recursively. */
2859 if (!get_ops (rhs, code, &ops,
2860 loop_containing_stmt (stmt))
2861 && has_single_use (rhs))
2862 {
2863 /* Otherwise, push the _234 range test itself. */
2864 operand_entry_t oe
2865 = (operand_entry_t) pool_alloc (operand_entry_pool);
2866
2867 oe->op = rhs;
2868 oe->rank = code;
2869 oe->id = 0;
2870 oe->count = 1;
2871 ops.safe_push (oe);
2872 bb_ent.last_idx++;
2873 }
2874 else
2875 bb_ent.last_idx = ops.length ();
2876 bb_ent.op = rhs;
2877 bbinfo.safe_push (bb_ent);
2878 continue;
2879 }
2880 /* Otherwise stmt is GIMPLE_COND. */
2881 code = gimple_cond_code (stmt);
2882 lhs = gimple_cond_lhs (stmt);
2883 rhs = gimple_cond_rhs (stmt);
2884 if (TREE_CODE (lhs) == SSA_NAME
2885 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2886 && ((code != EQ_EXPR && code != NE_EXPR)
2887 || rhs != boolean_false_node
2888 /* Either push into ops the individual bitwise
2889 or resp. and operands, depending on which
2890 edge is other_bb. */
2891 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2892 ^ (code == EQ_EXPR))
2893 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2894 loop_containing_stmt (stmt))))
2895 {
2896 /* Or push the GIMPLE_COND stmt itself. */
2897 operand_entry_t oe
2898 = (operand_entry_t) pool_alloc (operand_entry_pool);
2899
2900 oe->op = NULL;
2901 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2902 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2903 /* oe->op = NULL signals that there is no SSA_NAME
2904 for the range test, and oe->id instead is the
2905 basic block number at whose end the GIMPLE_COND
2906 is. */
2907 oe->id = bb->index;
2908 oe->count = 1;
2909 ops.safe_push (oe);
2910 bb_ent.op = NULL;
2911 bb_ent.last_idx++;
2912 }
2913 else if (ops.length () > bb_ent.first_idx)
2914 {
2915 bb_ent.op = lhs;
2916 bb_ent.last_idx = ops.length ();
2917 }
2918 bbinfo.safe_push (bb_ent);
2919 if (bb == first_bb)
2920 break;
2921 }
2922 if (ops.length () > 1)
2923 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2924 if (any_changes)
2925 {
2926 unsigned int idx;
2927 /* update_ops relies on has_single_use predicates returning the
2928 same values as they did during get_ops earlier. Additionally it
2929 never removes statements, only adds new ones and it should walk
2930 from the single imm use and check the predicate already before
2931 making those changes.
2932 On the other hand, the handling of GIMPLE_COND directly can turn
2933 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2934 it needs to be done in a separate loop afterwards. */
2935 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2936 {
2937 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2938 && bbinfo[idx].op != NULL_TREE)
2939 {
2940 tree new_op;
2941
2942 stmt = last_stmt (bb);
2943 new_op = update_ops (bbinfo[idx].op,
2944 (enum tree_code)
2945 ops[bbinfo[idx].first_idx]->rank,
2946 ops, &bbinfo[idx].first_idx,
2947 loop_containing_stmt (stmt));
2948 if (new_op == NULL_TREE)
2949 {
2950 gcc_assert (bb == last_bb);
2951 new_op = ops[bbinfo[idx].first_idx++]->op;
2952 }
2953 if (bbinfo[idx].op != new_op)
2954 {
2955 imm_use_iterator iter;
2956 use_operand_p use_p;
2957 gimple use_stmt, cast_stmt = NULL;
2958
2959 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2960 if (is_gimple_debug (use_stmt))
2961 continue;
2962 else if (gimple_code (use_stmt) == GIMPLE_COND
2963 || gimple_code (use_stmt) == GIMPLE_PHI)
2964 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2965 SET_USE (use_p, new_op);
2966 else if (gimple_assign_cast_p (use_stmt))
2967 cast_stmt = use_stmt;
2968 else
2969 gcc_unreachable ();
2970 if (cast_stmt)
2971 {
2972 gcc_assert (bb == last_bb);
2973 tree lhs = gimple_assign_lhs (cast_stmt);
2974 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2975 enum tree_code rhs_code
2976 = gimple_assign_rhs_code (cast_stmt);
2977 gimple g;
2978 if (is_gimple_min_invariant (new_op))
2979 {
2980 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2981 g = gimple_build_assign (new_lhs, new_op);
2982 }
2983 else
2984 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2985 new_op, NULL_TREE);
2986 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2987 gimple_set_uid (g, gimple_uid (cast_stmt));
2988 gimple_set_visited (g, true);
2989 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2991 if (is_gimple_debug (use_stmt))
2992 continue;
2993 else if (gimple_code (use_stmt) == GIMPLE_COND
2994 || gimple_code (use_stmt) == GIMPLE_PHI)
2995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2996 SET_USE (use_p, new_lhs);
2997 else
2998 gcc_unreachable ();
2999 }
3000 }
3001 }
3002 if (bb == first_bb)
3003 break;
3004 }
3005 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3006 {
3007 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3008 && bbinfo[idx].op == NULL_TREE
3009 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3010 {
3011 stmt = last_stmt (bb);
3012 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3013 gimple_cond_make_false (stmt);
3014 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3015 gimple_cond_make_true (stmt);
3016 else
3017 {
3018 gimple_cond_set_code (stmt, NE_EXPR);
3019 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3020 gimple_cond_set_rhs (stmt, boolean_false_node);
3021 }
3022 update_stmt (stmt);
3023 }
3024 if (bb == first_bb)
3025 break;
3026 }
3027 }
3028 }
3029
3030 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3031 of STMT in its operands. This is also known as a "destructive
3032 update" operation. */
3033
3034 static bool
3035 is_phi_for_stmt (gimple stmt, tree operand)
3036 {
3037 gimple def_stmt;
3038 tree lhs;
3039 use_operand_p arg_p;
3040 ssa_op_iter i;
3041
3042 if (TREE_CODE (operand) != SSA_NAME)
3043 return false;
3044
3045 lhs = gimple_assign_lhs (stmt);
3046
3047 def_stmt = SSA_NAME_DEF_STMT (operand);
3048 if (gimple_code (def_stmt) != GIMPLE_PHI)
3049 return false;
3050
3051 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3052 if (lhs == USE_FROM_PTR (arg_p))
3053 return true;
3054 return false;
3055 }
3056
3057 /* Remove def stmt of VAR if VAR has zero uses and recurse
3058 on rhs1 operand if so. */
3059
3060 static void
3061 remove_visited_stmt_chain (tree var)
3062 {
3063 gimple stmt;
3064 gimple_stmt_iterator gsi;
3065
3066 while (1)
3067 {
3068 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3069 return;
3070 stmt = SSA_NAME_DEF_STMT (var);
3071 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3072 {
3073 var = gimple_assign_rhs1 (stmt);
3074 gsi = gsi_for_stmt (stmt);
3075 gsi_remove (&gsi, true);
3076 release_defs (stmt);
3077 }
3078 else
3079 return;
3080 }
3081 }
3082
3083 /* This function checks three consecutive operands in
3084 the passed operands vector OPS starting from OPINDEX and
3085 swaps two operands if it is profitable for the binary operation
3086 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3087
3088 We pair ops with the same rank if possible.
3089
3090 The alternative we try is to see if STMT is a destructive
3091 update style statement, which is like:
3092 b = phi (a, ...)
3093 a = c + b;
3094 In that case, we want to use the destructive update form to
3095 expose the possible vectorizer sum reduction opportunity.
3096 In that case, the third operand will be the phi node. This
3097 check is not performed if STMT is null.
3098
3099 We could, of course, try to be better as noted above, and do a
3100 lot of work to try to find these opportunities in >3 operand
3101 cases, but it is unlikely to be worth it. */
3102
3103 static void
3104 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3105 unsigned int opindex, gimple stmt)
3106 {
3107 operand_entry_t oe1, oe2, oe3;
3108
3109 oe1 = ops[opindex];
3110 oe2 = ops[opindex + 1];
3111 oe3 = ops[opindex + 2];
3112
3113 if ((oe1->rank == oe2->rank
3114 && oe2->rank != oe3->rank)
3115 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3116 && !is_phi_for_stmt (stmt, oe1->op)
3117 && !is_phi_for_stmt (stmt, oe2->op)))
3118 {
3119 struct operand_entry temp = *oe3;
3120 oe3->op = oe1->op;
3121 oe3->rank = oe1->rank;
3122 oe1->op = temp.op;
3123 oe1->rank= temp.rank;
3124 }
3125 else if ((oe1->rank == oe3->rank
3126 && oe2->rank != oe3->rank)
3127 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3128 && !is_phi_for_stmt (stmt, oe1->op)
3129 && !is_phi_for_stmt (stmt, oe3->op)))
3130 {
3131 struct operand_entry temp = *oe2;
3132 oe2->op = oe1->op;
3133 oe2->rank = oe1->rank;
3134 oe1->op = temp.op;
3135 oe1->rank = temp.rank;
3136 }
3137 }
3138
3139 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3140 two definitions, otherwise return STMT. */
3141
3142 static inline gimple
3143 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3144 {
3145 if (TREE_CODE (rhs1) == SSA_NAME
3146 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3147 stmt = SSA_NAME_DEF_STMT (rhs1);
3148 if (TREE_CODE (rhs2) == SSA_NAME
3149 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3150 stmt = SSA_NAME_DEF_STMT (rhs2);
3151 return stmt;
3152 }
3153
3154 /* Recursively rewrite our linearized statements so that the operators
3155 match those in OPS[OPINDEX], putting the computation in rank
3156 order. Return new lhs. */
3157
3158 static tree
3159 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3160 vec<operand_entry_t> ops, bool changed)
3161 {
3162 tree rhs1 = gimple_assign_rhs1 (stmt);
3163 tree rhs2 = gimple_assign_rhs2 (stmt);
3164 tree lhs = gimple_assign_lhs (stmt);
3165 operand_entry_t oe;
3166
3167 /* The final recursion case for this function is that you have
3168 exactly two operations left.
3169 If we had exactly one op in the entire list to start with, we
3170 would have never called this function, and the tail recursion
3171 rewrites them one at a time. */
3172 if (opindex + 2 == ops.length ())
3173 {
3174 operand_entry_t oe1, oe2;
3175
3176 oe1 = ops[opindex];
3177 oe2 = ops[opindex + 1];
3178
3179 if (rhs1 != oe1->op || rhs2 != oe2->op)
3180 {
3181 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3182 unsigned int uid = gimple_uid (stmt);
3183
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3185 {
3186 fprintf (dump_file, "Transforming ");
3187 print_gimple_stmt (dump_file, stmt, 0, 0);
3188 }
3189
3190 if (changed)
3191 {
3192 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3193 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3194 stmt
3195 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3196 lhs, oe1->op, oe2->op);
3197 gimple_set_uid (stmt, uid);
3198 gimple_set_visited (stmt, true);
3199 if (insert_point == gsi_stmt (gsi))
3200 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3201 else
3202 insert_stmt_after (stmt, insert_point);
3203 }
3204 else
3205 {
3206 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3207 == stmt);
3208 gimple_assign_set_rhs1 (stmt, oe1->op);
3209 gimple_assign_set_rhs2 (stmt, oe2->op);
3210 update_stmt (stmt);
3211 }
3212
3213 if (rhs1 != oe1->op && rhs1 != oe2->op)
3214 remove_visited_stmt_chain (rhs1);
3215
3216 if (dump_file && (dump_flags & TDF_DETAILS))
3217 {
3218 fprintf (dump_file, " into ");
3219 print_gimple_stmt (dump_file, stmt, 0, 0);
3220 }
3221 }
3222 return lhs;
3223 }
3224
3225 /* If we hit here, we should have 3 or more ops left. */
3226 gcc_assert (opindex + 2 < ops.length ());
3227
3228 /* Rewrite the next operator. */
3229 oe = ops[opindex];
3230
3231 /* Recurse on the LHS of the binary operator, which is guaranteed to
3232 be the non-leaf side. */
3233 tree new_rhs1
3234 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3235 changed || oe->op != rhs2);
3236
3237 if (oe->op != rhs2 || new_rhs1 != rhs1)
3238 {
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3240 {
3241 fprintf (dump_file, "Transforming ");
3242 print_gimple_stmt (dump_file, stmt, 0, 0);
3243 }
3244
3245 /* If changed is false, this is either opindex == 0
3246 or all outer rhs2's were equal to corresponding oe->op,
3247 and powi_result is NULL.
3248 That means lhs is equivalent before and after reassociation.
3249 Otherwise ensure the old lhs SSA_NAME is not reused and
3250 create a new stmt as well, so that any debug stmts will be
3251 properly adjusted. */
3252 if (changed)
3253 {
3254 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3255 unsigned int uid = gimple_uid (stmt);
3256 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3257
3258 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3259 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3260 lhs, new_rhs1, oe->op);
3261 gimple_set_uid (stmt, uid);
3262 gimple_set_visited (stmt, true);
3263 if (insert_point == gsi_stmt (gsi))
3264 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3265 else
3266 insert_stmt_after (stmt, insert_point);
3267 }
3268 else
3269 {
3270 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3271 == stmt);
3272 gimple_assign_set_rhs1 (stmt, new_rhs1);
3273 gimple_assign_set_rhs2 (stmt, oe->op);
3274 update_stmt (stmt);
3275 }
3276
3277 if (dump_file && (dump_flags & TDF_DETAILS))
3278 {
3279 fprintf (dump_file, " into ");
3280 print_gimple_stmt (dump_file, stmt, 0, 0);
3281 }
3282 }
3283 return lhs;
3284 }
3285
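/* Illustrative sketch (assumption, made-up SSA names): given the linearized
   chain
     t1_8 = a_1 + b_2;
     t2_9 = t1_8 + c_3;
     x_10 = t2_9 + d_4;
   and OPS sorted into rank order, rewrite_expr_tree places the last two
   entries of OPS in the innermost statement and each earlier entry as the
   second operand of the next enclosing statement, creating fresh SSA names
   for the intermediate results once CHANGED becomes true.  */
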
3286 /* Find out how many cycles we need to compute the statement chain.
3287 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3288 maximum number of independent statements we may execute per cycle. */
3289
3290 static int
3291 get_required_cycles (int ops_num, int cpu_width)
3292 {
3293 int res;
3294 int elog;
3295 unsigned int rest;
3296
3297 /* While we have more than 2 * cpu_width operands
3298 we may reduce number of operands by cpu_width
3299 per cycle. */
3300 res = ops_num / (2 * cpu_width);
3301
3302 /* The remaining operand count may be halved each cycle
3303 until we have only one operand. */
3304 rest = (unsigned)(ops_num - res * cpu_width);
3305 elog = exact_log2 (rest);
3306 if (elog >= 0)
3307 res += elog;
3308 else
3309 res += floor_log2 (rest) + 1;
3310
3311 return res;
3312 }
3313
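/* Worked example (illustration only, hypothetical numbers): for
   ops_num == 10 and cpu_width == 2,
     res  = 10 / (2 * 2) = 2,
     rest = 10 - 2 * 2   = 6,
   and since 6 is not a power of two, res += floor_log2 (6) + 1 == 3,
   giving 5 cycles in total.  */
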
3314 /* Returns an optimal number of registers to use for computation of
3315 given statements. */
3316
3317 static int
3318 get_reassociation_width (int ops_num, enum tree_code opc,
3319 enum machine_mode mode)
3320 {
3321 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3322 int width;
3323 int width_min;
3324 int cycles_best;
3325
3326 if (param_width > 0)
3327 width = param_width;
3328 else
3329 width = targetm.sched.reassociation_width (opc, mode);
3330
3331 if (width == 1)
3332 return width;
3333
3334 /* Get the minimal time required for sequence computation. */
3335 cycles_best = get_required_cycles (ops_num, width);
3336
3337 /* Check if we may use less width and still compute sequence for
3338 the same time. It will allow us to reduce registers usage.
3339 get_required_cycles is monotonically increasing with lower width
3340 so we can perform a binary search for the minimal width that still
3341 results in the optimal cycle count. */
3342 width_min = 1;
3343 while (width > width_min)
3344 {
3345 int width_mid = (width + width_min) / 2;
3346
3347 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3348 width = width_mid;
3349 else if (width_min < width_mid)
3350 width_min = width_mid;
3351 else
3352 break;
3353 }
3354
3355 return width;
3356 }
3357
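/* Worked example (illustration only): if the target reports width 4 and
   ops_num == 6, then get_required_cycles (6, 4) and get_required_cycles (6, 2)
   are both 3, so the binary search above settles on width 2, which computes
   the chain in the same number of cycles while using fewer temporaries.  */
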
3358 /* Recursively rewrite our linearized statements so that the operators
3359 match those in OPS[OPINDEX], putting the computation in rank
3360 order and trying to allow operations to be executed in
3361 parallel. */
3362
3363 static void
3364 rewrite_expr_tree_parallel (gimple stmt, int width,
3365 vec<operand_entry_t> ops)
3366 {
3367 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3368 int op_num = ops.length ();
3369 int stmt_num = op_num - 1;
3370 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3371 int op_index = op_num - 1;
3372 int stmt_index = 0;
3373 int ready_stmts_end = 0;
3374 int i = 0;
3375 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3376
3377 /* We start expression rewriting from the top statements.
3378 So, in this loop we create a full list of statements
3379 we will work with. */
3380 stmts[stmt_num - 1] = stmt;
3381 for (i = stmt_num - 2; i >= 0; i--)
3382 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3383
3384 for (i = 0; i < stmt_num; i++)
3385 {
3386 tree op1, op2;
3387
3388 /* Determine whether we should use results of
3389 already handled statements or not. */
3390 if (ready_stmts_end == 0
3391 && (i - stmt_index >= width || op_index < 1))
3392 ready_stmts_end = i;
3393
3394 /* Now we choose operands for the next statement. A nonzero
3395 value in ready_stmts_end means that we should use
3396 the result of already generated statements as new operands. */
3397 if (ready_stmts_end > 0)
3398 {
3399 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3400 if (ready_stmts_end > stmt_index)
3401 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3402 else if (op_index >= 0)
3403 op2 = ops[op_index--]->op;
3404 else
3405 {
3406 gcc_assert (stmt_index < i);
3407 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3408 }
3409
3410 if (stmt_index >= ready_stmts_end)
3411 ready_stmts_end = 0;
3412 }
3413 else
3414 {
3415 if (op_index > 1)
3416 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3417 op2 = ops[op_index--]->op;
3418 op1 = ops[op_index--]->op;
3419 }
3420
3421 /* If we emit the last statement then we should put
3422 operands into the last statement. It will also
3423 break the loop. */
3424 if (op_index < 0 && stmt_index == i)
3425 i = stmt_num - 1;
3426
3427 if (dump_file && (dump_flags & TDF_DETAILS))
3428 {
3429 fprintf (dump_file, "Transforming ");
3430 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3431 }
3432
3433 /* We keep original statement only for the last one. All
3434 others are recreated. */
3435 if (i == stmt_num - 1)
3436 {
3437 gimple_assign_set_rhs1 (stmts[i], op1);
3438 gimple_assign_set_rhs2 (stmts[i], op2);
3439 update_stmt (stmts[i]);
3440 }
3441 else
3442 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3443
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3445 {
3446 fprintf (dump_file, " into ");
3447 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3448 }
3449 }
3450
3451 remove_visited_stmt_chain (last_rhs1);
3452 }
3453
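/* Illustrative sketch (assumption, made-up names): with width == 2 and
   operands { a, b, c, d, e }, the loop above emits roughly
     t1 = d + e;
     t2 = b + c;   <- independent of t1, may issue in parallel
     t3 = t1 + t2;
     last = t3 + a;
   instead of the fully serial chain (((a + b) + c) + d) + e.  Exact pairing
   can differ because swap_ops_for_binary_stmt may reorder the operands.  */
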
3454 /* Transform STMT, which is really (A + B) + (C + D), into the left
3455 linear form, ((A + B) + C) + D.
3456 Recurse on D if necessary. */
3457
3458 static void
3459 linearize_expr (gimple stmt)
3460 {
3461 gimple_stmt_iterator gsi;
3462 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3463 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3464 gimple oldbinrhs = binrhs;
3465 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3466 gimple newbinrhs = NULL;
3467 struct loop *loop = loop_containing_stmt (stmt);
3468 tree lhs = gimple_assign_lhs (stmt);
3469
3470 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3471 && is_reassociable_op (binrhs, rhscode, loop));
3472
3473 gsi = gsi_for_stmt (stmt);
3474
3475 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3476 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3477 make_ssa_name (TREE_TYPE (lhs), NULL),
3478 gimple_assign_lhs (binlhs),
3479 gimple_assign_rhs2 (binrhs));
3480 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3481 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3482 gimple_set_uid (binrhs, gimple_uid (stmt));
3483
3484 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3485 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3486
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3488 {
3489 fprintf (dump_file, "Linearized: ");
3490 print_gimple_stmt (dump_file, stmt, 0, 0);
3491 }
3492
3493 reassociate_stats.linearized++;
3494 update_stmt (stmt);
3495
3496 gsi = gsi_for_stmt (oldbinrhs);
3497 gsi_remove (&gsi, true);
3498 release_defs (oldbinrhs);
3499
3500 gimple_set_visited (stmt, true);
3501 gimple_set_visited (binlhs, true);
3502 gimple_set_visited (binrhs, true);
3503
3504 /* Tail recurse on the new rhs if it still needs reassociation. */
3505 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3506 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3507 want to change the algorithm while converting to tuples. */
3508 linearize_expr (stmt);
3509 }
3510
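/* Illustrative sketch (assumption, made-up SSA names): linearize_expr turns
     x_5 = a_1 + b_2;
     y_6 = c_3 + d_4;
     z_7 = x_5 + y_6;
   into the left-linear chain
     x_5 = a_1 + b_2;
     t_8 = x_5 + d_4;
     z_7 = t_8 + c_3;
   so that linearize_expr_tree later sees one flat operand list
   { a_1, b_2, c_3, d_4 }.  */
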
3511 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3512 it. Otherwise, return NULL. */
3513
3514 static gimple
3515 get_single_immediate_use (tree lhs)
3516 {
3517 use_operand_p immuse;
3518 gimple immusestmt;
3519
3520 if (TREE_CODE (lhs) == SSA_NAME
3521 && single_imm_use (lhs, &immuse, &immusestmt)
3522 && is_gimple_assign (immusestmt))
3523 return immusestmt;
3524
3525 return NULL;
3526 }
3527
3528 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3529 representing the negated value. Insertions of any necessary
3530 instructions go before GSI.
3531 This function is recursive in that, if you hand it "a_5" as the
3532 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3533 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3534
3535 static tree
3536 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3537 {
3538 gimple negatedefstmt = NULL;
3539 tree resultofnegate;
3540 gimple_stmt_iterator gsi;
3541 unsigned int uid;
3542
3543 /* If we are trying to negate a name, defined by an add, negate the
3544 add operands instead. */
3545 if (TREE_CODE (tonegate) == SSA_NAME)
3546 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3547 if (TREE_CODE (tonegate) == SSA_NAME
3548 && is_gimple_assign (negatedefstmt)
3549 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3550 && has_single_use (gimple_assign_lhs (negatedefstmt))
3551 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3552 {
3553 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3554 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3555 tree lhs = gimple_assign_lhs (negatedefstmt);
3556 gimple g;
3557
3558 gsi = gsi_for_stmt (negatedefstmt);
3559 rhs1 = negate_value (rhs1, &gsi);
3560
3561 gsi = gsi_for_stmt (negatedefstmt);
3562 rhs2 = negate_value (rhs2, &gsi);
3563
3564 gsi = gsi_for_stmt (negatedefstmt);
3565 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3566 gimple_set_visited (negatedefstmt, true);
3567 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3568 gimple_set_uid (g, gimple_uid (negatedefstmt));
3569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3570 return lhs;
3571 }
3572
3573 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3574 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3575 NULL_TREE, true, GSI_SAME_STMT);
3576 gsi = *gsip;
3577 uid = gimple_uid (gsi_stmt (gsi));
3578 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3579 {
3580 gimple stmt = gsi_stmt (gsi);
3581 if (gimple_uid (stmt) != 0)
3582 break;
3583 gimple_set_uid (stmt, uid);
3584 }
3585 return resultofnegate;
3586 }
3587
3588 /* Return true if we should break up the subtract in STMT into an add
3589 with negate. This is true when the subtract operands are really
3590 adds, or the subtract itself is used in an add expression. In
3591 either case, breaking up the subtract into an add with negate
3592 exposes the adds to reassociation. */
3593
3594 static bool
3595 should_break_up_subtract (gimple stmt)
3596 {
3597 tree lhs = gimple_assign_lhs (stmt);
3598 tree binlhs = gimple_assign_rhs1 (stmt);
3599 tree binrhs = gimple_assign_rhs2 (stmt);
3600 gimple immusestmt;
3601 struct loop *loop = loop_containing_stmt (stmt);
3602
3603 if (TREE_CODE (binlhs) == SSA_NAME
3604 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3605 return true;
3606
3607 if (TREE_CODE (binrhs) == SSA_NAME
3608 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3609 return true;
3610
3611 if (TREE_CODE (lhs) == SSA_NAME
3612 && (immusestmt = get_single_immediate_use (lhs))
3613 && is_gimple_assign (immusestmt)
3614 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3615 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3616 return true;
3617 return false;
3618 }
3619
3620 /* Transform STMT from A - B into A + -B. */
3621
3622 static void
3623 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3624 {
3625 tree rhs1 = gimple_assign_rhs1 (stmt);
3626 tree rhs2 = gimple_assign_rhs2 (stmt);
3627
3628 if (dump_file && (dump_flags & TDF_DETAILS))
3629 {
3630 fprintf (dump_file, "Breaking up subtract ");
3631 print_gimple_stmt (dump_file, stmt, 0, 0);
3632 }
3633
3634 rhs2 = negate_value (rhs2, gsip);
3635 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3636 update_stmt (stmt);
3637 }
3638
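/* Illustrative sketch (assumption, made-up SSA names): for
     x_3 = a_1 - b_2;
   break_up_subtract calls negate_value to materialize the negate and
   rewrites the statement as
     t_4 = -b_2;
     x_3 = a_1 + t_4;
   so the addition becomes visible to the reassociation machinery.  */
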
3639 /* Determine whether STMT is a builtin call that raises an SSA name
3640 to an integer power and has only one use. If so, and this is early
3641 reassociation and unsafe math optimizations are permitted, place
3642 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3643 If any of these conditions does not hold, return FALSE. */
3644
3645 static bool
3646 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3647 {
3648 tree fndecl, arg1;
3649 REAL_VALUE_TYPE c, cint;
3650
3651 if (!first_pass_instance
3652 || !flag_unsafe_math_optimizations
3653 || !is_gimple_call (stmt)
3654 || !has_single_use (gimple_call_lhs (stmt)))
3655 return false;
3656
3657 fndecl = gimple_call_fndecl (stmt);
3658
3659 if (!fndecl
3660 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3661 return false;
3662
3663 switch (DECL_FUNCTION_CODE (fndecl))
3664 {
3665 CASE_FLT_FN (BUILT_IN_POW):
3666 *base = gimple_call_arg (stmt, 0);
3667 arg1 = gimple_call_arg (stmt, 1);
3668
3669 if (TREE_CODE (arg1) != REAL_CST)
3670 return false;
3671
3672 c = TREE_REAL_CST (arg1);
3673
3674 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3675 return false;
3676
3677 *exponent = real_to_integer (&c);
3678 real_from_integer (&cint, VOIDmode, *exponent,
3679 *exponent < 0 ? -1 : 0, 0);
3680 if (!real_identical (&c, &cint))
3681 return false;
3682
3683 break;
3684
3685 CASE_FLT_FN (BUILT_IN_POWI):
3686 *base = gimple_call_arg (stmt, 0);
3687 arg1 = gimple_call_arg (stmt, 1);
3688
3689 if (!tree_fits_shwi_p (arg1))
3690 return false;
3691
3692 *exponent = tree_to_shwi (arg1);
3693 break;
3694
3695 default:
3696 return false;
3697 }
3698
3699 /* Expanding negative exponents is generally unproductive, so we don't
3700 complicate matters with those. Exponents of zero and one should
3701 have been handled by expression folding. */
3702 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3703 return false;
3704
3705 return true;
3706 }
3707
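/* Illustrative sketch (assumption): given -funsafe-math-optimizations and a
   single-use result, a call like
     t_5 = __builtin_pow (x_1, 3.0e+0);
   makes acceptable_pow_call return true with *BASE == x_1 and
   *EXPONENT == 3 (3.0 is an exact integer >= 2), whereas pow (x_1, 2.5)
   or a negative exponent is rejected.  */
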
3708 /* Recursively linearize a binary expression that is the RHS of STMT.
3709 Place the operands of the expression tree in the vector named OPS. */
3710
3711 static void
3712 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3713 bool is_associative, bool set_visited)
3714 {
3715 tree binlhs = gimple_assign_rhs1 (stmt);
3716 tree binrhs = gimple_assign_rhs2 (stmt);
3717 gimple binlhsdef = NULL, binrhsdef = NULL;
3718 bool binlhsisreassoc = false;
3719 bool binrhsisreassoc = false;
3720 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3721 struct loop *loop = loop_containing_stmt (stmt);
3722 tree base = NULL_TREE;
3723 HOST_WIDE_INT exponent = 0;
3724
3725 if (set_visited)
3726 gimple_set_visited (stmt, true);
3727
3728 if (TREE_CODE (binlhs) == SSA_NAME)
3729 {
3730 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3731 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3732 && !stmt_could_throw_p (binlhsdef));
3733 }
3734
3735 if (TREE_CODE (binrhs) == SSA_NAME)
3736 {
3737 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3738 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3739 && !stmt_could_throw_p (binrhsdef));
3740 }
3741
3742 /* If the LHS is not reassociable, but the RHS is, we need to swap
3743 them. If neither is reassociable, there is nothing we can do, so
3744 just put them in the ops vector. If the LHS is reassociable,
3745 linearize it. If both are reassociable, then linearize the RHS
3746 and the LHS. */
3747
3748 if (!binlhsisreassoc)
3749 {
3750 tree temp;
3751
3752 /* If this is not an associative operation, like division, give up. */
3753 if (!is_associative)
3754 {
3755 add_to_ops_vec (ops, binrhs);
3756 return;
3757 }
3758
3759 if (!binrhsisreassoc)
3760 {
3761 if (rhscode == MULT_EXPR
3762 && TREE_CODE (binrhs) == SSA_NAME
3763 && acceptable_pow_call (binrhsdef, &base, &exponent))
3764 {
3765 add_repeat_to_ops_vec (ops, base, exponent);
3766 gimple_set_visited (binrhsdef, true);
3767 }
3768 else
3769 add_to_ops_vec (ops, binrhs);
3770
3771 if (rhscode == MULT_EXPR
3772 && TREE_CODE (binlhs) == SSA_NAME
3773 && acceptable_pow_call (binlhsdef, &base, &exponent))
3774 {
3775 add_repeat_to_ops_vec (ops, base, exponent);
3776 gimple_set_visited (binlhsdef, true);
3777 }
3778 else
3779 add_to_ops_vec (ops, binlhs);
3780
3781 return;
3782 }
3783
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3785 {
3786 fprintf (dump_file, "swapping operands of ");
3787 print_gimple_stmt (dump_file, stmt, 0, 0);
3788 }
3789
3790 swap_ssa_operands (stmt,
3791 gimple_assign_rhs1_ptr (stmt),
3792 gimple_assign_rhs2_ptr (stmt));
3793 update_stmt (stmt);
3794
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3796 {
3797 fprintf (dump_file, " is now ");
3798 print_gimple_stmt (dump_file, stmt, 0, 0);
3799 }
3800
3801 /* We want to make it so the lhs is always the reassociative op,
3802 so swap. */
3803 temp = binlhs;
3804 binlhs = binrhs;
3805 binrhs = temp;
3806 }
3807 else if (binrhsisreassoc)
3808 {
3809 linearize_expr (stmt);
3810 binlhs = gimple_assign_rhs1 (stmt);
3811 binrhs = gimple_assign_rhs2 (stmt);
3812 }
3813
3814 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3815 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3816 rhscode, loop));
3817 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3818 is_associative, set_visited);
3819
3820 if (rhscode == MULT_EXPR
3821 && TREE_CODE (binrhs) == SSA_NAME
3822 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3823 {
3824 add_repeat_to_ops_vec (ops, base, exponent);
3825 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3826 }
3827 else
3828 add_to_ops_vec (ops, binrhs);
3829 }
3830
3831 /* Repropagate the negates back into subtracts, since no other pass
3832 currently does it. */
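/* For instance, given
     x = -a
     y = b + x
   the PLUS_EXPR below is rewritten into  y = b - a.  */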
3833
3834 static void
3835 repropagate_negates (void)
3836 {
3837 unsigned int i = 0;
3838 tree negate;
3839
3840 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3841 {
3842 gimple user = get_single_immediate_use (negate);
3843
3844 if (!user || !is_gimple_assign (user))
3845 continue;
3846
3847 /* The negate operand can be either operand of a PLUS_EXPR
3848 (it can be the LHS if the RHS is a constant for example).
3849
3850 Force the negate operand to the RHS of the PLUS_EXPR, then
3851 transform the PLUS_EXPR into a MINUS_EXPR. */
3852 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3853 {
3854 /* If the negated operand appears on the LHS of the
3855 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3856 to force the negated operand to the RHS of the PLUS_EXPR. */
3857 if (gimple_assign_rhs1 (user) == negate)
3858 {
3859 swap_ssa_operands (user,
3860 gimple_assign_rhs1_ptr (user),
3861 gimple_assign_rhs2_ptr (user));
3862 }
3863
3864 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3865 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3866 if (gimple_assign_rhs2 (user) == negate)
3867 {
3868 tree rhs1 = gimple_assign_rhs1 (user);
3869 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3870 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3871 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3872 update_stmt (user);
3873 }
3874 }
3875 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3876 {
3877 if (gimple_assign_rhs1 (user) == negate)
3878 {
3879 /* We have
3880 x = -a
3881 y = x - b
3882 which we transform into
3883 x = a + b
3884 y = -x .
3885 This pushes down the negate which we possibly can merge
3886 into some other operation, hence insert it into the
3887 plus_negates vector. */
3888 gimple feed = SSA_NAME_DEF_STMT (negate);
3889 tree a = gimple_assign_rhs1 (feed);
3890 tree b = gimple_assign_rhs2 (user);
3891 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3892 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3893 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3894 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3895 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3896 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3897 user = gsi_stmt (gsi2);
3898 update_stmt (user);
3899 gsi_remove (&gsi, true);
3900 release_defs (feed);
3901 plus_negates.safe_push (gimple_assign_lhs (user));
3902 }
3903 else
3904 {
3905 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3906 rid of one operation. */
3907 gimple feed = SSA_NAME_DEF_STMT (negate);
3908 tree a = gimple_assign_rhs1 (feed);
3909 tree rhs1 = gimple_assign_rhs1 (user);
3910 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3911 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3912 update_stmt (gsi_stmt (gsi));
3913 }
3914 }
3915 }
3916 }
3917
3918 /* Return true if OP is of a type for which we can do reassociation:
3919    integral types with wrapping overflow, non-saturating fixed-point
3920    types, and floating-point types when -fassociative-math is enabled.  */
3921
3922 static bool
3923 can_reassociate_p (tree op)
3924 {
3925 tree type = TREE_TYPE (op);
3926 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3927 || NON_SAT_FIXED_POINT_TYPE_P (type)
3928 || (flag_associative_math && FLOAT_TYPE_P (type)))
3929 return true;
3930 return false;
3931 }
3932
3933 /* Break up subtract operations in block BB.
3934
3935 We do this top down because we don't know whether the subtract is
3936 part of a possible chain of reassociation except at the top.
3937
3938 IE given
3939 d = f + g
3940 c = a + e
3941 b = c - d
3942 q = b - r
3943 k = t - q
3944
3945 we want to break up k = t - q, but we won't until we've transformed q
3946 = b - r, which won't be broken up until we transform b = c - d.
3947
3948 En passant, clear the GIMPLE visited flag on every statement
3949 and set UIDs within each basic block. */
3950
3951 static void
3952 break_up_subtract_bb (basic_block bb)
3953 {
3954 gimple_stmt_iterator gsi;
3955 basic_block son;
3956 unsigned int uid = 1;
3957
3958 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3959 {
3960 gimple stmt = gsi_stmt (gsi);
3961 gimple_set_visited (stmt, false);
3962 gimple_set_uid (stmt, uid++);
3963
3964 if (!is_gimple_assign (stmt)
3965 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3966 continue;
3967
3968 /* Look for simple gimple subtract operations. */
3969 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3970 {
3971 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3972 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3973 continue;
3974
3975 /* Check for a subtract used only in an addition. If this
3976 is the case, transform it into add of a negate for better
3977 reassociation. IE transform C = A-B into C = A + -B if C
3978 is only used in an addition. */
3979 if (should_break_up_subtract (stmt))
3980 break_up_subtract (stmt, &gsi);
3981 }
3982 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3983 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3984 plus_negates.safe_push (gimple_assign_lhs (stmt));
3985 }
3986 for (son = first_dom_son (CDI_DOMINATORS, bb);
3987 son;
3988 son = next_dom_son (CDI_DOMINATORS, son))
3989 break_up_subtract_bb (son);
3990 }
3991
3992 /* Used for repeated factor analysis. */
3993 struct repeat_factor_d
3994 {
3995 /* An SSA name that occurs in a multiply chain. */
3996 tree factor;
3997
3998 /* Cached rank of the factor. */
3999 unsigned rank;
4000
4001 /* Number of occurrences of the factor in the chain. */
4002 HOST_WIDE_INT count;
4003
4004 /* An SSA name representing the product of this factor and
4005 all factors appearing later in the repeated factor vector. */
4006 tree repr;
4007 };
4008
4009 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4010 typedef const struct repeat_factor_d *const_repeat_factor_t;
4011
4012
4013 static vec<repeat_factor> repeat_factor_vec;
4014
4015 /* Used for sorting the repeat factor vector. Sort primarily by
4016 ascending occurrence count, secondarily by descending rank. */
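/* For instance, factors with counts {x:4, y:2, z:2} sort as y, z, x
   when y has a higher rank than z.  */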
4017
4018 static int
4019 compare_repeat_factors (const void *x1, const void *x2)
4020 {
4021 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4022 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4023
4024 if (rf1->count != rf2->count)
4025 return rf1->count - rf2->count;
4026
4027 return rf2->rank - rf1->rank;
4028 }
4029
4030 /* Look for repeated operands in OPS in the multiply tree rooted at
4031 STMT. Replace them with an optimal sequence of multiplies and powi
4032 builtin calls, and remove the used operands from OPS. Return an
4033 SSA name representing the value of the replacement sequence. */
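/* For instance, if OPS represents x * x * x * y * y, the code below
   emits t1 = x * y, t2 = __builtin_powi (t1, 2) and t3 = t2 * x,
   returns t3, and leaves OPS empty.  */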
4034
4035 static tree
4036 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4037 {
4038 unsigned i, j, vec_len;
4039 int ii;
4040 operand_entry_t oe;
4041 repeat_factor_t rf1, rf2;
4042 repeat_factor rfnew;
4043 tree result = NULL_TREE;
4044 tree target_ssa, iter_result;
4045 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4046 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4047 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4048 gimple mul_stmt, pow_stmt;
4049
4050 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4051 target. */
4052 if (!powi_fndecl)
4053 return NULL_TREE;
4054
4055 /* Allocate the repeated factor vector. */
4056 repeat_factor_vec.create (10);
4057
4058 /* Scan the OPS vector for all SSA names in the product and build
4059 up a vector of occurrence counts for each factor. */
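  /* E.g. an OPS vector describing x * x * y yields counts {x: 2, y: 1}.  */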
4060 FOR_EACH_VEC_ELT (*ops, i, oe)
4061 {
4062 if (TREE_CODE (oe->op) == SSA_NAME)
4063 {
4064 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4065 {
4066 if (rf1->factor == oe->op)
4067 {
4068 rf1->count += oe->count;
4069 break;
4070 }
4071 }
4072
4073 if (j >= repeat_factor_vec.length ())
4074 {
4075 rfnew.factor = oe->op;
4076 rfnew.rank = oe->rank;
4077 rfnew.count = oe->count;
4078 rfnew.repr = NULL_TREE;
4079 repeat_factor_vec.safe_push (rfnew);
4080 }
4081 }
4082 }
4083
4084 /* Sort the repeated factor vector by (a) increasing occurrence count,
4085 and (b) decreasing rank. */
4086 repeat_factor_vec.qsort (compare_repeat_factors);
4087
4088 /* It is generally best to combine as many base factors as possible
4089 into a product before applying __builtin_powi to the result.
4090 However, the sort order chosen for the repeated factor vector
4091 allows us to cache partial results for the product of the base
4092 factors for subsequent use. When we already have a cached partial
4093 result from a previous iteration, it is best to make use of it
4094    before looking for another __builtin_powi opportunity.
4095
4096 As an example, consider x * x * y * y * y * z * z * z * z.
4097 We want to first compose the product x * y * z, raise it to the
4098 second power, then multiply this by y * z, and finally multiply
4099 by z. This can be done in 5 multiplies provided we cache y * z
4100 for use in both expressions:
4101
4102 t1 = y * z
4103 t2 = t1 * x
4104 t3 = t2 * t2
4105 t4 = t1 * t3
4106 result = t4 * z
4107
4108 If we instead ignored the cached y * z and first multiplied by
4109    the __builtin_powi opportunity z * z, we would get the inferior:
4110
4111 t1 = y * z
4112 t2 = t1 * x
4113 t3 = t2 * t2
4114 t4 = z * z
4115 t5 = t3 * t4
4116 result = t5 * y */
4117
4118 vec_len = repeat_factor_vec.length ();
4119
4120 /* Repeatedly look for opportunities to create a builtin_powi call. */
4121 while (true)
4122 {
4123 HOST_WIDE_INT power;
4124
4125 /* First look for the largest cached product of factors from
4126 preceding iterations. If found, create a builtin_powi for
4127 it if the minimum occurrence count for its factors is at
4128 least 2, or just use this cached product as our next
4129 multiplicand if the minimum occurrence count is 1. */
4130 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4131 {
4132 if (rf1->repr && rf1->count > 0)
4133 break;
4134 }
4135
4136 if (j < vec_len)
4137 {
4138 power = rf1->count;
4139
4140 if (power == 1)
4141 {
4142 iter_result = rf1->repr;
4143
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4145 {
4146 unsigned elt;
4147 repeat_factor_t rf;
4148 fputs ("Multiplying by cached product ", dump_file);
4149 for (elt = j; elt < vec_len; elt++)
4150 {
4151 rf = &repeat_factor_vec[elt];
4152 print_generic_expr (dump_file, rf->factor, 0);
4153 if (elt < vec_len - 1)
4154 fputs (" * ", dump_file);
4155 }
4156 fputs ("\n", dump_file);
4157 }
4158 }
4159 else
4160 {
4161 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4162 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4163 build_int_cst (integer_type_node,
4164 power));
4165 gimple_call_set_lhs (pow_stmt, iter_result);
4166 gimple_set_location (pow_stmt, gimple_location (stmt));
4167 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4168
4169 if (dump_file && (dump_flags & TDF_DETAILS))
4170 {
4171 unsigned elt;
4172 repeat_factor_t rf;
4173 fputs ("Building __builtin_pow call for cached product (",
4174 dump_file);
4175 for (elt = j; elt < vec_len; elt++)
4176 {
4177 rf = &repeat_factor_vec[elt];
4178 print_generic_expr (dump_file, rf->factor, 0);
4179 if (elt < vec_len - 1)
4180 fputs (" * ", dump_file);
4181 }
4182 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4183 power);
4184 }
4185 }
4186 }
4187 else
4188 {
4189 /* Otherwise, find the first factor in the repeated factor
4190 vector whose occurrence count is at least 2. If no such
4191 factor exists, there are no builtin_powi opportunities
4192 remaining. */
4193 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4194 {
4195 if (rf1->count >= 2)
4196 break;
4197 }
4198
4199 if (j >= vec_len)
4200 break;
4201
4202 power = rf1->count;
4203
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4205 {
4206 unsigned elt;
4207 repeat_factor_t rf;
4208 fputs ("Building __builtin_pow call for (", dump_file);
4209 for (elt = j; elt < vec_len; elt++)
4210 {
4211 rf = &repeat_factor_vec[elt];
4212 print_generic_expr (dump_file, rf->factor, 0);
4213 if (elt < vec_len - 1)
4214 fputs (" * ", dump_file);
4215 }
4216 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4217 }
4218
4219 reassociate_stats.pows_created++;
4220
4221 /* Visit each element of the vector in reverse order (so that
4222 high-occurrence elements are visited first, and within the
4223 same occurrence count, lower-ranked elements are visited
4224 first). Form a linear product of all elements in this order
4225 	     whose occurrence count is at least that of element J.
4226 Record the SSA name representing the product of each element
4227 with all subsequent elements in the vector. */
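	  /* E.g. with factors x, y and z at positions J, J+1 and J+2, this
	     records z.repr = z, y.repr = y * z and x.repr = x * y * z.  */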
4228 if (j == vec_len - 1)
4229 rf1->repr = rf1->factor;
4230 else
4231 {
4232 for (ii = vec_len - 2; ii >= (int)j; ii--)
4233 {
4234 tree op1, op2;
4235
4236 rf1 = &repeat_factor_vec[ii];
4237 rf2 = &repeat_factor_vec[ii + 1];
4238
4239 /* Init the last factor's representative to be itself. */
4240 if (!rf2->repr)
4241 rf2->repr = rf2->factor;
4242
4243 op1 = rf1->factor;
4244 op2 = rf2->repr;
4245
4246 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4247 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4248 target_ssa,
4249 op1, op2);
4250 gimple_set_location (mul_stmt, gimple_location (stmt));
4251 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4252 rf1->repr = target_ssa;
4253
4254 /* Don't reprocess the multiply we just introduced. */
4255 gimple_set_visited (mul_stmt, true);
4256 }
4257 }
4258
4259 /* Form a call to __builtin_powi for the maximum product
4260 just formed, raised to the power obtained earlier. */
4261 rf1 = &repeat_factor_vec[j];
4262 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4263 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4264 build_int_cst (integer_type_node,
4265 power));
4266 gimple_call_set_lhs (pow_stmt, iter_result);
4267 gimple_set_location (pow_stmt, gimple_location (stmt));
4268 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4269 }
4270
4271 /* If we previously formed at least one other builtin_powi call,
4272 form the product of this one and those others. */
4273 if (result)
4274 {
4275 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4276 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4277 result, iter_result);
4278 gimple_set_location (mul_stmt, gimple_location (stmt));
4279 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4280 gimple_set_visited (mul_stmt, true);
4281 result = new_result;
4282 }
4283 else
4284 result = iter_result;
4285
4286 /* Decrement the occurrence count of each element in the product
4287 by the count found above, and remove this many copies of each
4288 factor from OPS. */
4289 for (i = j; i < vec_len; i++)
4290 {
4291 unsigned k = power;
4292 unsigned n;
4293
4294 rf1 = &repeat_factor_vec[i];
4295 rf1->count -= power;
4296
4297 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4298 {
4299 if (oe->op == rf1->factor)
4300 {
4301 if (oe->count <= k)
4302 {
4303 ops->ordered_remove (n);
4304 k -= oe->count;
4305
4306 if (k == 0)
4307 break;
4308 }
4309 else
4310 {
4311 oe->count -= k;
4312 break;
4313 }
4314 }
4315 }
4316 }
4317 }
4318
4319 /* At this point all elements in the repeated factor vector have a
4320 remaining occurrence count of 0 or 1, and those with a count of 1
4321 don't have cached representatives. Re-sort the ops vector and
4322 clean up. */
4323 ops->qsort (sort_by_operand_rank);
4324 repeat_factor_vec.release ();
4325
4326 /* Return the final product computed herein. Note that there may
4327 still be some elements with single occurrence count left in OPS;
4328 those will be handled by the normal reassociation logic. */
4329 return result;
4330 }
4331
4332 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4333
4334 static void
4335 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4336 {
4337 tree rhs1;
4338
4339 if (dump_file && (dump_flags & TDF_DETAILS))
4340 {
4341 fprintf (dump_file, "Transforming ");
4342 print_gimple_stmt (dump_file, stmt, 0, 0);
4343 }
4344
4345 rhs1 = gimple_assign_rhs1 (stmt);
4346 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4347 update_stmt (stmt);
4348 remove_visited_stmt_chain (rhs1);
4349
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4351 {
4352 fprintf (dump_file, " into ");
4353 print_gimple_stmt (dump_file, stmt, 0, 0);
4354 }
4355 }
4356
4357 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4358
4359 static void
4360 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4361 tree rhs1, tree rhs2)
4362 {
4363 if (dump_file && (dump_flags & TDF_DETAILS))
4364 {
4365 fprintf (dump_file, "Transforming ");
4366 print_gimple_stmt (dump_file, stmt, 0, 0);
4367 }
4368
4369 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4370 update_stmt (gsi_stmt (*gsi));
4371 remove_visited_stmt_chain (rhs1);
4372
4373 if (dump_file && (dump_flags & TDF_DETAILS))
4374 {
4375 fprintf (dump_file, " into ");
4376 print_gimple_stmt (dump_file, stmt, 0, 0);
4377 }
4378 }
4379
4380 /* Reassociate expressions in basic block BB and its children in the
4381    post-dominator tree.  */
4382
4383 static void
4384 reassociate_bb (basic_block bb)
4385 {
4386 gimple_stmt_iterator gsi;
4387 basic_block son;
4388 gimple stmt = last_stmt (bb);
4389
4390 if (stmt && !gimple_visited_p (stmt))
4391 maybe_optimize_range_tests (stmt);
4392
4393 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4394 {
4395 stmt = gsi_stmt (gsi);
4396
4397 if (is_gimple_assign (stmt)
4398 && !stmt_could_throw_p (stmt))
4399 {
4400 tree lhs, rhs1, rhs2;
4401 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4402
4403 /* If this is not a gimple binary expression, there is
4404 nothing for us to do with it. */
4405 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4406 continue;
4407
4408 /* If this was part of an already processed statement,
4409 we don't need to touch it again. */
4410 if (gimple_visited_p (stmt))
4411 {
4412 /* This statement might have become dead because of previous
4413 reassociations. */
4414 if (has_zero_uses (gimple_get_lhs (stmt)))
4415 {
4416 gsi_remove (&gsi, true);
4417 release_defs (stmt);
4418 		  /* Removing the statement above may leave the iterator
4419 		     pointing at the end of the sequence.  In that case,
4420 		     reset it to the last statement of the block.  If the
4421 		     removed statement was the only one in the block, the
4422 		     block is now empty and the iterator is still at the
4423 		     end, so we need to bail out.  */
4424 if (gsi_end_p (gsi))
4425 {
4426 gsi = gsi_last_bb (bb);
4427 if (gsi_end_p (gsi))
4428 break;
4429 }
4430 }
4431 continue;
4432 }
4433
4434 lhs = gimple_assign_lhs (stmt);
4435 rhs1 = gimple_assign_rhs1 (stmt);
4436 rhs2 = gimple_assign_rhs2 (stmt);
4437
4438 	      /* For operations other than bitwise AND/OR/XOR and min/max,
4439 		 we cannot associate all types.  Verify that here.  */
4440 if (rhs_code != BIT_IOR_EXPR
4441 && rhs_code != BIT_AND_EXPR
4442 && rhs_code != BIT_XOR_EXPR
4443 && rhs_code != MIN_EXPR
4444 && rhs_code != MAX_EXPR
4445 && (!can_reassociate_p (lhs)
4446 || !can_reassociate_p (rhs1)
4447 || !can_reassociate_p (rhs2)))
4448 continue;
4449
4450 if (associative_tree_code (rhs_code))
4451 {
4452 auto_vec<operand_entry_t> ops;
4453 tree powi_result = NULL_TREE;
4454
4455 /* There may be no immediate uses left by the time we
4456 get here because we may have eliminated them all. */
4457 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4458 continue;
4459
4460 gimple_set_visited (stmt, true);
4461 linearize_expr_tree (&ops, stmt, true, true);
4462 ops.qsort (sort_by_operand_rank);
4463 optimize_ops_list (rhs_code, &ops);
4464 if (undistribute_ops_list (rhs_code, &ops,
4465 loop_containing_stmt (stmt)))
4466 {
4467 ops.qsort (sort_by_operand_rank);
4468 optimize_ops_list (rhs_code, &ops);
4469 }
4470
4471 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4472 optimize_range_tests (rhs_code, &ops);
4473
4474 if (first_pass_instance
4475 && rhs_code == MULT_EXPR
4476 && flag_unsafe_math_optimizations)
4477 powi_result = attempt_builtin_powi (stmt, &ops);
4478
4479 /* If the operand vector is now empty, all operands were
4480 consumed by the __builtin_powi optimization. */
4481 if (ops.length () == 0)
4482 transform_stmt_to_copy (&gsi, stmt, powi_result);
4483 else if (ops.length () == 1)
4484 {
4485 tree last_op = ops.last ()->op;
4486
4487 if (powi_result)
4488 transform_stmt_to_multiply (&gsi, stmt, last_op,
4489 powi_result);
4490 else
4491 transform_stmt_to_copy (&gsi, stmt, last_op);
4492 }
4493 else
4494 {
4495 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4496 int ops_num = ops.length ();
4497 int width = get_reassociation_width (ops_num, rhs_code, mode);
4498 tree new_lhs = lhs;
4499
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4501 fprintf (dump_file,
4502 "Width = %d was chosen for reassociation\n", width);
4503
4504 if (width > 1
4505 && ops.length () > 3)
4506 rewrite_expr_tree_parallel (stmt, width, ops);
4507 else
4508 {
4509 /* When there are three operands left, we want
4510 to make sure the ones that get the double
4511 binary op are chosen wisely. */
4512 int len = ops.length ();
4513 if (len >= 3)
4514 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4515
4516 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4517 powi_result != NULL);
4518 }
4519
4520 /* If we combined some repeated factors into a
4521 __builtin_powi call, multiply that result by the
4522 reassociated operands. */
4523 if (powi_result)
4524 {
4525 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4526 tree type = TREE_TYPE (lhs);
4527 tree target_ssa = make_temp_ssa_name (type, NULL,
4528 "reassocpow");
4529 gimple_set_lhs (lhs_stmt, target_ssa);
4530 update_stmt (lhs_stmt);
4531 if (lhs != new_lhs)
4532 target_ssa = new_lhs;
4533 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4534 powi_result,
4535 target_ssa);
4536 gimple_set_location (mul_stmt, gimple_location (stmt));
4537 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4538 }
4539 }
4540 }
4541 }
4542 }
4543 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4544 son;
4545 son = next_dom_son (CDI_POST_DOMINATORS, son))
4546 reassociate_bb (son);
4547 }
4548
4549 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4550 void debug_ops_vector (vec<operand_entry_t> ops);
4551
4552 /* Dump the operand entry vector OPS to FILE. */
4553
4554 void
4555 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4556 {
4557 operand_entry_t oe;
4558 unsigned int i;
4559
4560 FOR_EACH_VEC_ELT (ops, i, oe)
4561 {
4562 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4563 print_generic_expr (file, oe->op, 0);
4564 }
4565 }
4566
4567 /* Dump the operand entry vector OPS to STDERR. */
4568
4569 DEBUG_FUNCTION void
4570 debug_ops_vector (vec<operand_entry_t> ops)
4571 {
4572 dump_ops_vector (stderr, ops);
4573 }
4574
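/* Perform reassociation: break up subtracts walking the dominator tree
   from the entry block, then reassociate each block walking the
   post-dominator tree from the exit block.  */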
4575 static void
4576 do_reassoc (void)
4577 {
4578 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4579 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4580 }
4581
4582 /* Initialize the reassociation pass. */
4583
4584 static void
4585 init_reassoc (void)
4586 {
4587 int i;
4588 long rank = 2;
4589 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4590
4591 /* Find the loops, so that we can prevent moving calculations in
4592 them. */
4593 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4594
4595 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4596
4597 operand_entry_pool = create_alloc_pool ("operand entry pool",
4598 sizeof (struct operand_entry), 30);
4599 next_operand_entry_id = 0;
4600
4601   /* Compute a reverse post order (RPO) of the CFG; in that order,
4602      blocks in deeper loops come later.  */
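  /* Blocks later in this order receive larger bb_rank values below, so
     names defined in deeper loops rank higher than loop-invariant ones.  */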
4603 pre_and_rev_post_order_compute (NULL, bbs, false);
4604 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4605 operand_rank = pointer_map_create ();
4606
4607 /* Give each default definition a distinct rank. This includes
4608 parameters and the static chain. Walk backwards over all
4609 SSA names so that we get proper rank ordering according
4610 to tree_swap_operands_p. */
4611 for (i = num_ssa_names - 1; i > 0; --i)
4612 {
4613 tree name = ssa_name (i);
4614 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4615 insert_operand_rank (name, ++rank);
4616 }
4617
4618 /* Set up rank for each BB */
4619 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4620 bb_rank[bbs[i]] = ++rank << 16;
4621
4622 free (bbs);
4623 calculate_dominance_info (CDI_POST_DOMINATORS);
4624 plus_negates = vNULL;
4625 }
4626
4627 /* Cleanup after the reassociation pass, and print stats if
4628 requested. */
4629
4630 static void
4631 fini_reassoc (void)
4632 {
4633 statistics_counter_event (cfun, "Linearized",
4634 reassociate_stats.linearized);
4635 statistics_counter_event (cfun, "Constants eliminated",
4636 reassociate_stats.constants_eliminated);
4637 statistics_counter_event (cfun, "Ops eliminated",
4638 reassociate_stats.ops_eliminated);
4639 statistics_counter_event (cfun, "Statements rewritten",
4640 reassociate_stats.rewritten);
4641 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4642 reassociate_stats.pows_encountered);
4643 statistics_counter_event (cfun, "Built-in powi calls created",
4644 reassociate_stats.pows_created);
4645
4646 pointer_map_destroy (operand_rank);
4647 free_alloc_pool (operand_entry_pool);
4648 free (bb_rank);
4649 plus_negates.release ();
4650 free_dominance_info (CDI_POST_DOMINATORS);
4651 loop_optimizer_finalize ();
4652 }
4653
4654 /* Gate and execute functions for Reassociation. */
4655
4656 static unsigned int
4657 execute_reassoc (void)
4658 {
4659 init_reassoc ();
4660
4661 do_reassoc ();
4662 repropagate_negates ();
4663
4664 fini_reassoc ();
4665 return 0;
4666 }
4667
4668 static bool
4669 gate_tree_ssa_reassoc (void)
4670 {
4671 return flag_tree_reassoc != 0;
4672 }
4673
4674 namespace {
4675
4676 const pass_data pass_data_reassoc =
4677 {
4678 GIMPLE_PASS, /* type */
4679 "reassoc", /* name */
4680 OPTGROUP_NONE, /* optinfo_flags */
4681 true, /* has_gate */
4682 true, /* has_execute */
4683 TV_TREE_REASSOC, /* tv_id */
4684 ( PROP_cfg | PROP_ssa ), /* properties_required */
4685 0, /* properties_provided */
4686 0, /* properties_destroyed */
4687 0, /* todo_flags_start */
4688 ( TODO_verify_ssa
4689 | TODO_update_ssa_only_virtuals
4690 | TODO_verify_flow ), /* todo_flags_finish */
4691 };
4692
4693 class pass_reassoc : public gimple_opt_pass
4694 {
4695 public:
4696 pass_reassoc (gcc::context *ctxt)
4697 : gimple_opt_pass (pass_data_reassoc, ctxt)
4698 {}
4699
4700 /* opt_pass methods: */
4701 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4702 bool gate () { return gate_tree_ssa_reassoc (); }
4703 unsigned int execute () { return execute_reassoc (); }
4704
4705 }; // class pass_reassoc
4706
4707 } // anon namespace
4708
4709 gimple_opt_pass *
4710 make_pass_reassoc (gcc::context *ctxt)
4711 {
4712 return new pass_reassoc (ctxt);
4713 }