ipa-cp.c (ipcp_cloning_candidate_p): Use opt_for_fn.
[gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "cfganal.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-inline.h"
44 #include "hash-map.h"
45 #include "tree-ssa-alias.h"
46 #include "internal-fn.h"
47 #include "gimple-fold.h"
48 #include "tree-eh.h"
49 #include "gimple-expr.h"
50 #include "is-a.h"
51 #include "gimple.h"
52 #include "gimple-iterator.h"
53 #include "gimplify-me.h"
54 #include "gimple-ssa.h"
55 #include "tree-cfg.h"
56 #include "tree-phinodes.h"
57 #include "ssa-iterators.h"
58 #include "stringpool.h"
59 #include "tree-ssanames.h"
60 #include "tree-ssa-loop-niter.h"
61 #include "tree-ssa-loop.h"
62 #include "expr.h"
63 #include "tree-dfa.h"
64 #include "tree-ssa.h"
65 #include "tree-iterator.h"
66 #include "tree-pass.h"
67 #include "alloc-pool.h"
68 #include "langhooks.h"
69 #include "cfgloop.h"
70 #include "flags.h"
71 #include "target.h"
72 #include "params.h"
73 #include "diagnostic-core.h"
74 #include "builtins.h"
75 #include "gimplify.h"
76 #include "insn-codes.h"
77 #include "optabs.h"
78
79 /* This is a simple global reassociation pass. It is, in part, based
80 on the LLVM pass of the same name (They do some things more/less
81 than we do, in different orders, etc).
82
83 It consists of five steps:
84
85 1. Breaking up subtract operations into addition + negate, where
86 it would promote the reassociation of adds.
87
88 2. Left linearization of the expression trees, so that (A+B)+(C+D)
89 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
90 During linearization, we place the operands of the binary
91 expressions into a vector of operand_entry_t
92
93 3. Optimization of the operand lists, eliminating things like a +
94 -a, a & a, etc.
95
96 3a. Combine repeated factors with the same occurrence counts
97 into a __builtin_powi call that will later be optimized into
98 an optimal number of multiplies.
99
100 4. Rewrite the expression trees we linearized and optimized so
101 they are in proper rank order.
102
103 5. Repropagate negates, as nothing else will clean it up ATM.
104
105 A bit of theory on #4, since nobody seems to write anything down
106 about why it makes sense to do it the way they do it:
107
108 We could do this much nicer theoretically, but don't (for reasons
109 explained after how to do it theoretically nice :P).
110
111 In order to promote the most redundancy elimination, you want
112 binary expressions whose operands are the same rank (or
113 preferably, the same value) exposed to the redundancy eliminator,
114 for possible elimination.
115
116 So the way to do this if we really cared, is to build the new op
117 tree from the leaves to the roots, merging as you go, and putting the
118 new op on the end of the worklist, until you are left with one
119 thing on the worklist.
120
121 IE if you have to rewrite the following set of operands (listed with
122 rank in parentheses), with opcode PLUS_EXPR:
123
124 a (1), b (1), c (1), d (2), e (2)
125
126
127 We start with our merge worklist empty, and the ops list with all of
128 those on it.
129
130 You want to first merge all leaves of the same rank, as much as
131 possible.
132
133 So first build a binary op of
134
135 mergetmp = a + b, and put "mergetmp" on the merge worklist.
136
137 Because there is no three operand form of PLUS_EXPR, c is not going to
138 be exposed to redundancy elimination as a rank 1 operand.
139
140 So you might as well throw it on the merge worklist (you could also
141 consider it to now be a rank two operand, and merge it with d and e,
142 but in this case, you then have evicted e from a binary op. So at
143 least in this situation, you can't win.)
144
145 Then build a binary op of d + e
146 mergetmp2 = d + e
147
148 and put mergetmp2 on the merge worklist.
149
150 so merge worklist = {mergetmp, c, mergetmp2}
151
152 Continue building binary ops of these operations until you have only
153 one operation left on the worklist.
154
155 So we have
156
157 build binary op
158 mergetmp3 = mergetmp + c
159
160 worklist = {mergetmp2, mergetmp3}
161
162 mergetmp4 = mergetmp2 + mergetmp3
163
164 worklist = {mergetmp4}
165
166 because we have one operation left, we can now just set the original
167 statement equal to the result of that operation.
168
169 This will at least expose a + b and d + e to redundancy elimination
170 as binary operations.
171
172 For extra points, you can reuse the old statements to build the
173 mergetmps, since you shouldn't run out.
174
175 So why don't we do this?
176
177 Because it's expensive, and rarely will help. Most trees we are
178 reassociating have 3 or less ops. If they have 2 ops, they already
179 will be written into a nice single binary op. If you have 3 ops, a
180 single simple check suffices to tell you whether the first two are of the
181 same rank. If so, you know to order it
182
183 mergetmp = op1 + op2
184 newstmt = mergetmp + op3
185
186 instead of
187 mergetmp = op2 + op3
188 newstmt = mergetmp + op1
189
190 If all three are of the same rank, you can't expose them all in a
191 single binary operator anyway, so the above is *still* the best you
192 can do.
193
194 Thus, this is what we do. When we have three ops left, we check to see
195 what order to put them in, and call it a day. As a nod to vector sum
196 reduction, we check if any of the ops are really a phi node that is a
197 destructive update for the associating op, and keep the destructive
198 update together for vector sum reduction recognition. */
199
200
201 /* Statistics */
202 static struct
203 {
204 int linearized;
205 int constants_eliminated;
206 int ops_eliminated;
207 int rewritten;
208 int pows_encountered;
209 int pows_created;
210 } reassociate_stats;
211
212 /* Operator, rank pair. */
213 typedef struct operand_entry
214 {
215 unsigned int rank;
216 int id;
217 tree op;
218 unsigned int count;
219 } *operand_entry_t;
220
221 static alloc_pool operand_entry_pool;
222
223 /* This is used to assign a unique ID to each struct operand_entry
224 so that qsort results are identical on different hosts. */
225 static int next_operand_entry_id;
226
227 /* Starting rank number for a given basic block, so that we can rank
228 operations using unmovable instructions in that BB based on the bb
229 depth. */
230 static long *bb_rank;
231
232 /* Operand->rank hashtable. */
233 static hash_map<tree, long> *operand_rank;
234
235 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
236 all basic blocks the CFG should be adjusted - basic blocks
237 split right after that SSA_NAME's definition statement and before
238 the only use, which must be a bit ior. */
239 static vec<tree> reassoc_branch_fixups;
240
241 /* Forward decls. */
242 static long get_rank (tree);
243 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
244
245 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
246 possibly added by gsi_remove. */
247
248 bool
249 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
250 {
251 gimple stmt = gsi_stmt (*gsi);
252
253 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
254 return gsi_remove (gsi, true);
255
256 gimple_stmt_iterator prev = *gsi;
257 gsi_prev (&prev);
258 unsigned uid = gimple_uid (stmt);
259 basic_block bb = gimple_bb (stmt);
260 bool ret = gsi_remove (gsi, true);
261 if (!gsi_end_p (prev))
262 gsi_next (&prev);
263 else
264 prev = gsi_start_bb (bb);
265 gimple end_stmt = gsi_stmt (*gsi);
266 while ((stmt = gsi_stmt (prev)) != end_stmt)
267 {
268 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
269 gimple_set_uid (stmt, uid);
270 gsi_next (&prev);
271 }
272 return ret;
273 }
274
275 /* Bias amount for loop-carried phis. We want this to be larger than
276 the depth of any reassociation tree we can see, but not larger than
277 the rank difference between two blocks. */
278 #define PHI_LOOP_BIAS (1 << 15)
279
280 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
281 an innermost loop, and the phi has only a single use which is inside
282 the loop, then the rank is the block rank of the loop latch plus an
283 extra bias for the loop-carried dependence. This causes expressions
284 calculated into an accumulator variable to be independent for each
285 iteration of the loop. If STMT is some other phi, the rank is the
286 block rank of its containing block. */
287 static long
288 phi_rank (gimple stmt)
289 {
290 basic_block bb = gimple_bb (stmt);
291 struct loop *father = bb->loop_father;
292 tree res;
293 unsigned i;
294 use_operand_p use;
295 gimple use_stmt;
296
297 /* We only care about real loops (those with a latch). */
298 if (!father->latch)
299 return bb_rank[bb->index];
300
301 /* Interesting phis must be in headers of innermost loops. */
302 if (bb != father->header
303 || father->inner)
304 return bb_rank[bb->index];
305
306 /* Ignore virtual SSA_NAMEs. */
307 res = gimple_phi_result (stmt);
308 if (virtual_operand_p (res))
309 return bb_rank[bb->index];
310
311 /* The phi definition must have a single use, and that use must be
312 within the loop. Otherwise this isn't an accumulator pattern. */
313 if (!single_imm_use (res, &use, &use_stmt)
314 || gimple_bb (use_stmt)->loop_father != father)
315 return bb_rank[bb->index];
316
317 /* Look for phi arguments from within the loop. If found, bias this phi. */
318 for (i = 0; i < gimple_phi_num_args (stmt); i++)
319 {
320 tree arg = gimple_phi_arg_def (stmt, i);
321 if (TREE_CODE (arg) == SSA_NAME
322 && !SSA_NAME_IS_DEFAULT_DEF (arg))
323 {
324 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
325 if (gimple_bb (def_stmt)->loop_father == father)
326 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
327 }
328 }
329
330 /* Must be an uninteresting phi. */
331 return bb_rank[bb->index];
332 }
333
334 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
335 loop-carried dependence of an innermost loop, return TRUE; else
336 return FALSE. */
337 static bool
338 loop_carried_phi (tree exp)
339 {
340 gimple phi_stmt;
341 long block_rank;
342
343 if (TREE_CODE (exp) != SSA_NAME
344 || SSA_NAME_IS_DEFAULT_DEF (exp))
345 return false;
346
347 phi_stmt = SSA_NAME_DEF_STMT (exp);
348
349 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
350 return false;
351
352 /* Non-loop-carried phis have block rank. Loop-carried phis have
353 an additional bias added in. If this phi doesn't have block rank,
354 it's biased and should not be propagated. */
355 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
356
357 if (phi_rank (phi_stmt) != block_rank)
358 return true;
359
360 return false;
361 }
362
363 /* Return the maximum of RANK and the rank that should be propagated
364 from expression OP. For most operands, this is just the rank of OP.
365 For loop-carried phis, the value is zero to avoid undoing the bias
366 in favor of the phi. */
367 static long
368 propagate_rank (long rank, tree op)
369 {
370 long op_rank;
371
372 if (loop_carried_phi (op))
373 return rank;
374
375 op_rank = get_rank (op);
376
377 return MAX (rank, op_rank);
378 }
379
380 /* Look up the operand rank structure for expression E. */
381
382 static inline long
383 find_operand_rank (tree e)
384 {
385 long *slot = operand_rank->get (e);
386 return slot ? *slot : -1;
387 }
388
389 /* Insert {E,RANK} into the operand rank hashtable. */
390
391 static inline void
392 insert_operand_rank (tree e, long rank)
393 {
394 gcc_assert (rank > 0);
395 gcc_assert (!operand_rank->put (e, rank));
396 }
397
398 /* Given an expression E, return the rank of the expression. */
399
400 static long
401 get_rank (tree e)
402 {
403 /* Constants have rank 0. */
404 if (is_gimple_min_invariant (e))
405 return 0;
406
407 /* SSA_NAME's have the rank of the expression they are the result
408 of.
409 For globals and uninitialized values, the rank is 0.
410 For function arguments, use the pre-setup rank.
411 For PHI nodes, stores, asm statements, etc, we use the rank of
412 the BB.
413 For simple operations, the rank is the maximum rank of any of
414 its operands, or the bb_rank, whichever is less.
415 I make no claims that this is optimal, however, it gives good
416 results. */
417
418 /* We make an exception to the normal ranking system to break
419 dependences of accumulator variables in loops. Suppose we
420 have a simple one-block loop containing:
421
422 x_1 = phi(x_0, x_2)
423 b = a + x_1
424 c = b + d
425 x_2 = c + e
426
427 As shown, each iteration of the calculation into x is fully
428 dependent upon the iteration before it. We would prefer to
429 see this in the form:
430
431 x_1 = phi(x_0, x_2)
432 b = a + d
433 c = b + e
434 x_2 = c + x_1
435
436 If the loop is unrolled, the calculations of b and c from
437 different iterations can be interleaved.
438
439 To obtain this result during reassociation, we bias the rank
440 of the phi definition x_1 upward, when it is recognized as an
441 accumulator pattern. The artificial rank causes it to be
442 added last, providing the desired independence. */
443
444 if (TREE_CODE (e) == SSA_NAME)
445 {
446 gimple stmt;
447 long rank;
448 int i, n;
449 tree op;
450
451 if (SSA_NAME_IS_DEFAULT_DEF (e))
452 return find_operand_rank (e);
453
454 stmt = SSA_NAME_DEF_STMT (e);
455 if (gimple_code (stmt) == GIMPLE_PHI)
456 return phi_rank (stmt);
457
458 if (!is_gimple_assign (stmt)
459 || gimple_vdef (stmt))
460 return bb_rank[gimple_bb (stmt)->index];
461
462 /* If we already have a rank for this expression, use that. */
463 rank = find_operand_rank (e);
464 if (rank != -1)
465 return rank;
466
467 /* Otherwise, find the maximum rank for the operands. As an
468 exception, remove the bias from loop-carried phis when propagating
469 the rank so that dependent operations are not also biased. */
470 rank = 0;
471 if (gimple_assign_single_p (stmt))
472 {
473 tree rhs = gimple_assign_rhs1 (stmt);
474 n = TREE_OPERAND_LENGTH (rhs);
475 if (n == 0)
476 rank = propagate_rank (rank, rhs);
477 else
478 {
479 for (i = 0; i < n; i++)
480 {
481 op = TREE_OPERAND (rhs, i);
482
483 if (op != NULL_TREE)
484 rank = propagate_rank (rank, op);
485 }
486 }
487 }
488 else
489 {
490 n = gimple_num_ops (stmt);
491 for (i = 1; i < n; i++)
492 {
493 op = gimple_op (stmt, i);
494 gcc_assert (op);
495 rank = propagate_rank (rank, op);
496 }
497 }
498
499 if (dump_file && (dump_flags & TDF_DETAILS))
500 {
501 fprintf (dump_file, "Rank for ");
502 print_generic_expr (dump_file, e, 0);
503 fprintf (dump_file, " is %ld\n", (rank + 1));
504 }
505
506 /* Note the rank in the hashtable so we don't recompute it. */
507 insert_operand_rank (e, (rank + 1));
508 return (rank + 1);
509 }
510
511 /* Globals, etc, are rank 0 */
512 return 0;
513 }
514
515
516 /* We want integer ones to end up last no matter what, since they are
517 the ones we can do the most with. */
518 #define INTEGER_CONST_TYPE 1 << 3
519 #define FLOAT_CONST_TYPE 1 << 2
520 #define OTHER_CONST_TYPE 1 << 1
521
522 /* Classify an invariant tree into integer, float, or other, so that
523 we can sort them to be near other constants of the same type. */
524 static inline int
525 constant_type (tree t)
526 {
527 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
528 return INTEGER_CONST_TYPE;
529 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
530 return FLOAT_CONST_TYPE;
531 else
532 return OTHER_CONST_TYPE;
533 }
534
535 /* qsort comparison function to sort operand entries PA and PB by rank
536 so that the sorted array is ordered by rank in decreasing order. */
537 static int
538 sort_by_operand_rank (const void *pa, const void *pb)
539 {
540 const operand_entry_t oea = *(const operand_entry_t *)pa;
541 const operand_entry_t oeb = *(const operand_entry_t *)pb;
542
543 /* It's nicer for optimize_expression if constants that are likely
544 to fold when added/multiplied//whatever are put next to each
545 other. Since all constants have rank 0, order them by type. */
546 if (oeb->rank == 0 && oea->rank == 0)
547 {
548 if (constant_type (oeb->op) != constant_type (oea->op))
549 return constant_type (oeb->op) - constant_type (oea->op);
550 else
551 /* To make sorting result stable, we use unique IDs to determine
552 order. */
553 return oeb->id - oea->id;
554 }
555
556 /* Lastly, make sure the versions that are the same go next to each
557 other. */
558 if ((oeb->rank - oea->rank == 0)
559 && TREE_CODE (oea->op) == SSA_NAME
560 && TREE_CODE (oeb->op) == SSA_NAME)
561 {
562 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
563 versions of removed SSA_NAMEs, so if possible, prefer to sort
564 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
565 See PR60418. */
566 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
567 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
568 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
569 {
570 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
571 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
572 basic_block bba = gimple_bb (stmta);
573 basic_block bbb = gimple_bb (stmtb);
574 if (bbb != bba)
575 {
576 if (bb_rank[bbb->index] != bb_rank[bba->index])
577 return bb_rank[bbb->index] - bb_rank[bba->index];
578 }
579 else
580 {
581 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
582 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
583 if (da != db)
584 return da ? 1 : -1;
585 }
586 }
587
588 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
589 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
590 else
591 return oeb->id - oea->id;
592 }
593
594 if (oeb->rank != oea->rank)
595 return oeb->rank - oea->rank;
596 else
597 return oeb->id - oea->id;
598 }
599
600 /* Add an operand entry to *OPS for the tree operand OP. */
601
602 static void
603 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
604 {
605 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606
607 oe->op = op;
608 oe->rank = get_rank (op);
609 oe->id = next_operand_entry_id++;
610 oe->count = 1;
611 ops->safe_push (oe);
612 }
613
614 /* Add an operand entry to *OPS for the tree operand OP with repeat
615 count REPEAT. */
616
617 static void
618 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
619 HOST_WIDE_INT repeat)
620 {
621 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
622
623 oe->op = op;
624 oe->rank = get_rank (op);
625 oe->id = next_operand_entry_id++;
626 oe->count = repeat;
627 ops->safe_push (oe);
628
629 reassociate_stats.pows_encountered++;
630 }
631
632 /* Return true if STMT is reassociable operation containing a binary
633 operation with tree code CODE, and is inside LOOP. */
634
635 static bool
636 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
637 {
638 basic_block bb = gimple_bb (stmt);
639
640 if (gimple_bb (stmt) == NULL)
641 return false;
642
643 if (!flow_bb_inside_loop_p (loop, bb))
644 return false;
645
646 if (is_gimple_assign (stmt)
647 && gimple_assign_rhs_code (stmt) == code
648 && has_single_use (gimple_assign_lhs (stmt)))
649 return true;
650
651 return false;
652 }
653
654
655 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
656 operand of the negate operation. Otherwise, return NULL. */
657
658 static tree
659 get_unary_op (tree name, enum tree_code opcode)
660 {
661 gimple stmt = SSA_NAME_DEF_STMT (name);
662
663 if (!is_gimple_assign (stmt))
664 return NULL_TREE;
665
666 if (gimple_assign_rhs_code (stmt) == opcode)
667 return gimple_assign_rhs1 (stmt);
668 return NULL_TREE;
669 }
670
671 /* If CURR and LAST are a pair of ops that OPCODE allows us to
672 eliminate through equivalences, do so, remove them from OPS, and
673 return true. Otherwise, return false. */
674
675 static bool
676 eliminate_duplicate_pair (enum tree_code opcode,
677 vec<operand_entry_t> *ops,
678 bool *all_done,
679 unsigned int i,
680 operand_entry_t curr,
681 operand_entry_t last)
682 {
683
684 /* If we have two of the same op, and the opcode is & |, min, or max,
685 we can eliminate one of them.
686 If we have two of the same op, and the opcode is ^, we can
687 eliminate both of them. */
688
689 if (last && last->op == curr->op)
690 {
691 switch (opcode)
692 {
693 case MAX_EXPR:
694 case MIN_EXPR:
695 case BIT_IOR_EXPR:
696 case BIT_AND_EXPR:
697 if (dump_file && (dump_flags & TDF_DETAILS))
698 {
699 fprintf (dump_file, "Equivalence: ");
700 print_generic_expr (dump_file, curr->op, 0);
701 fprintf (dump_file, " [&|minmax] ");
702 print_generic_expr (dump_file, last->op, 0);
703 fprintf (dump_file, " -> ");
704 print_generic_stmt (dump_file, last->op, 0);
705 }
706
707 ops->ordered_remove (i);
708 reassociate_stats.ops_eliminated ++;
709
710 return true;
711
712 case BIT_XOR_EXPR:
713 if (dump_file && (dump_flags & TDF_DETAILS))
714 {
715 fprintf (dump_file, "Equivalence: ");
716 print_generic_expr (dump_file, curr->op, 0);
717 fprintf (dump_file, " ^ ");
718 print_generic_expr (dump_file, last->op, 0);
719 fprintf (dump_file, " -> nothing\n");
720 }
721
722 reassociate_stats.ops_eliminated += 2;
723
724 if (ops->length () == 2)
725 {
726 ops->create (0);
727 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
728 *all_done = true;
729 }
730 else
731 {
732 ops->ordered_remove (i-1);
733 ops->ordered_remove (i-1);
734 }
735
736 return true;
737
738 default:
739 break;
740 }
741 }
742 return false;
743 }
744
745 static vec<tree> plus_negates;
746
747 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
748 expression, look in OPS for a corresponding positive operation to cancel
749 it out. If we find one, remove the other from OPS, replace
750 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
751 return false. */
752
753 static bool
754 eliminate_plus_minus_pair (enum tree_code opcode,
755 vec<operand_entry_t> *ops,
756 unsigned int currindex,
757 operand_entry_t curr)
758 {
759 tree negateop;
760 tree notop;
761 unsigned int i;
762 operand_entry_t oe;
763
764 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
765 return false;
766
767 negateop = get_unary_op (curr->op, NEGATE_EXPR);
768 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
769 if (negateop == NULL_TREE && notop == NULL_TREE)
770 return false;
771
772 /* Any non-negated version will have a rank that is one less than
773 the current rank. So once we hit those ranks, if we don't find
774 one, we can stop. */
775
776 for (i = currindex + 1;
777 ops->iterate (i, &oe)
778 && oe->rank >= curr->rank - 1 ;
779 i++)
780 {
781 if (oe->op == negateop)
782 {
783
784 if (dump_file && (dump_flags & TDF_DETAILS))
785 {
786 fprintf (dump_file, "Equivalence: ");
787 print_generic_expr (dump_file, negateop, 0);
788 fprintf (dump_file, " + -");
789 print_generic_expr (dump_file, oe->op, 0);
790 fprintf (dump_file, " -> 0\n");
791 }
792
793 ops->ordered_remove (i);
794 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
795 ops->ordered_remove (currindex);
796 reassociate_stats.ops_eliminated ++;
797
798 return true;
799 }
800 else if (oe->op == notop)
801 {
802 tree op_type = TREE_TYPE (oe->op);
803
804 if (dump_file && (dump_flags & TDF_DETAILS))
805 {
806 fprintf (dump_file, "Equivalence: ");
807 print_generic_expr (dump_file, notop, 0);
808 fprintf (dump_file, " + ~");
809 print_generic_expr (dump_file, oe->op, 0);
810 fprintf (dump_file, " -> -1\n");
811 }
812
813 ops->ordered_remove (i);
814 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
815 ops->ordered_remove (currindex);
816 reassociate_stats.ops_eliminated ++;
817
818 return true;
819 }
820 }
821
822 /* CURR->OP is a negate expr in a plus expr: save it for later
823 inspection in repropagate_negates(). */
824 if (negateop != NULL_TREE)
825 plus_negates.safe_push (curr->op);
826
827 return false;
828 }
829
830 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
831 bitwise not expression, look in OPS for a corresponding operand to
832 cancel it out. If we find one, remove the other from OPS, replace
833 OPS[CURRINDEX] with 0, and return true. Otherwise, return
834 false. */
835
836 static bool
837 eliminate_not_pairs (enum tree_code opcode,
838 vec<operand_entry_t> *ops,
839 unsigned int currindex,
840 operand_entry_t curr)
841 {
842 tree notop;
843 unsigned int i;
844 operand_entry_t oe;
845
846 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
847 || TREE_CODE (curr->op) != SSA_NAME)
848 return false;
849
850 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
851 if (notop == NULL_TREE)
852 return false;
853
854 /* Any non-not version will have a rank that is one less than
855 the current rank. So once we hit those ranks, if we don't find
856 one, we can stop. */
857
858 for (i = currindex + 1;
859 ops->iterate (i, &oe)
860 && oe->rank >= curr->rank - 1;
861 i++)
862 {
863 if (oe->op == notop)
864 {
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 {
867 fprintf (dump_file, "Equivalence: ");
868 print_generic_expr (dump_file, notop, 0);
869 if (opcode == BIT_AND_EXPR)
870 fprintf (dump_file, " & ~");
871 else if (opcode == BIT_IOR_EXPR)
872 fprintf (dump_file, " | ~");
873 print_generic_expr (dump_file, oe->op, 0);
874 if (opcode == BIT_AND_EXPR)
875 fprintf (dump_file, " -> 0\n");
876 else if (opcode == BIT_IOR_EXPR)
877 fprintf (dump_file, " -> -1\n");
878 }
879
880 if (opcode == BIT_AND_EXPR)
881 oe->op = build_zero_cst (TREE_TYPE (oe->op));
882 else if (opcode == BIT_IOR_EXPR)
883 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
884
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (0);
887 ops->quick_push (oe);
888 return true;
889 }
890 }
891
892 return false;
893 }
894
895 /* Use constant value that may be present in OPS to try to eliminate
896 operands. Note that this function is only really used when we've
897 eliminated ops for other reasons, or merged constants. Across
898 single statements, fold already does all of this, plus more. There
899 is little point in duplicating logic, so I've only included the
900 identities that I could ever construct testcases to trigger. */
901
902 static void
903 eliminate_using_constants (enum tree_code opcode,
904 vec<operand_entry_t> *ops)
905 {
906 operand_entry_t oelast = ops->last ();
907 tree type = TREE_TYPE (oelast->op);
908
909 if (oelast->rank == 0
910 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
911 {
912 switch (opcode)
913 {
914 case BIT_AND_EXPR:
915 if (integer_zerop (oelast->op))
916 {
917 if (ops->length () != 1)
918 {
919 if (dump_file && (dump_flags & TDF_DETAILS))
920 fprintf (dump_file, "Found & 0, removing all other ops\n");
921
922 reassociate_stats.ops_eliminated += ops->length () - 1;
923
924 ops->truncate (0);
925 ops->quick_push (oelast);
926 return;
927 }
928 }
929 else if (integer_all_onesp (oelast->op))
930 {
931 if (ops->length () != 1)
932 {
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "Found & -1, removing\n");
935 ops->pop ();
936 reassociate_stats.ops_eliminated++;
937 }
938 }
939 break;
940 case BIT_IOR_EXPR:
941 if (integer_all_onesp (oelast->op))
942 {
943 if (ops->length () != 1)
944 {
945 if (dump_file && (dump_flags & TDF_DETAILS))
946 fprintf (dump_file, "Found | -1, removing all other ops\n");
947
948 reassociate_stats.ops_eliminated += ops->length () - 1;
949
950 ops->truncate (0);
951 ops->quick_push (oelast);
952 return;
953 }
954 }
955 else if (integer_zerop (oelast->op))
956 {
957 if (ops->length () != 1)
958 {
959 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Found | 0, removing\n");
961 ops->pop ();
962 reassociate_stats.ops_eliminated++;
963 }
964 }
965 break;
966 case MULT_EXPR:
967 if (integer_zerop (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && !HONOR_NANS (TYPE_MODE (type))
970 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
971 && real_zerop (oelast->op)))
972 {
973 if (ops->length () != 1)
974 {
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found * 0, removing all other ops\n");
977
978 reassociate_stats.ops_eliminated += ops->length () - 1;
979 ops->truncate (1);
980 ops->quick_push (oelast);
981 return;
982 }
983 }
984 else if (integer_onep (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_SNANS (TYPE_MODE (type))
987 && real_onep (oelast->op)))
988 {
989 if (ops->length () != 1)
990 {
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 1, removing\n");
993 ops->pop ();
994 reassociate_stats.ops_eliminated++;
995 return;
996 }
997 }
998 break;
999 case BIT_XOR_EXPR:
1000 case PLUS_EXPR:
1001 case MINUS_EXPR:
1002 if (integer_zerop (oelast->op)
1003 || (FLOAT_TYPE_P (type)
1004 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1005 && fold_real_zero_addition_p (type, oelast->op,
1006 opcode == MINUS_EXPR)))
1007 {
1008 if (ops->length () != 1)
1009 {
1010 if (dump_file && (dump_flags & TDF_DETAILS))
1011 fprintf (dump_file, "Found [|^+] 0, removing\n");
1012 ops->pop ();
1013 reassociate_stats.ops_eliminated++;
1014 return;
1015 }
1016 }
1017 break;
1018 default:
1019 break;
1020 }
1021 }
1022 }
1023
1024
1025 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1026 bool, bool);
1027
1028 /* Structure for tracking and counting operands. */
1029 typedef struct oecount_s {
1030 int cnt;
1031 int id;
1032 enum tree_code oecode;
1033 tree op;
1034 } oecount;
1035
1036
1037 /* The heap for the oecount hashtable and the sorted list of operands. */
1038 static vec<oecount> cvec;
1039
1040
1041 /* Oecount hashtable helpers. */
1042
1043 struct oecount_hasher
1044 {
1045 typedef int value_type;
1046 typedef int compare_type;
1047 typedef int store_values_directly;
1048 static inline hashval_t hash (const value_type &);
1049 static inline bool equal (const value_type &, const compare_type &);
1050 static bool is_deleted (int &v) { return v == 1; }
1051 static void mark_deleted (int &e) { e = 1; }
1052 static bool is_empty (int &v) { return v == 0; }
1053 static void mark_empty (int &e) { e = 0; }
1054 static void remove (int &) {}
1055 };
1056
1057 /* Hash function for oecount. */
1058
1059 inline hashval_t
1060 oecount_hasher::hash (const value_type &p)
1061 {
1062 const oecount *c = &cvec[p - 42];
1063 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1064 }
1065
1066 /* Comparison function for oecount. */
1067
1068 inline bool
1069 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1070 {
1071 const oecount *c1 = &cvec[p1 - 42];
1072 const oecount *c2 = &cvec[p2 - 42];
1073 return (c1->oecode == c2->oecode
1074 && c1->op == c2->op);
1075 }
1076
1077 /* Comparison function for qsort sorting oecount elements by count. */
1078
1079 static int
1080 oecount_cmp (const void *p1, const void *p2)
1081 {
1082 const oecount *c1 = (const oecount *)p1;
1083 const oecount *c2 = (const oecount *)p2;
1084 if (c1->cnt != c2->cnt)
1085 return c1->cnt - c2->cnt;
1086 else
1087 /* If counts are identical, use unique IDs to stabilize qsort. */
1088 return c1->id - c2->id;
1089 }
1090
1091 /* Return TRUE iff STMT represents a builtin call that raises OP
1092 to some exponent. */
1093
1094 static bool
1095 stmt_is_power_of_op (gimple stmt, tree op)
1096 {
1097 tree fndecl;
1098
1099 if (!is_gimple_call (stmt))
1100 return false;
1101
1102 fndecl = gimple_call_fndecl (stmt);
1103
1104 if (!fndecl
1105 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1106 return false;
1107
1108 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1109 {
1110 CASE_FLT_FN (BUILT_IN_POW):
1111 CASE_FLT_FN (BUILT_IN_POWI):
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1113
1114 default:
1115 return false;
1116 }
1117 }
1118
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
1122
1123 static HOST_WIDE_INT
1124 decrement_power (gimple stmt)
1125 {
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1129
1130 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1131 {
1132 CASE_FLT_FN (BUILT_IN_POW):
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1139
1140 CASE_FLT_FN (BUILT_IN_POWI):
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1145
1146 default:
1147 gcc_unreachable ();
1148 }
1149 }
1150
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1154
1155 static void
1156 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1157 {
1158 tree lhs;
1159 gimple use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1162
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1167
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1179 }
1180
1181 /* Walks the linear chain with result *DEF searching for an operation
1182 with operand OP and code OPCODE removing that from the chain. *DEF
1183 is updated if there is only one operand but no operation left. */
1184
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1187 {
1188 gimple stmt = SSA_NAME_DEF_STMT (*def);
1189
1190 do
1191 {
1192 tree name;
1193
1194 if (opcode == MULT_EXPR
1195 && stmt_is_power_of_op (stmt, op))
1196 {
1197 if (decrement_power (stmt) == 1)
1198 propagate_op_to_single_use (op, stmt, def);
1199 return;
1200 }
1201
1202 name = gimple_assign_rhs1 (stmt);
1203
1204 /* If this is the operation we look for and one of the operands
1205 is ours simply propagate the other operand into the stmts
1206 single use. */
1207 if (gimple_assign_rhs_code (stmt) == opcode
1208 && (name == op
1209 || gimple_assign_rhs2 (stmt) == op))
1210 {
1211 if (name == op)
1212 name = gimple_assign_rhs2 (stmt);
1213 propagate_op_to_single_use (name, stmt, def);
1214 return;
1215 }
1216
1217 /* We might have a multiply of two __builtin_pow* calls, and
1218 the operand might be hiding in the rightmost one. */
1219 if (opcode == MULT_EXPR
1220 && gimple_assign_rhs_code (stmt) == opcode
1221 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1222 && has_single_use (gimple_assign_rhs2 (stmt)))
1223 {
1224 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1225 if (stmt_is_power_of_op (stmt2, op))
1226 {
1227 if (decrement_power (stmt2) == 1)
1228 propagate_op_to_single_use (op, stmt2, def);
1229 return;
1230 }
1231 }
1232
1233 /* Continue walking the chain. */
1234 gcc_assert (name != op
1235 && TREE_CODE (name) == SSA_NAME);
1236 stmt = SSA_NAME_DEF_STMT (name);
1237 }
1238 while (1);
1239 }
1240
1241 /* Returns true if statement S1 dominates statement S2. Like
1242 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1243
1244 static bool
1245 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1246 {
1247 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1248
1249 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1250 SSA_NAME. Assume it lives at the beginning of function and
1251 thus dominates everything. */
1252 if (!bb1 || s1 == s2)
1253 return true;
1254
1255 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1256 if (!bb2)
1257 return false;
1258
1259 if (bb1 == bb2)
1260 {
1261 /* PHIs in the same basic block are assumed to be
1262 executed all in parallel, if only one stmt is a PHI,
1263 it dominates the other stmt in the same basic block. */
1264 if (gimple_code (s1) == GIMPLE_PHI)
1265 return true;
1266
1267 if (gimple_code (s2) == GIMPLE_PHI)
1268 return false;
1269
1270 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1271
1272 if (gimple_uid (s1) < gimple_uid (s2))
1273 return true;
1274
1275 if (gimple_uid (s1) > gimple_uid (s2))
1276 return false;
1277
1278 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1279 unsigned int uid = gimple_uid (s1);
1280 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1281 {
1282 gimple s = gsi_stmt (gsi);
1283 if (gimple_uid (s) != uid)
1284 break;
1285 if (s == s2)
1286 return true;
1287 }
1288
1289 return false;
1290 }
1291
1292 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1293 }
1294
1295 /* Insert STMT after INSERT_POINT. */
1296
1297 static void
1298 insert_stmt_after (gimple stmt, gimple insert_point)
1299 {
1300 gimple_stmt_iterator gsi;
1301 basic_block bb;
1302
1303 if (gimple_code (insert_point) == GIMPLE_PHI)
1304 bb = gimple_bb (insert_point);
1305 else if (!stmt_ends_bb_p (insert_point))
1306 {
1307 gsi = gsi_for_stmt (insert_point);
1308 gimple_set_uid (stmt, gimple_uid (insert_point));
1309 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1310 return;
1311 }
1312 else
1313 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1314 thus if it must end a basic block, it should be a call that can
1315 throw, or some assignment that can throw. If it throws, the LHS
1316 of it will not be initialized though, so only valid places using
1317 the SSA_NAME should be dominated by the fallthru edge. */
1318 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1319 gsi = gsi_after_labels (bb);
1320 if (gsi_end_p (gsi))
1321 {
1322 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1323 gimple_set_uid (stmt,
1324 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1325 }
1326 else
1327 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1328 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1329 }
1330
1331 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1332 the result. Places the statement after the definition of either
1333 OP1 or OP2. Returns the new statement. */
1334
1335 static gimple
1336 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1337 {
1338 gimple op1def = NULL, op2def = NULL;
1339 gimple_stmt_iterator gsi;
1340 tree op;
1341 gimple sum;
1342
1343 /* Create the addition statement. */
1344 op = make_ssa_name (type, NULL);
1345 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1346
1347 /* Find an insertion place and insert. */
1348 if (TREE_CODE (op1) == SSA_NAME)
1349 op1def = SSA_NAME_DEF_STMT (op1);
1350 if (TREE_CODE (op2) == SSA_NAME)
1351 op2def = SSA_NAME_DEF_STMT (op2);
1352 if ((!op1def || gimple_nop_p (op1def))
1353 && (!op2def || gimple_nop_p (op2def)))
1354 {
1355 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1356 if (gsi_end_p (gsi))
1357 {
1358 gimple_stmt_iterator gsi2
1359 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1360 gimple_set_uid (sum,
1361 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1362 }
1363 else
1364 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1365 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1366 }
1367 else
1368 {
1369 gimple insert_point;
1370 if ((!op1def || gimple_nop_p (op1def))
1371 || (op2def && !gimple_nop_p (op2def)
1372 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1373 insert_point = op2def;
1374 else
1375 insert_point = op1def;
1376 insert_stmt_after (sum, insert_point);
1377 }
1378 update_stmt (sum);
1379
1380 return sum;
1381 }
1382
1383 /* Perform un-distribution of divisions and multiplications.
1384 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1385 to (A + B) / X for real X.
1386
1387 The algorithm is organized as follows.
1388
1389 - First we walk the addition chain *OPS looking for summands that
1390 are defined by a multiplication or a real division. This results
1391 in the candidates bitmap with relevant indices into *OPS.
1392
1393 - Second we build the chains of multiplications or divisions for
1394 these candidates, counting the number of occurrences of (operand, code)
1395 pairs in all of the candidates chains.
1396
1397 - Third we sort the (operand, code) pairs by number of occurrence and
1398 process them starting with the pair with the most uses.
1399
1400 * For each such pair we walk the candidates again to build a
1401 second candidate bitmap noting all multiplication/division chains
1402 that have at least one occurrence of (operand, code).
1403
1404 * We build an alternate addition chain only covering these
1405 candidates with one (operand, code) operation removed from their
1406 multiplication/division chain.
1407
1408 * The first candidate gets replaced by the alternate addition chain
1409 multiplied/divided by the operand.
1410
1411 * All candidate chains get disabled for further processing and
1412 processing of (operand, code) pairs continues.
1413
1414 The alternate addition chains built are re-processed by the main
1415 reassociation algorithm which allows optimizing a * x * y + b * y * x
1416 to (a + b ) * x * y in one invocation of the reassociation pass. */
1417
1418 static bool
1419 undistribute_ops_list (enum tree_code opcode,
1420 vec<operand_entry_t> *ops, struct loop *loop)
1421 {
1422 unsigned int length = ops->length ();
1423 operand_entry_t oe1;
1424 unsigned i, j;
1425 sbitmap candidates, candidates2;
1426 unsigned nr_candidates, nr_candidates2;
1427 sbitmap_iterator sbi0;
1428 vec<operand_entry_t> *subops;
1429 bool changed = false;
1430 int next_oecount_id = 0;
1431
1432 if (length <= 1
1433 || opcode != PLUS_EXPR)
1434 return false;
1435
1436 /* Build a list of candidates to process. */
1437 candidates = sbitmap_alloc (length);
1438 bitmap_clear (candidates);
1439 nr_candidates = 0;
1440 FOR_EACH_VEC_ELT (*ops, i, oe1)
1441 {
1442 enum tree_code dcode;
1443 gimple oe1def;
1444
1445 if (TREE_CODE (oe1->op) != SSA_NAME)
1446 continue;
1447 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1448 if (!is_gimple_assign (oe1def))
1449 continue;
1450 dcode = gimple_assign_rhs_code (oe1def);
1451 if ((dcode != MULT_EXPR
1452 && dcode != RDIV_EXPR)
1453 || !is_reassociable_op (oe1def, dcode, loop))
1454 continue;
1455
1456 bitmap_set_bit (candidates, i);
1457 nr_candidates++;
1458 }
1459
1460 if (nr_candidates < 2)
1461 {
1462 sbitmap_free (candidates);
1463 return false;
1464 }
1465
1466 if (dump_file && (dump_flags & TDF_DETAILS))
1467 {
1468 fprintf (dump_file, "searching for un-distribute opportunities ");
1469 print_generic_expr (dump_file,
1470 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1471 fprintf (dump_file, " %d\n", nr_candidates);
1472 }
1473
1474 /* Build linearized sub-operand lists and the counting table. */
1475 cvec.create (0);
1476
1477 hash_table<oecount_hasher> ctable (15);
1478
1479 /* ??? Macro arguments cannot have multi-argument template types in
1480 them. This typedef is needed to workaround that limitation. */
1481 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1482 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1483 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1484 {
1485 gimple oedef;
1486 enum tree_code oecode;
1487 unsigned j;
1488
1489 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1490 oecode = gimple_assign_rhs_code (oedef);
1491 linearize_expr_tree (&subops[i], oedef,
1492 associative_tree_code (oecode), false);
1493
1494 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1495 {
1496 oecount c;
1497 int *slot;
1498 int idx;
1499 c.oecode = oecode;
1500 c.cnt = 1;
1501 c.id = next_oecount_id++;
1502 c.op = oe1->op;
1503 cvec.safe_push (c);
1504 idx = cvec.length () + 41;
1505 slot = ctable.find_slot (idx, INSERT);
1506 if (!*slot)
1507 {
1508 *slot = idx;
1509 }
1510 else
1511 {
1512 cvec.pop ();
1513 cvec[*slot - 42].cnt++;
1514 }
1515 }
1516 }
1517
1518 /* Sort the counting table. */
1519 cvec.qsort (oecount_cmp);
1520
1521 if (dump_file && (dump_flags & TDF_DETAILS))
1522 {
1523 oecount *c;
1524 fprintf (dump_file, "Candidates:\n");
1525 FOR_EACH_VEC_ELT (cvec, j, c)
1526 {
1527 fprintf (dump_file, " %u %s: ", c->cnt,
1528 c->oecode == MULT_EXPR
1529 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1530 print_generic_expr (dump_file, c->op, 0);
1531 fprintf (dump_file, "\n");
1532 }
1533 }
1534
1535 /* Process the (operand, code) pairs in order of most occurrence. */
1536 candidates2 = sbitmap_alloc (length);
1537 while (!cvec.is_empty ())
1538 {
1539 oecount *c = &cvec.last ();
1540 if (c->cnt < 2)
1541 break;
1542
1543 /* Now collect the operands in the outer chain that contain
1544 the common operand in their inner chain. */
1545 bitmap_clear (candidates2);
1546 nr_candidates2 = 0;
1547 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1548 {
1549 gimple oedef;
1550 enum tree_code oecode;
1551 unsigned j;
1552 tree op = (*ops)[i]->op;
1553
1554 /* If we undistributed in this chain already this may be
1555 a constant. */
1556 if (TREE_CODE (op) != SSA_NAME)
1557 continue;
1558
1559 oedef = SSA_NAME_DEF_STMT (op);
1560 oecode = gimple_assign_rhs_code (oedef);
1561 if (oecode != c->oecode)
1562 continue;
1563
1564 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1565 {
1566 if (oe1->op == c->op)
1567 {
1568 bitmap_set_bit (candidates2, i);
1569 ++nr_candidates2;
1570 break;
1571 }
1572 }
1573 }
1574
1575 if (nr_candidates2 >= 2)
1576 {
1577 operand_entry_t oe1, oe2;
1578 gimple prod;
1579 int first = bitmap_first_set_bit (candidates2);
1580
1581 /* Build the new addition chain. */
1582 oe1 = (*ops)[first];
1583 if (dump_file && (dump_flags & TDF_DETAILS))
1584 {
1585 fprintf (dump_file, "Building (");
1586 print_generic_expr (dump_file, oe1->op, 0);
1587 }
1588 zero_one_operation (&oe1->op, c->oecode, c->op);
1589 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1590 {
1591 gimple sum;
1592 oe2 = (*ops)[i];
1593 if (dump_file && (dump_flags & TDF_DETAILS))
1594 {
1595 fprintf (dump_file, " + ");
1596 print_generic_expr (dump_file, oe2->op, 0);
1597 }
1598 zero_one_operation (&oe2->op, c->oecode, c->op);
1599 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1600 oe1->op, oe2->op, opcode);
1601 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1602 oe2->rank = 0;
1603 oe1->op = gimple_get_lhs (sum);
1604 }
1605
1606 /* Apply the multiplication/division. */
1607 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1608 oe1->op, c->op, c->oecode);
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1610 {
1611 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1612 print_generic_expr (dump_file, c->op, 0);
1613 fprintf (dump_file, "\n");
1614 }
1615
1616 /* Record it in the addition chain and disable further
1617 undistribution with this op. */
1618 oe1->op = gimple_assign_lhs (prod);
1619 oe1->rank = get_rank (oe1->op);
1620 subops[first].release ();
1621
1622 changed = true;
1623 }
1624
1625 cvec.pop ();
1626 }
1627
1628 for (i = 0; i < ops->length (); ++i)
1629 subops[i].release ();
1630 free (subops);
1631 cvec.release ();
1632 sbitmap_free (candidates);
1633 sbitmap_free (candidates2);
1634
1635 return changed;
1636 }
1637
1638 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1639 expression, examine the other OPS to see if any of them are comparisons
1640 of the same values, which we may be able to combine or eliminate.
1641 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1642
1643 static bool
1644 eliminate_redundant_comparison (enum tree_code opcode,
1645 vec<operand_entry_t> *ops,
1646 unsigned int currindex,
1647 operand_entry_t curr)
1648 {
1649 tree op1, op2;
1650 enum tree_code lcode, rcode;
1651 gimple def1, def2;
1652 int i;
1653 operand_entry_t oe;
1654
1655 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1656 return false;
1657
1658 /* Check that CURR is a comparison. */
1659 if (TREE_CODE (curr->op) != SSA_NAME)
1660 return false;
1661 def1 = SSA_NAME_DEF_STMT (curr->op);
1662 if (!is_gimple_assign (def1))
1663 return false;
1664 lcode = gimple_assign_rhs_code (def1);
1665 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1666 return false;
1667 op1 = gimple_assign_rhs1 (def1);
1668 op2 = gimple_assign_rhs2 (def1);
1669
1670 /* Now look for a similar comparison in the remaining OPS. */
1671 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1672 {
1673 tree t;
1674
1675 if (TREE_CODE (oe->op) != SSA_NAME)
1676 continue;
1677 def2 = SSA_NAME_DEF_STMT (oe->op);
1678 if (!is_gimple_assign (def2))
1679 continue;
1680 rcode = gimple_assign_rhs_code (def2);
1681 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1682 continue;
1683
1684 /* If we got here, we have a match. See if we can combine the
1685 two comparisons. */
1686 if (opcode == BIT_IOR_EXPR)
1687 t = maybe_fold_or_comparisons (lcode, op1, op2,
1688 rcode, gimple_assign_rhs1 (def2),
1689 gimple_assign_rhs2 (def2));
1690 else
1691 t = maybe_fold_and_comparisons (lcode, op1, op2,
1692 rcode, gimple_assign_rhs1 (def2),
1693 gimple_assign_rhs2 (def2));
1694 if (!t)
1695 continue;
1696
1697 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1698 always give us a boolean_type_node value back. If the original
1699 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1700 we need to convert. */
1701 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1702 t = fold_convert (TREE_TYPE (curr->op), t);
1703
1704 if (TREE_CODE (t) != INTEGER_CST
1705 && !operand_equal_p (t, curr->op, 0))
1706 {
1707 enum tree_code subcode;
1708 tree newop1, newop2;
1709 if (!COMPARISON_CLASS_P (t))
1710 continue;
1711 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1712 STRIP_USELESS_TYPE_CONVERSION (newop1);
1713 STRIP_USELESS_TYPE_CONVERSION (newop2);
1714 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1715 continue;
1716 }
1717
1718 if (dump_file && (dump_flags & TDF_DETAILS))
1719 {
1720 fprintf (dump_file, "Equivalence: ");
1721 print_generic_expr (dump_file, curr->op, 0);
1722 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1723 print_generic_expr (dump_file, oe->op, 0);
1724 fprintf (dump_file, " -> ");
1725 print_generic_expr (dump_file, t, 0);
1726 fprintf (dump_file, "\n");
1727 }
1728
1729 /* Now we can delete oe, as it has been subsumed by the new combined
1730 expression t. */
1731 ops->ordered_remove (i);
1732 reassociate_stats.ops_eliminated ++;
1733
1734 /* If t is the same as curr->op, we're done. Otherwise we must
1735 replace curr->op with t. Special case is if we got a constant
1736 back, in which case we add it to the end instead of in place of
1737 the current entry. */
1738 if (TREE_CODE (t) == INTEGER_CST)
1739 {
1740 ops->ordered_remove (currindex);
1741 add_to_ops_vec (ops, t);
1742 }
1743 else if (!operand_equal_p (t, curr->op, 0))
1744 {
1745 gimple sum;
1746 enum tree_code subcode;
1747 tree newop1;
1748 tree newop2;
1749 gcc_assert (COMPARISON_CLASS_P (t));
1750 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1751 STRIP_USELESS_TYPE_CONVERSION (newop1);
1752 STRIP_USELESS_TYPE_CONVERSION (newop2);
1753 gcc_checking_assert (is_gimple_val (newop1)
1754 && is_gimple_val (newop2));
1755 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1756 curr->op = gimple_get_lhs (sum);
1757 }
1758 return true;
1759 }
1760
1761 return false;
1762 }
1763
1764 /* Perform various identities and other optimizations on the list of
1765 operand entries, stored in OPS. The tree code for the binary
1766 operation between all the operands is OPCODE. */
1767
1768 static void
1769 optimize_ops_list (enum tree_code opcode,
1770 vec<operand_entry_t> *ops)
1771 {
1772 unsigned int length = ops->length ();
1773 unsigned int i;
1774 operand_entry_t oe;
1775 operand_entry_t oelast = NULL;
1776 bool iterate = false;
1777
1778 if (length == 1)
1779 return;
1780
1781 oelast = ops->last ();
1782
1783 /* If the last two are constants, pop the constants off, merge them
1784 and try the next two. */
1785 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1786 {
1787 operand_entry_t oelm1 = (*ops)[length - 2];
1788
1789 if (oelm1->rank == 0
1790 && is_gimple_min_invariant (oelm1->op)
1791 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1792 TREE_TYPE (oelast->op)))
1793 {
1794 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1795 oelm1->op, oelast->op);
1796
1797 if (folded && is_gimple_min_invariant (folded))
1798 {
1799 if (dump_file && (dump_flags & TDF_DETAILS))
1800 fprintf (dump_file, "Merging constants\n");
1801
1802 ops->pop ();
1803 ops->pop ();
1804
1805 add_to_ops_vec (ops, folded);
1806 reassociate_stats.constants_eliminated++;
1807
1808 optimize_ops_list (opcode, ops);
1809 return;
1810 }
1811 }
1812 }
1813
1814 eliminate_using_constants (opcode, ops);
1815 oelast = NULL;
1816
1817 for (i = 0; ops->iterate (i, &oe);)
1818 {
1819 bool done = false;
1820
1821 if (eliminate_not_pairs (opcode, ops, i, oe))
1822 return;
1823 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1824 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1825 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1826 {
1827 if (done)
1828 return;
1829 iterate = true;
1830 oelast = NULL;
1831 continue;
1832 }
1833 oelast = oe;
1834 i++;
1835 }
1836
1837 length = ops->length ();
1838 oelast = ops->last ();
1839
1840 if (iterate)
1841 optimize_ops_list (opcode, ops);
1842 }
1843
1844 /* The following functions are subroutines to optimize_range_tests and allow
1845 it to try to change a logical combination of comparisons into a range
1846 test.
1847
1848 For example, both
1849 X == 2 || X == 5 || X == 3 || X == 4
1850 and
1851 X >= 2 && X <= 5
1852 are converted to
1853 (unsigned) (X - 2) <= 3
1854
1855 For more information see comments above fold_test_range in fold-const.c,
1856 this implementation is for GIMPLE. */
1857
1858 struct range_entry
1859 {
1860 tree exp;
1861 tree low;
1862 tree high;
1863 bool in_p;
1864 bool strict_overflow_p;
1865 unsigned int idx, next;
1866 };
1867
1868 /* This is similar to make_range in fold-const.c, but on top of
1869 GIMPLE instead of trees. If EXP is non-NULL, it should be
1870 an SSA_NAME and STMT argument is ignored, otherwise STMT
1871 argument should be a GIMPLE_COND. */
1872
1873 static void
1874 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1875 {
1876 int in_p;
1877 tree low, high;
1878 bool is_bool, strict_overflow_p;
1879
1880 r->exp = NULL_TREE;
1881 r->in_p = false;
1882 r->strict_overflow_p = false;
1883 r->low = NULL_TREE;
1884 r->high = NULL_TREE;
1885 if (exp != NULL_TREE
1886 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1887 return;
1888
1889 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1890 and see if we can refine the range. Some of the cases below may not
1891 happen, but it doesn't seem worth worrying about this. We "continue"
1892 the outer loop when we've changed something; otherwise we "break"
1893 the switch, which will "break" the while. */
1894 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1895 high = low;
1896 in_p = 0;
1897 strict_overflow_p = false;
1898 is_bool = false;
1899 if (exp == NULL_TREE)
1900 is_bool = true;
1901 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1902 {
1903 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1904 is_bool = true;
1905 else
1906 return;
1907 }
1908 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1909 is_bool = true;
1910
1911 while (1)
1912 {
1913 enum tree_code code;
1914 tree arg0, arg1, exp_type;
1915 tree nexp;
1916 location_t loc;
1917
1918 if (exp != NULL_TREE)
1919 {
1920 if (TREE_CODE (exp) != SSA_NAME
1921 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1922 break;
1923
1924 stmt = SSA_NAME_DEF_STMT (exp);
1925 if (!is_gimple_assign (stmt))
1926 break;
1927
1928 code = gimple_assign_rhs_code (stmt);
1929 arg0 = gimple_assign_rhs1 (stmt);
1930 arg1 = gimple_assign_rhs2 (stmt);
1931 exp_type = TREE_TYPE (exp);
1932 }
1933 else
1934 {
1935 code = gimple_cond_code (stmt);
1936 arg0 = gimple_cond_lhs (stmt);
1937 arg1 = gimple_cond_rhs (stmt);
1938 exp_type = boolean_type_node;
1939 }
1940
1941 if (TREE_CODE (arg0) != SSA_NAME)
1942 break;
1943 loc = gimple_location (stmt);
1944 switch (code)
1945 {
1946 case BIT_NOT_EXPR:
1947 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1948 /* Ensure the range is either +[-,0], +[0,0],
1949 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1950 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1951 or similar expression of unconditional true or
1952 false, it should not be negated. */
1953 && ((high && integer_zerop (high))
1954 || (low && integer_onep (low))))
1955 {
1956 in_p = !in_p;
1957 exp = arg0;
1958 continue;
1959 }
1960 break;
1961 case SSA_NAME:
1962 exp = arg0;
1963 continue;
1964 CASE_CONVERT:
1965 if (is_bool)
1966 goto do_default;
1967 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1968 {
1969 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1970 is_bool = true;
1971 else
1972 return;
1973 }
1974 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1975 is_bool = true;
1976 goto do_default;
1977 case EQ_EXPR:
1978 case NE_EXPR:
1979 case LT_EXPR:
1980 case LE_EXPR:
1981 case GE_EXPR:
1982 case GT_EXPR:
1983 is_bool = true;
1984 /* FALLTHRU */
1985 default:
1986 if (!is_bool)
1987 return;
1988 do_default:
1989 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1990 &low, &high, &in_p,
1991 &strict_overflow_p);
1992 if (nexp != NULL_TREE)
1993 {
1994 exp = nexp;
1995 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1996 continue;
1997 }
1998 break;
1999 }
2000 break;
2001 }
2002 if (is_bool)
2003 {
2004 r->exp = exp;
2005 r->in_p = in_p;
2006 r->low = low;
2007 r->high = high;
2008 r->strict_overflow_p = strict_overflow_p;
2009 }
2010 }
2011
2012 /* Comparison function for qsort. Sort entries
2013 without SSA_NAME exp first, then with SSA_NAMEs sorted
2014 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2015 by increasing ->low and if ->low is the same, by increasing
2016 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2017 maximum. */
2018
2019 static int
2020 range_entry_cmp (const void *a, const void *b)
2021 {
2022 const struct range_entry *p = (const struct range_entry *) a;
2023 const struct range_entry *q = (const struct range_entry *) b;
2024
2025 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2026 {
2027 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2028 {
2029 /* Group range_entries for the same SSA_NAME together. */
2030 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2031 return -1;
2032 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2033 return 1;
2034 /* If ->low is different, NULL low goes first, then by
2035 ascending low. */
2036 if (p->low != NULL_TREE)
2037 {
2038 if (q->low != NULL_TREE)
2039 {
2040 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2041 p->low, q->low);
2042 if (tem && integer_onep (tem))
2043 return -1;
2044 tem = fold_binary (GT_EXPR, boolean_type_node,
2045 p->low, q->low);
2046 if (tem && integer_onep (tem))
2047 return 1;
2048 }
2049 else
2050 return 1;
2051 }
2052 else if (q->low != NULL_TREE)
2053 return -1;
2054 /* If ->high is different, NULL high goes last, before that by
2055 ascending high. */
2056 if (p->high != NULL_TREE)
2057 {
2058 if (q->high != NULL_TREE)
2059 {
2060 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2061 p->high, q->high);
2062 if (tem && integer_onep (tem))
2063 return -1;
2064 tem = fold_binary (GT_EXPR, boolean_type_node,
2065 p->high, q->high);
2066 if (tem && integer_onep (tem))
2067 return 1;
2068 }
2069 else
2070 return -1;
2071 }
2072 else if (p->high != NULL_TREE)
2073 return 1;
2074 /* If both ranges are the same, sort below by ascending idx. */
2075 }
2076 else
2077 return 1;
2078 }
2079 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2080 return -1;
2081
2082 if (p->idx < q->idx)
2083 return -1;
2084 else
2085 {
2086 gcc_checking_assert (p->idx > q->idx);
2087 return 1;
2088 }
2089 }
2090
2091 /* Helper routine of optimize_range_test.
2092 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2093 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2094 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2095 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2096 an array of COUNT pointers to other ranges. Return
2097 true if the range merge has been successful.
2098 If OPCODE is ERROR_MARK, this is called from within
2099 maybe_optimize_range_tests and is performing inter-bb range optimization.
2100 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2101 oe->rank. */
2102
2103 static bool
2104 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2105 struct range_entry **otherrangep,
2106 unsigned int count, enum tree_code opcode,
2107 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2108 bool in_p, tree low, tree high, bool strict_overflow_p)
2109 {
2110 operand_entry_t oe = (*ops)[range->idx];
2111 tree op = oe->op;
2112 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2113 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2114 location_t loc = gimple_location (stmt);
2115 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2116 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2117 in_p, low, high);
2118 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2119 gimple_stmt_iterator gsi;
2120 unsigned int i;
2121
2122 if (tem == NULL_TREE)
2123 return false;
2124
2125 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2126 warning_at (loc, OPT_Wstrict_overflow,
2127 "assuming signed overflow does not occur "
2128 "when simplifying range test");
2129
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 {
2132 struct range_entry *r;
2133 fprintf (dump_file, "Optimizing range tests ");
2134 print_generic_expr (dump_file, range->exp, 0);
2135 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2136 print_generic_expr (dump_file, range->low, 0);
2137 fprintf (dump_file, ", ");
2138 print_generic_expr (dump_file, range->high, 0);
2139 fprintf (dump_file, "]");
2140 for (i = 0; i < count; i++)
2141 {
2142 if (otherrange)
2143 r = otherrange + i;
2144 else
2145 r = otherrangep[i];
2146 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2147 print_generic_expr (dump_file, r->low, 0);
2148 fprintf (dump_file, ", ");
2149 print_generic_expr (dump_file, r->high, 0);
2150 fprintf (dump_file, "]");
2151 }
2152 fprintf (dump_file, "\n into ");
2153 print_generic_expr (dump_file, tem, 0);
2154 fprintf (dump_file, "\n");
2155 }
2156
2157 if (opcode == BIT_IOR_EXPR
2158 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2159 tem = invert_truthvalue_loc (loc, tem);
2160
2161 tem = fold_convert_loc (loc, optype, tem);
2162 gsi = gsi_for_stmt (stmt);
2163 /* In rare cases range->exp can be equal to lhs of stmt.
2164 In that case we have to insert after the stmt rather then before
2165 it. */
2166 if (op == range->exp)
2167 {
2168 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2169 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2170 GSI_CONTINUE_LINKING);
2171 }
2172 else
2173 {
2174 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2175 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2176 GSI_SAME_STMT);
2177 gsi_prev (&gsi);
2178 }
2179 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2180 if (gimple_uid (gsi_stmt (gsi)))
2181 break;
2182 else
2183 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2184
2185 oe->op = tem;
2186 range->exp = exp;
2187 range->low = low;
2188 range->high = high;
2189 range->in_p = in_p;
2190 range->strict_overflow_p = false;
2191
2192 for (i = 0; i < count; i++)
2193 {
2194 if (otherrange)
2195 range = otherrange + i;
2196 else
2197 range = otherrangep[i];
2198 oe = (*ops)[range->idx];
2199 /* Now change all the other range test immediate uses, so that
2200 those tests will be optimized away. */
2201 if (opcode == ERROR_MARK)
2202 {
2203 if (oe->op)
2204 oe->op = build_int_cst (TREE_TYPE (oe->op),
2205 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2206 else
2207 oe->op = (oe->rank == BIT_IOR_EXPR
2208 ? boolean_false_node : boolean_true_node);
2209 }
2210 else
2211 oe->op = error_mark_node;
2212 range->exp = NULL_TREE;
2213 }
2214 return true;
2215 }
2216
2217 /* Optimize X == CST1 || X == CST2
2218 if popcount (CST1 ^ CST2) == 1 into
2219 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2220 Similarly for ranges. E.g.
2221 X != 2 && X != 3 && X != 10 && X != 11
2222 will be transformed by the previous optimization into
2223 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2224 and this loop can transform that into
2225 !(((X & ~8) - 2U) <= 1U). */
2226
2227 static bool
2228 optimize_range_tests_xor (enum tree_code opcode, tree type,
2229 tree lowi, tree lowj, tree highi, tree highj,
2230 vec<operand_entry_t> *ops,
2231 struct range_entry *rangei,
2232 struct range_entry *rangej)
2233 {
2234 tree lowxor, highxor, tem, exp;
2235 /* Check lowi ^ lowj == highi ^ highj and
2236 popcount (lowi ^ lowj) == 1. */
2237 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2238 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2239 return false;
2240 if (!integer_pow2p (lowxor))
2241 return false;
2242 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2243 if (!tree_int_cst_equal (lowxor, highxor))
2244 return false;
2245
2246 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2247 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2248 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2249 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2250 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2251 NULL, rangei->in_p, lowj, highj,
2252 rangei->strict_overflow_p
2253 || rangej->strict_overflow_p))
2254 return true;
2255 return false;
2256 }
2257
2258 /* Optimize X == CST1 || X == CST2
2259 if popcount (CST2 - CST1) == 1 into
2260 ((X - CST1) & ~(CST2 - CST1)) == 0.
2261 Similarly for ranges. E.g.
2262 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2263 || X == 75 || X == 45
2264 will be transformed by the previous optimization into
2265 (X - 43U) <= 3U || (X - 75U) <= 3U
2266 and this loop can transform that into
2267 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2268 static bool
2269 optimize_range_tests_diff (enum tree_code opcode, tree type,
2270 tree lowi, tree lowj, tree highi, tree highj,
2271 vec<operand_entry_t> *ops,
2272 struct range_entry *rangei,
2273 struct range_entry *rangej)
2274 {
2275 tree tem1, tem2, mask;
2276 /* Check highi - lowi == highj - lowj. */
2277 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2278 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2279 return false;
2280 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2281 if (!tree_int_cst_equal (tem1, tem2))
2282 return false;
2283 /* Check popcount (lowj - lowi) == 1. */
2284 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2285 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2286 return false;
2287 if (!integer_pow2p (tem1))
2288 return false;
2289
2290 type = unsigned_type_for (type);
2291 tem1 = fold_convert (type, tem1);
2292 tem2 = fold_convert (type, tem2);
2293 lowi = fold_convert (type, lowi);
2294 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2295 tem1 = fold_binary (MINUS_EXPR, type,
2296 fold_convert (type, rangei->exp), lowi);
2297 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2298 lowj = build_int_cst (type, 0);
2299 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2300 NULL, rangei->in_p, lowj, tem2,
2301 rangei->strict_overflow_p
2302 || rangej->strict_overflow_p))
2303 return true;
2304 return false;
2305 }
2306
2307 /* It does some common checks for function optimize_range_tests_xor and
2308 optimize_range_tests_diff.
2309 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2310 Else it calls optimize_range_tests_diff. */
2311
2312 static bool
2313 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2314 bool optimize_xor, vec<operand_entry_t> *ops,
2315 struct range_entry *ranges)
2316 {
2317 int i, j;
2318 bool any_changes = false;
2319 for (i = first; i < length; i++)
2320 {
2321 tree lowi, highi, lowj, highj, type, tem;
2322
2323 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2324 continue;
2325 type = TREE_TYPE (ranges[i].exp);
2326 if (!INTEGRAL_TYPE_P (type))
2327 continue;
2328 lowi = ranges[i].low;
2329 if (lowi == NULL_TREE)
2330 lowi = TYPE_MIN_VALUE (type);
2331 highi = ranges[i].high;
2332 if (highi == NULL_TREE)
2333 continue;
2334 for (j = i + 1; j < length && j < i + 64; j++)
2335 {
2336 bool changes;
2337 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2338 continue;
2339 lowj = ranges[j].low;
2340 if (lowj == NULL_TREE)
2341 continue;
2342 highj = ranges[j].high;
2343 if (highj == NULL_TREE)
2344 highj = TYPE_MAX_VALUE (type);
2345 /* Check lowj > highi. */
2346 tem = fold_binary (GT_EXPR, boolean_type_node,
2347 lowj, highi);
2348 if (tem == NULL_TREE || !integer_onep (tem))
2349 continue;
2350 if (optimize_xor)
2351 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2352 highi, highj, ops,
2353 ranges + i, ranges + j);
2354 else
2355 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2356 highi, highj, ops,
2357 ranges + i, ranges + j);
2358 if (changes)
2359 {
2360 any_changes = true;
2361 break;
2362 }
2363 }
2364 }
2365 return any_changes;
2366 }
2367
2368 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2369 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2370 EXP on success, NULL otherwise. */
2371
2372 static tree
2373 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2374 wide_int *mask, tree *totallowp)
2375 {
2376 tree tem = int_const_binop (MINUS_EXPR, high, low);
2377 if (tem == NULL_TREE
2378 || TREE_CODE (tem) != INTEGER_CST
2379 || TREE_OVERFLOW (tem)
2380 || tree_int_cst_sgn (tem) == -1
2381 || compare_tree_int (tem, prec) != -1)
2382 return NULL_TREE;
2383
2384 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2385 *mask = wi::shifted_mask (0, max, false, prec);
2386 if (TREE_CODE (exp) == BIT_AND_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2388 {
2389 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2390 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2391 if (wi::popcount (msk) == 1
2392 && wi::ltu_p (msk, prec - max))
2393 {
2394 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2395 max += msk.to_uhwi ();
2396 exp = TREE_OPERAND (exp, 0);
2397 if (integer_zerop (low)
2398 && TREE_CODE (exp) == PLUS_EXPR
2399 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2400 {
2401 widest_int bias
2402 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2403 TYPE_PRECISION (TREE_TYPE (low))));
2404 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2405 if (totallowp)
2406 {
2407 *totallowp = tbias;
2408 exp = TREE_OPERAND (exp, 0);
2409 STRIP_NOPS (exp);
2410 return exp;
2411 }
2412 else if (!tree_int_cst_lt (totallow, tbias))
2413 return NULL_TREE;
2414 bias -= wi::to_widest (totallow);
2415 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2416 {
2417 *mask = wi::lshift (*mask, bias);
2418 exp = TREE_OPERAND (exp, 0);
2419 STRIP_NOPS (exp);
2420 return exp;
2421 }
2422 }
2423 }
2424 }
2425 if (totallowp)
2426 return exp;
2427 if (!tree_int_cst_lt (totallow, low))
2428 return exp;
2429 tem = int_const_binop (MINUS_EXPR, low, totallow);
2430 if (tem == NULL_TREE
2431 || TREE_CODE (tem) != INTEGER_CST
2432 || TREE_OVERFLOW (tem)
2433 || compare_tree_int (tem, prec - max) == 1)
2434 return NULL_TREE;
2435
2436 *mask = wi::lshift (*mask, wi::to_widest (tem));
2437 return exp;
2438 }
2439
2440 /* Attempt to optimize small range tests using bit test.
2441 E.g.
2442 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2443 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2444 has been by earlier optimizations optimized into:
2445 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2446 As all the 43 through 82 range is less than 64 numbers,
2447 for 64-bit word targets optimize that into:
2448 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2449
2450 static bool
2451 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2452 vec<operand_entry_t> *ops,
2453 struct range_entry *ranges)
2454 {
2455 int i, j;
2456 bool any_changes = false;
2457 int prec = GET_MODE_BITSIZE (word_mode);
2458 auto_vec<struct range_entry *, 64> candidates;
2459
2460 for (i = first; i < length - 2; i++)
2461 {
2462 tree lowi, highi, lowj, highj, type;
2463
2464 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2465 continue;
2466 type = TREE_TYPE (ranges[i].exp);
2467 if (!INTEGRAL_TYPE_P (type))
2468 continue;
2469 lowi = ranges[i].low;
2470 if (lowi == NULL_TREE)
2471 lowi = TYPE_MIN_VALUE (type);
2472 highi = ranges[i].high;
2473 if (highi == NULL_TREE)
2474 continue;
2475 wide_int mask;
2476 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2477 highi, &mask, &lowi);
2478 if (exp == NULL_TREE)
2479 continue;
2480 bool strict_overflow_p = ranges[i].strict_overflow_p;
2481 candidates.truncate (0);
2482 int end = MIN (i + 64, length);
2483 for (j = i + 1; j < end; j++)
2484 {
2485 tree exp2;
2486 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2487 continue;
2488 if (ranges[j].exp == exp)
2489 ;
2490 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2491 {
2492 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2493 if (exp2 == exp)
2494 ;
2495 else if (TREE_CODE (exp2) == PLUS_EXPR)
2496 {
2497 exp2 = TREE_OPERAND (exp2, 0);
2498 STRIP_NOPS (exp2);
2499 if (exp2 != exp)
2500 continue;
2501 }
2502 else
2503 continue;
2504 }
2505 else
2506 continue;
2507 lowj = ranges[j].low;
2508 if (lowj == NULL_TREE)
2509 continue;
2510 highj = ranges[j].high;
2511 if (highj == NULL_TREE)
2512 highj = TYPE_MAX_VALUE (type);
2513 wide_int mask2;
2514 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2515 highj, &mask2, NULL);
2516 if (exp2 != exp)
2517 continue;
2518 mask |= mask2;
2519 strict_overflow_p |= ranges[j].strict_overflow_p;
2520 candidates.safe_push (&ranges[j]);
2521 }
2522
2523 /* If we need otherwise 3 or more comparisons, use a bit test. */
2524 if (candidates.length () >= 2)
2525 {
2526 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2527 wi::to_widest (lowi)
2528 + prec - 1 - wi::clz (mask));
2529 operand_entry_t oe = (*ops)[ranges[i].idx];
2530 tree op = oe->op;
2531 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2532 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2533 location_t loc = gimple_location (stmt);
2534 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2535
2536 /* See if it isn't cheaper to pretend the minimum value of the
2537 range is 0, if maximum value is small enough.
2538 We can avoid then subtraction of the minimum value, but the
2539 mask constant could be perhaps more expensive. */
2540 if (compare_tree_int (lowi, 0) > 0
2541 && compare_tree_int (high, prec) < 0)
2542 {
2543 int cost_diff;
2544 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2545 rtx reg = gen_raw_REG (word_mode, 10000);
2546 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2547 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2548 GEN_INT (-m)), speed_p);
2549 rtx r = immed_wide_int_const (mask, word_mode);
2550 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2551 speed_p);
2552 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2553 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2554 speed_p);
2555 if (cost_diff > 0)
2556 {
2557 mask = wi::lshift (mask, m);
2558 lowi = build_zero_cst (TREE_TYPE (lowi));
2559 }
2560 }
2561
2562 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2563 false, lowi, high);
2564 if (tem == NULL_TREE || is_gimple_val (tem))
2565 continue;
2566 tree etype = unsigned_type_for (TREE_TYPE (exp));
2567 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2568 fold_convert_loc (loc, etype, exp),
2569 fold_convert_loc (loc, etype, lowi));
2570 exp = fold_convert_loc (loc, integer_type_node, exp);
2571 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2572 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2573 build_int_cst (word_type, 1), exp);
2574 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2575 wide_int_to_tree (word_type, mask));
2576 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2577 build_zero_cst (word_type));
2578 if (is_gimple_val (exp))
2579 continue;
2580
2581 /* The shift might have undefined behavior if TEM is true,
2582 but reassociate_bb isn't prepared to have basic blocks
2583 split when it is running. So, temporarily emit a code
2584 with BIT_IOR_EXPR instead of &&, and fix it up in
2585 branch_fixup. */
2586 gimple_seq seq;
2587 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2588 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2589 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2590 gimple_seq seq2;
2591 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2592 gimple_seq_add_seq_without_update (&seq, seq2);
2593 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2594 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2595 gimple g
2596 = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2597 make_ssa_name (optype, NULL),
2598 tem, exp);
2599 gimple_set_location (g, loc);
2600 gimple_seq_add_stmt_without_update (&seq, g);
2601 exp = gimple_assign_lhs (g);
2602 tree val = build_zero_cst (optype);
2603 if (update_range_test (&ranges[i], NULL, candidates.address (),
2604 candidates.length (), opcode, ops, exp,
2605 seq, false, val, val, strict_overflow_p))
2606 {
2607 any_changes = true;
2608 reassoc_branch_fixups.safe_push (tem);
2609 }
2610 else
2611 gimple_seq_discard (seq);
2612 }
2613 }
2614 return any_changes;
2615 }
2616
2617 /* Optimize range tests, similarly how fold_range_test optimizes
2618 it on trees. The tree code for the binary
2619 operation between all the operands is OPCODE.
2620 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2621 maybe_optimize_range_tests for inter-bb range optimization.
2622 In that case if oe->op is NULL, oe->id is bb->index whose
2623 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2624 the actual opcode. */
2625
2626 static bool
2627 optimize_range_tests (enum tree_code opcode,
2628 vec<operand_entry_t> *ops)
2629 {
2630 unsigned int length = ops->length (), i, j, first;
2631 operand_entry_t oe;
2632 struct range_entry *ranges;
2633 bool any_changes = false;
2634
2635 if (length == 1)
2636 return false;
2637
2638 ranges = XNEWVEC (struct range_entry, length);
2639 for (i = 0; i < length; i++)
2640 {
2641 oe = (*ops)[i];
2642 ranges[i].idx = i;
2643 init_range_entry (ranges + i, oe->op,
2644 oe->op ? NULL :
2645 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2646 /* For | invert it now, we will invert it again before emitting
2647 the optimized expression. */
2648 if (opcode == BIT_IOR_EXPR
2649 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2650 ranges[i].in_p = !ranges[i].in_p;
2651 }
2652
2653 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2654 for (i = 0; i < length; i++)
2655 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2656 break;
2657
2658 /* Try to merge ranges. */
2659 for (first = i; i < length; i++)
2660 {
2661 tree low = ranges[i].low;
2662 tree high = ranges[i].high;
2663 int in_p = ranges[i].in_p;
2664 bool strict_overflow_p = ranges[i].strict_overflow_p;
2665 int update_fail_count = 0;
2666
2667 for (j = i + 1; j < length; j++)
2668 {
2669 if (ranges[i].exp != ranges[j].exp)
2670 break;
2671 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2672 ranges[j].in_p, ranges[j].low, ranges[j].high))
2673 break;
2674 strict_overflow_p |= ranges[j].strict_overflow_p;
2675 }
2676
2677 if (j == i + 1)
2678 continue;
2679
2680 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2681 opcode, ops, ranges[i].exp, NULL, in_p,
2682 low, high, strict_overflow_p))
2683 {
2684 i = j - 1;
2685 any_changes = true;
2686 }
2687 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2688 while update_range_test would fail. */
2689 else if (update_fail_count == 64)
2690 i = j - 1;
2691 else
2692 ++update_fail_count;
2693 }
2694
2695 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2696 ops, ranges);
2697
2698 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2699 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2700 ops, ranges);
2701 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2702 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2703 ops, ranges);
2704
2705 if (any_changes && opcode != ERROR_MARK)
2706 {
2707 j = 0;
2708 FOR_EACH_VEC_ELT (*ops, i, oe)
2709 {
2710 if (oe->op == error_mark_node)
2711 continue;
2712 else if (i != j)
2713 (*ops)[j] = oe;
2714 j++;
2715 }
2716 ops->truncate (j);
2717 }
2718
2719 XDELETEVEC (ranges);
2720 return any_changes;
2721 }
2722
2723 /* Return true if STMT is a cast like:
2724 <bb N>:
2725 ...
2726 _123 = (int) _234;
2727
2728 <bb M>:
2729 # _345 = PHI <_123(N), 1(...), 1(...)>
2730 where _234 has bool type, _123 has single use and
2731 bb N has a single successor M. This is commonly used in
2732 the last block of a range test. */
2733
2734 static bool
2735 final_range_test_p (gimple stmt)
2736 {
2737 basic_block bb, rhs_bb;
2738 edge e;
2739 tree lhs, rhs;
2740 use_operand_p use_p;
2741 gimple use_stmt;
2742
2743 if (!gimple_assign_cast_p (stmt))
2744 return false;
2745 bb = gimple_bb (stmt);
2746 if (!single_succ_p (bb))
2747 return false;
2748 e = single_succ_edge (bb);
2749 if (e->flags & EDGE_COMPLEX)
2750 return false;
2751
2752 lhs = gimple_assign_lhs (stmt);
2753 rhs = gimple_assign_rhs1 (stmt);
2754 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2755 || TREE_CODE (rhs) != SSA_NAME
2756 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2757 return false;
2758
2759 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2760 if (!single_imm_use (lhs, &use_p, &use_stmt))
2761 return false;
2762
2763 if (gimple_code (use_stmt) != GIMPLE_PHI
2764 || gimple_bb (use_stmt) != e->dest)
2765 return false;
2766
2767 /* And that the rhs is defined in the same loop. */
2768 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2769 if (rhs_bb == NULL
2770 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2771 return false;
2772
2773 return true;
2774 }
2775
2776 /* Return true if BB is suitable basic block for inter-bb range test
2777 optimization. If BACKWARD is true, BB should be the only predecessor
2778 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2779 or compared with to find a common basic block to which all conditions
2780 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2781 be the only predecessor of BB. */
2782
2783 static bool
2784 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2785 bool backward)
2786 {
2787 edge_iterator ei, ei2;
2788 edge e, e2;
2789 gimple stmt;
2790 gimple_stmt_iterator gsi;
2791 bool other_edge_seen = false;
2792 bool is_cond;
2793
2794 if (test_bb == bb)
2795 return false;
2796 /* Check last stmt first. */
2797 stmt = last_stmt (bb);
2798 if (stmt == NULL
2799 || (gimple_code (stmt) != GIMPLE_COND
2800 && (backward || !final_range_test_p (stmt)))
2801 || gimple_visited_p (stmt)
2802 || stmt_could_throw_p (stmt)
2803 || *other_bb == bb)
2804 return false;
2805 is_cond = gimple_code (stmt) == GIMPLE_COND;
2806 if (is_cond)
2807 {
2808 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2809 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2810 to *OTHER_BB (if not set yet, try to find it out). */
2811 if (EDGE_COUNT (bb->succs) != 2)
2812 return false;
2813 FOR_EACH_EDGE (e, ei, bb->succs)
2814 {
2815 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2816 return false;
2817 if (e->dest == test_bb)
2818 {
2819 if (backward)
2820 continue;
2821 else
2822 return false;
2823 }
2824 if (e->dest == bb)
2825 return false;
2826 if (*other_bb == NULL)
2827 {
2828 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2829 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2830 return false;
2831 else if (e->dest == e2->dest)
2832 *other_bb = e->dest;
2833 if (*other_bb == NULL)
2834 return false;
2835 }
2836 if (e->dest == *other_bb)
2837 other_edge_seen = true;
2838 else if (backward)
2839 return false;
2840 }
2841 if (*other_bb == NULL || !other_edge_seen)
2842 return false;
2843 }
2844 else if (single_succ (bb) != *other_bb)
2845 return false;
2846
2847 /* Now check all PHIs of *OTHER_BB. */
2848 e = find_edge (bb, *other_bb);
2849 e2 = find_edge (test_bb, *other_bb);
2850 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2851 {
2852 gimple phi = gsi_stmt (gsi);
2853 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2854 corresponding to BB and TEST_BB predecessor must be the same. */
2855 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2856 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2857 {
2858 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2859 one of the PHIs should have the lhs of the last stmt in
2860 that block as PHI arg and that PHI should have 0 or 1
2861 corresponding to it in all other range test basic blocks
2862 considered. */
2863 if (!is_cond)
2864 {
2865 if (gimple_phi_arg_def (phi, e->dest_idx)
2866 == gimple_assign_lhs (stmt)
2867 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2868 || integer_onep (gimple_phi_arg_def (phi,
2869 e2->dest_idx))))
2870 continue;
2871 }
2872 else
2873 {
2874 gimple test_last = last_stmt (test_bb);
2875 if (gimple_code (test_last) != GIMPLE_COND
2876 && gimple_phi_arg_def (phi, e2->dest_idx)
2877 == gimple_assign_lhs (test_last)
2878 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2879 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2880 continue;
2881 }
2882
2883 return false;
2884 }
2885 }
2886 return true;
2887 }
2888
2889 /* Return true if BB doesn't have side-effects that would disallow
2890 range test optimization, all SSA_NAMEs set in the bb are consumed
2891 in the bb and there are no PHIs. */
2892
2893 static bool
2894 no_side_effect_bb (basic_block bb)
2895 {
2896 gimple_stmt_iterator gsi;
2897 gimple last;
2898
2899 if (!gimple_seq_empty_p (phi_nodes (bb)))
2900 return false;
2901 last = last_stmt (bb);
2902 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2903 {
2904 gimple stmt = gsi_stmt (gsi);
2905 tree lhs;
2906 imm_use_iterator imm_iter;
2907 use_operand_p use_p;
2908
2909 if (is_gimple_debug (stmt))
2910 continue;
2911 if (gimple_has_side_effects (stmt))
2912 return false;
2913 if (stmt == last)
2914 return true;
2915 if (!is_gimple_assign (stmt))
2916 return false;
2917 lhs = gimple_assign_lhs (stmt);
2918 if (TREE_CODE (lhs) != SSA_NAME)
2919 return false;
2920 if (gimple_assign_rhs_could_trap_p (stmt))
2921 return false;
2922 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2923 {
2924 gimple use_stmt = USE_STMT (use_p);
2925 if (is_gimple_debug (use_stmt))
2926 continue;
2927 if (gimple_bb (use_stmt) != bb)
2928 return false;
2929 }
2930 }
2931 return false;
2932 }
2933
2934 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2935 return true and fill in *OPS recursively. */
2936
2937 static bool
2938 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2939 struct loop *loop)
2940 {
2941 gimple stmt = SSA_NAME_DEF_STMT (var);
2942 tree rhs[2];
2943 int i;
2944
2945 if (!is_reassociable_op (stmt, code, loop))
2946 return false;
2947
2948 rhs[0] = gimple_assign_rhs1 (stmt);
2949 rhs[1] = gimple_assign_rhs2 (stmt);
2950 gimple_set_visited (stmt, true);
2951 for (i = 0; i < 2; i++)
2952 if (TREE_CODE (rhs[i]) == SSA_NAME
2953 && !get_ops (rhs[i], code, ops, loop)
2954 && has_single_use (rhs[i]))
2955 {
2956 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2957
2958 oe->op = rhs[i];
2959 oe->rank = code;
2960 oe->id = 0;
2961 oe->count = 1;
2962 ops->safe_push (oe);
2963 }
2964 return true;
2965 }
2966
2967 /* Find the ops that were added by get_ops starting from VAR, see if
2968 they were changed during update_range_test and if yes, create new
2969 stmts. */
2970
2971 static tree
2972 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2973 unsigned int *pidx, struct loop *loop)
2974 {
2975 gimple stmt = SSA_NAME_DEF_STMT (var);
2976 tree rhs[4];
2977 int i;
2978
2979 if (!is_reassociable_op (stmt, code, loop))
2980 return NULL;
2981
2982 rhs[0] = gimple_assign_rhs1 (stmt);
2983 rhs[1] = gimple_assign_rhs2 (stmt);
2984 rhs[2] = rhs[0];
2985 rhs[3] = rhs[1];
2986 for (i = 0; i < 2; i++)
2987 if (TREE_CODE (rhs[i]) == SSA_NAME)
2988 {
2989 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2990 if (rhs[2 + i] == NULL_TREE)
2991 {
2992 if (has_single_use (rhs[i]))
2993 rhs[2 + i] = ops[(*pidx)++]->op;
2994 else
2995 rhs[2 + i] = rhs[i];
2996 }
2997 }
2998 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2999 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3000 {
3001 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3002 var = make_ssa_name (TREE_TYPE (var), NULL);
3003 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3004 var, rhs[2], rhs[3]);
3005 gimple_set_uid (g, gimple_uid (stmt));
3006 gimple_set_visited (g, true);
3007 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3008 }
3009 return var;
3010 }
3011
3012 /* Structure to track the initial value passed to get_ops and
3013 the range in the ops vector for each basic block. */
3014
3015 struct inter_bb_range_test_entry
3016 {
3017 tree op;
3018 unsigned int first_idx, last_idx;
3019 };
3020
3021 /* Inter-bb range test optimization. */
3022
3023 static void
3024 maybe_optimize_range_tests (gimple stmt)
3025 {
3026 basic_block first_bb = gimple_bb (stmt);
3027 basic_block last_bb = first_bb;
3028 basic_block other_bb = NULL;
3029 basic_block bb;
3030 edge_iterator ei;
3031 edge e;
3032 auto_vec<operand_entry_t> ops;
3033 auto_vec<inter_bb_range_test_entry> bbinfo;
3034 bool any_changes = false;
3035
3036 /* Consider only basic blocks that end with GIMPLE_COND or
3037 a cast statement satisfying final_range_test_p. All
3038 but the last bb in the first_bb .. last_bb range
3039 should end with GIMPLE_COND. */
3040 if (gimple_code (stmt) == GIMPLE_COND)
3041 {
3042 if (EDGE_COUNT (first_bb->succs) != 2)
3043 return;
3044 }
3045 else if (final_range_test_p (stmt))
3046 other_bb = single_succ (first_bb);
3047 else
3048 return;
3049
3050 if (stmt_could_throw_p (stmt))
3051 return;
3052
3053 /* As relative ordering of post-dominator sons isn't fixed,
3054 maybe_optimize_range_tests can be called first on any
3055 bb in the range we want to optimize. So, start searching
3056 backwards, if first_bb can be set to a predecessor. */
3057 while (single_pred_p (first_bb))
3058 {
3059 basic_block pred_bb = single_pred (first_bb);
3060 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3061 break;
3062 if (!no_side_effect_bb (first_bb))
3063 break;
3064 first_bb = pred_bb;
3065 }
3066 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3067 Before starting forward search in last_bb successors, find
3068 out the other_bb. */
3069 if (first_bb == last_bb)
3070 {
3071 other_bb = NULL;
3072 /* As non-GIMPLE_COND last stmt always terminates the range,
3073 if forward search didn't discover anything, just give up. */
3074 if (gimple_code (stmt) != GIMPLE_COND)
3075 return;
3076 /* Look at both successors. Either it ends with a GIMPLE_COND
3077 and satisfies suitable_cond_bb, or ends with a cast and
3078 other_bb is that cast's successor. */
3079 FOR_EACH_EDGE (e, ei, first_bb->succs)
3080 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3081 || e->dest == first_bb)
3082 return;
3083 else if (single_pred_p (e->dest))
3084 {
3085 stmt = last_stmt (e->dest);
3086 if (stmt
3087 && gimple_code (stmt) == GIMPLE_COND
3088 && EDGE_COUNT (e->dest->succs) == 2)
3089 {
3090 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3091 break;
3092 else
3093 other_bb = NULL;
3094 }
3095 else if (stmt
3096 && final_range_test_p (stmt)
3097 && find_edge (first_bb, single_succ (e->dest)))
3098 {
3099 other_bb = single_succ (e->dest);
3100 if (other_bb == first_bb)
3101 other_bb = NULL;
3102 }
3103 }
3104 if (other_bb == NULL)
3105 return;
3106 }
3107 /* Now do the forward search, moving last_bb to successor bbs
3108 that aren't other_bb. */
3109 while (EDGE_COUNT (last_bb->succs) == 2)
3110 {
3111 FOR_EACH_EDGE (e, ei, last_bb->succs)
3112 if (e->dest != other_bb)
3113 break;
3114 if (e == NULL)
3115 break;
3116 if (!single_pred_p (e->dest))
3117 break;
3118 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3119 break;
3120 if (!no_side_effect_bb (e->dest))
3121 break;
3122 last_bb = e->dest;
3123 }
3124 if (first_bb == last_bb)
3125 return;
3126 /* Here basic blocks first_bb through last_bb's predecessor
3127 end with GIMPLE_COND, all of them have one of the edges to
3128 other_bb and another to another block in the range,
3129 all blocks except first_bb don't have side-effects and
3130 last_bb ends with either GIMPLE_COND, or cast satisfying
3131 final_range_test_p. */
3132 for (bb = last_bb; ; bb = single_pred (bb))
3133 {
3134 enum tree_code code;
3135 tree lhs, rhs;
3136 inter_bb_range_test_entry bb_ent;
3137
3138 bb_ent.op = NULL_TREE;
3139 bb_ent.first_idx = ops.length ();
3140 bb_ent.last_idx = bb_ent.first_idx;
3141 e = find_edge (bb, other_bb);
3142 stmt = last_stmt (bb);
3143 gimple_set_visited (stmt, true);
3144 if (gimple_code (stmt) != GIMPLE_COND)
3145 {
3146 use_operand_p use_p;
3147 gimple phi;
3148 edge e2;
3149 unsigned int d;
3150
3151 lhs = gimple_assign_lhs (stmt);
3152 rhs = gimple_assign_rhs1 (stmt);
3153 gcc_assert (bb == last_bb);
3154
3155 /* stmt is
3156 _123 = (int) _234;
3157
3158 followed by:
3159 <bb M>:
3160 # _345 = PHI <_123(N), 1(...), 1(...)>
3161
3162 or 0 instead of 1. If it is 0, the _234
3163 range test is anded together with all the
3164 other range tests, if it is 1, it is ored with
3165 them. */
3166 single_imm_use (lhs, &use_p, &phi);
3167 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3168 e2 = find_edge (first_bb, other_bb);
3169 d = e2->dest_idx;
3170 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3171 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3172 code = BIT_AND_EXPR;
3173 else
3174 {
3175 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3176 code = BIT_IOR_EXPR;
3177 }
3178
3179 /* If _234 SSA_NAME_DEF_STMT is
3180 _234 = _567 | _789;
3181 (or &, corresponding to 1/0 in the phi arguments,
3182 push into ops the individual range test arguments
3183 of the bitwise or resp. and, recursively. */
3184 if (!get_ops (rhs, code, &ops,
3185 loop_containing_stmt (stmt))
3186 && has_single_use (rhs))
3187 {
3188 /* Otherwise, push the _234 range test itself. */
3189 operand_entry_t oe
3190 = (operand_entry_t) pool_alloc (operand_entry_pool);
3191
3192 oe->op = rhs;
3193 oe->rank = code;
3194 oe->id = 0;
3195 oe->count = 1;
3196 ops.safe_push (oe);
3197 bb_ent.last_idx++;
3198 }
3199 else
3200 bb_ent.last_idx = ops.length ();
3201 bb_ent.op = rhs;
3202 bbinfo.safe_push (bb_ent);
3203 continue;
3204 }
3205 /* Otherwise stmt is GIMPLE_COND. */
3206 code = gimple_cond_code (stmt);
3207 lhs = gimple_cond_lhs (stmt);
3208 rhs = gimple_cond_rhs (stmt);
3209 if (TREE_CODE (lhs) == SSA_NAME
3210 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3211 && ((code != EQ_EXPR && code != NE_EXPR)
3212 || rhs != boolean_false_node
3213 /* Either push into ops the individual bitwise
3214 or resp. and operands, depending on which
3215 edge is other_bb. */
3216 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3217 ^ (code == EQ_EXPR))
3218 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3219 loop_containing_stmt (stmt))))
3220 {
3221 /* Or push the GIMPLE_COND stmt itself. */
3222 operand_entry_t oe
3223 = (operand_entry_t) pool_alloc (operand_entry_pool);
3224
3225 oe->op = NULL;
3226 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3227 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3228 /* oe->op = NULL signs that there is no SSA_NAME
3229 for the range test, and oe->id instead is the
3230 basic block number, at which's end the GIMPLE_COND
3231 is. */
3232 oe->id = bb->index;
3233 oe->count = 1;
3234 ops.safe_push (oe);
3235 bb_ent.op = NULL;
3236 bb_ent.last_idx++;
3237 }
3238 else if (ops.length () > bb_ent.first_idx)
3239 {
3240 bb_ent.op = lhs;
3241 bb_ent.last_idx = ops.length ();
3242 }
3243 bbinfo.safe_push (bb_ent);
3244 if (bb == first_bb)
3245 break;
3246 }
3247 if (ops.length () > 1)
3248 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3249 if (any_changes)
3250 {
3251 unsigned int idx;
3252 /* update_ops relies on has_single_use predicates returning the
3253 same values as it did during get_ops earlier. Additionally it
3254 never removes statements, only adds new ones and it should walk
3255 from the single imm use and check the predicate already before
3256 making those changes.
3257 On the other side, the handling of GIMPLE_COND directly can turn
3258 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3259 it needs to be done in a separate loop afterwards. */
3260 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3261 {
3262 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3263 && bbinfo[idx].op != NULL_TREE)
3264 {
3265 tree new_op;
3266
3267 stmt = last_stmt (bb);
3268 new_op = update_ops (bbinfo[idx].op,
3269 (enum tree_code)
3270 ops[bbinfo[idx].first_idx]->rank,
3271 ops, &bbinfo[idx].first_idx,
3272 loop_containing_stmt (stmt));
3273 if (new_op == NULL_TREE)
3274 {
3275 gcc_assert (bb == last_bb);
3276 new_op = ops[bbinfo[idx].first_idx++]->op;
3277 }
3278 if (bbinfo[idx].op != new_op)
3279 {
3280 imm_use_iterator iter;
3281 use_operand_p use_p;
3282 gimple use_stmt, cast_stmt = NULL;
3283
3284 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3285 if (is_gimple_debug (use_stmt))
3286 continue;
3287 else if (gimple_code (use_stmt) == GIMPLE_COND
3288 || gimple_code (use_stmt) == GIMPLE_PHI)
3289 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3290 SET_USE (use_p, new_op);
3291 else if (gimple_assign_cast_p (use_stmt))
3292 cast_stmt = use_stmt;
3293 else
3294 gcc_unreachable ();
3295 if (cast_stmt)
3296 {
3297 gcc_assert (bb == last_bb);
3298 tree lhs = gimple_assign_lhs (cast_stmt);
3299 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3300 enum tree_code rhs_code
3301 = gimple_assign_rhs_code (cast_stmt);
3302 gimple g;
3303 if (is_gimple_min_invariant (new_op))
3304 {
3305 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3306 g = gimple_build_assign (new_lhs, new_op);
3307 }
3308 else
3309 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3310 new_op, NULL_TREE);
3311 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3312 gimple_set_uid (g, gimple_uid (cast_stmt));
3313 gimple_set_visited (g, true);
3314 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3315 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3316 if (is_gimple_debug (use_stmt))
3317 continue;
3318 else if (gimple_code (use_stmt) == GIMPLE_COND
3319 || gimple_code (use_stmt) == GIMPLE_PHI)
3320 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3321 SET_USE (use_p, new_lhs);
3322 else
3323 gcc_unreachable ();
3324 }
3325 }
3326 }
3327 if (bb == first_bb)
3328 break;
3329 }
3330 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3331 {
3332 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3333 && bbinfo[idx].op == NULL_TREE
3334 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3335 {
3336 stmt = last_stmt (bb);
3337 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3338 gimple_cond_make_false (stmt);
3339 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3340 gimple_cond_make_true (stmt);
3341 else
3342 {
3343 gimple_cond_set_code (stmt, NE_EXPR);
3344 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3345 gimple_cond_set_rhs (stmt, boolean_false_node);
3346 }
3347 update_stmt (stmt);
3348 }
3349 if (bb == first_bb)
3350 break;
3351 }
3352 }
3353 }
3354
3355 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3356 of STMT in it's operands. This is also known as a "destructive
3357 update" operation. */
3358
3359 static bool
3360 is_phi_for_stmt (gimple stmt, tree operand)
3361 {
3362 gimple def_stmt;
3363 tree lhs;
3364 use_operand_p arg_p;
3365 ssa_op_iter i;
3366
3367 if (TREE_CODE (operand) != SSA_NAME)
3368 return false;
3369
3370 lhs = gimple_assign_lhs (stmt);
3371
3372 def_stmt = SSA_NAME_DEF_STMT (operand);
3373 if (gimple_code (def_stmt) != GIMPLE_PHI)
3374 return false;
3375
3376 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3377 if (lhs == USE_FROM_PTR (arg_p))
3378 return true;
3379 return false;
3380 }
3381
3382 /* Remove def stmt of VAR if VAR has zero uses and recurse
3383 on rhs1 operand if so. */
3384
3385 static void
3386 remove_visited_stmt_chain (tree var)
3387 {
3388 gimple stmt;
3389 gimple_stmt_iterator gsi;
3390
3391 while (1)
3392 {
3393 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3394 return;
3395 stmt = SSA_NAME_DEF_STMT (var);
3396 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3397 {
3398 var = gimple_assign_rhs1 (stmt);
3399 gsi = gsi_for_stmt (stmt);
3400 reassoc_remove_stmt (&gsi);
3401 release_defs (stmt);
3402 }
3403 else
3404 return;
3405 }
3406 }
3407
3408 /* This function checks three consequtive operands in
3409 passed operands vector OPS starting from OPINDEX and
3410 swaps two operands if it is profitable for binary operation
3411 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3412
3413 We pair ops with the same rank if possible.
3414
3415 The alternative we try is to see if STMT is a destructive
3416 update style statement, which is like:
3417 b = phi (a, ...)
3418 a = c + b;
3419 In that case, we want to use the destructive update form to
3420 expose the possible vectorizer sum reduction opportunity.
3421 In that case, the third operand will be the phi node. This
3422 check is not performed if STMT is null.
3423
3424 We could, of course, try to be better as noted above, and do a
3425 lot of work to try to find these opportunities in >3 operand
3426 cases, but it is unlikely to be worth it. */
3427
3428 static void
3429 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3430 unsigned int opindex, gimple stmt)
3431 {
3432 operand_entry_t oe1, oe2, oe3;
3433
3434 oe1 = ops[opindex];
3435 oe2 = ops[opindex + 1];
3436 oe3 = ops[opindex + 2];
3437
3438 if ((oe1->rank == oe2->rank
3439 && oe2->rank != oe3->rank)
3440 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3441 && !is_phi_for_stmt (stmt, oe1->op)
3442 && !is_phi_for_stmt (stmt, oe2->op)))
3443 {
3444 struct operand_entry temp = *oe3;
3445 oe3->op = oe1->op;
3446 oe3->rank = oe1->rank;
3447 oe1->op = temp.op;
3448 oe1->rank= temp.rank;
3449 }
3450 else if ((oe1->rank == oe3->rank
3451 && oe2->rank != oe3->rank)
3452 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3453 && !is_phi_for_stmt (stmt, oe1->op)
3454 && !is_phi_for_stmt (stmt, oe3->op)))
3455 {
3456 struct operand_entry temp = *oe2;
3457 oe2->op = oe1->op;
3458 oe2->rank = oe1->rank;
3459 oe1->op = temp.op;
3460 oe1->rank = temp.rank;
3461 }
3462 }
3463
3464 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3465 two definitions, otherwise return STMT. */
3466
3467 static inline gimple
3468 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3469 {
3470 if (TREE_CODE (rhs1) == SSA_NAME
3471 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3472 stmt = SSA_NAME_DEF_STMT (rhs1);
3473 if (TREE_CODE (rhs2) == SSA_NAME
3474 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3475 stmt = SSA_NAME_DEF_STMT (rhs2);
3476 return stmt;
3477 }
3478
3479 /* Recursively rewrite our linearized statements so that the operators
3480 match those in OPS[OPINDEX], putting the computation in rank
3481 order. Return new lhs. */
3482
3483 static tree
3484 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3485 vec<operand_entry_t> ops, bool changed)
3486 {
3487 tree rhs1 = gimple_assign_rhs1 (stmt);
3488 tree rhs2 = gimple_assign_rhs2 (stmt);
3489 tree lhs = gimple_assign_lhs (stmt);
3490 operand_entry_t oe;
3491
3492 /* The final recursion case for this function is that you have
3493 exactly two operations left.
3494 If we had one exactly one op in the entire list to start with, we
3495 would have never called this function, and the tail recursion
3496 rewrites them one at a time. */
3497 if (opindex + 2 == ops.length ())
3498 {
3499 operand_entry_t oe1, oe2;
3500
3501 oe1 = ops[opindex];
3502 oe2 = ops[opindex + 1];
3503
3504 if (rhs1 != oe1->op || rhs2 != oe2->op)
3505 {
3506 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3507 unsigned int uid = gimple_uid (stmt);
3508
3509 if (dump_file && (dump_flags & TDF_DETAILS))
3510 {
3511 fprintf (dump_file, "Transforming ");
3512 print_gimple_stmt (dump_file, stmt, 0, 0);
3513 }
3514
3515 if (changed)
3516 {
3517 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3518 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3519 stmt
3520 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3521 lhs, oe1->op, oe2->op);
3522 gimple_set_uid (stmt, uid);
3523 gimple_set_visited (stmt, true);
3524 if (insert_point == gsi_stmt (gsi))
3525 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3526 else
3527 insert_stmt_after (stmt, insert_point);
3528 }
3529 else
3530 {
3531 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3532 == stmt);
3533 gimple_assign_set_rhs1 (stmt, oe1->op);
3534 gimple_assign_set_rhs2 (stmt, oe2->op);
3535 update_stmt (stmt);
3536 }
3537
3538 if (rhs1 != oe1->op && rhs1 != oe2->op)
3539 remove_visited_stmt_chain (rhs1);
3540
3541 if (dump_file && (dump_flags & TDF_DETAILS))
3542 {
3543 fprintf (dump_file, " into ");
3544 print_gimple_stmt (dump_file, stmt, 0, 0);
3545 }
3546 }
3547 return lhs;
3548 }
3549
3550 /* If we hit here, we should have 3 or more ops left. */
3551 gcc_assert (opindex + 2 < ops.length ());
3552
3553 /* Rewrite the next operator. */
3554 oe = ops[opindex];
3555
3556 /* Recurse on the LHS of the binary operator, which is guaranteed to
3557 be the non-leaf side. */
3558 tree new_rhs1
3559 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3560 changed || oe->op != rhs2);
3561
3562 if (oe->op != rhs2 || new_rhs1 != rhs1)
3563 {
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3565 {
3566 fprintf (dump_file, "Transforming ");
3567 print_gimple_stmt (dump_file, stmt, 0, 0);
3568 }
3569
3570 /* If changed is false, this is either opindex == 0
3571 or all outer rhs2's were equal to corresponding oe->op,
3572 and powi_result is NULL.
3573 That means lhs is equivalent before and after reassociation.
3574 Otherwise ensure the old lhs SSA_NAME is not reused and
3575 create a new stmt as well, so that any debug stmts will be
3576 properly adjusted. */
3577 if (changed)
3578 {
3579 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3580 unsigned int uid = gimple_uid (stmt);
3581 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3582
3583 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3584 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3585 lhs, new_rhs1, oe->op);
3586 gimple_set_uid (stmt, uid);
3587 gimple_set_visited (stmt, true);
3588 if (insert_point == gsi_stmt (gsi))
3589 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3590 else
3591 insert_stmt_after (stmt, insert_point);
3592 }
3593 else
3594 {
3595 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3596 == stmt);
3597 gimple_assign_set_rhs1 (stmt, new_rhs1);
3598 gimple_assign_set_rhs2 (stmt, oe->op);
3599 update_stmt (stmt);
3600 }
3601
3602 if (dump_file && (dump_flags & TDF_DETAILS))
3603 {
3604 fprintf (dump_file, " into ");
3605 print_gimple_stmt (dump_file, stmt, 0, 0);
3606 }
3607 }
3608 return lhs;
3609 }
3610
3611 /* Find out how many cycles we need to compute statements chain.
3612 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3613 maximum number of independent statements we may execute per cycle. */
3614
3615 static int
3616 get_required_cycles (int ops_num, int cpu_width)
3617 {
3618 int res;
3619 int elog;
3620 unsigned int rest;
3621
3622 /* While we have more than 2 * cpu_width operands
3623 we may reduce number of operands by cpu_width
3624 per cycle. */
3625 res = ops_num / (2 * cpu_width);
3626
3627 /* Remained operands count may be reduced twice per cycle
3628 until we have only one operand. */
3629 rest = (unsigned)(ops_num - res * cpu_width);
3630 elog = exact_log2 (rest);
3631 if (elog >= 0)
3632 res += elog;
3633 else
3634 res += floor_log2 (rest) + 1;
3635
3636 return res;
3637 }
3638
3639 /* Returns an optimal number of registers to use for computation of
3640 given statements. */
3641
3642 static int
3643 get_reassociation_width (int ops_num, enum tree_code opc,
3644 machine_mode mode)
3645 {
3646 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3647 int width;
3648 int width_min;
3649 int cycles_best;
3650
3651 if (param_width > 0)
3652 width = param_width;
3653 else
3654 width = targetm.sched.reassociation_width (opc, mode);
3655
3656 if (width == 1)
3657 return width;
3658
3659 /* Get the minimal time required for sequence computation. */
3660 cycles_best = get_required_cycles (ops_num, width);
3661
3662 /* Check if we may use less width and still compute sequence for
3663 the same time. It will allow us to reduce registers usage.
3664 get_required_cycles is monotonically increasing with lower width
3665 so we can perform a binary search for the minimal width that still
3666 results in the optimal cycle count. */
3667 width_min = 1;
3668 while (width > width_min)
3669 {
3670 int width_mid = (width + width_min) / 2;
3671
3672 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3673 width = width_mid;
3674 else if (width_min < width_mid)
3675 width_min = width_mid;
3676 else
3677 break;
3678 }
3679
3680 return width;
3681 }
3682
3683 /* Recursively rewrite our linearized statements so that the operators
3684 match those in OPS[OPINDEX], putting the computation in rank
3685 order and trying to allow operations to be executed in
3686 parallel. */
3687
3688 static void
3689 rewrite_expr_tree_parallel (gimple stmt, int width,
3690 vec<operand_entry_t> ops)
3691 {
3692 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3693 int op_num = ops.length ();
3694 int stmt_num = op_num - 1;
3695 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3696 int op_index = op_num - 1;
3697 int stmt_index = 0;
3698 int ready_stmts_end = 0;
3699 int i = 0;
3700 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3701
3702 /* We start expression rewriting from the top statements.
3703 So, in this loop we create a full list of statements
3704 we will work with. */
3705 stmts[stmt_num - 1] = stmt;
3706 for (i = stmt_num - 2; i >= 0; i--)
3707 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3708
3709 for (i = 0; i < stmt_num; i++)
3710 {
3711 tree op1, op2;
3712
3713 /* Determine whether we should use results of
3714 already handled statements or not. */
3715 if (ready_stmts_end == 0
3716 && (i - stmt_index >= width || op_index < 1))
3717 ready_stmts_end = i;
3718
3719 /* Now we choose operands for the next statement. Non zero
3720 value in ready_stmts_end means here that we should use
3721 the result of already generated statements as new operand. */
3722 if (ready_stmts_end > 0)
3723 {
3724 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3725 if (ready_stmts_end > stmt_index)
3726 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3727 else if (op_index >= 0)
3728 op2 = ops[op_index--]->op;
3729 else
3730 {
3731 gcc_assert (stmt_index < i);
3732 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3733 }
3734
3735 if (stmt_index >= ready_stmts_end)
3736 ready_stmts_end = 0;
3737 }
3738 else
3739 {
3740 if (op_index > 1)
3741 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3742 op2 = ops[op_index--]->op;
3743 op1 = ops[op_index--]->op;
3744 }
3745
3746 /* If we emit the last statement then we should put
3747 operands into the last statement. It will also
3748 break the loop. */
3749 if (op_index < 0 && stmt_index == i)
3750 i = stmt_num - 1;
3751
3752 if (dump_file && (dump_flags & TDF_DETAILS))
3753 {
3754 fprintf (dump_file, "Transforming ");
3755 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3756 }
3757
3758 /* We keep original statement only for the last one. All
3759 others are recreated. */
3760 if (i == stmt_num - 1)
3761 {
3762 gimple_assign_set_rhs1 (stmts[i], op1);
3763 gimple_assign_set_rhs2 (stmts[i], op2);
3764 update_stmt (stmts[i]);
3765 }
3766 else
3767 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3768
3769 if (dump_file && (dump_flags & TDF_DETAILS))
3770 {
3771 fprintf (dump_file, " into ");
3772 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3773 }
3774 }
3775
3776 remove_visited_stmt_chain (last_rhs1);
3777 }
3778
3779 /* Transform STMT, which is really (A +B) + (C + D) into the left
3780 linear form, ((A+B)+C)+D.
3781 Recurse on D if necessary. */
3782
3783 static void
3784 linearize_expr (gimple stmt)
3785 {
3786 gimple_stmt_iterator gsi;
3787 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3788 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3789 gimple oldbinrhs = binrhs;
3790 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3791 gimple newbinrhs = NULL;
3792 struct loop *loop = loop_containing_stmt (stmt);
3793 tree lhs = gimple_assign_lhs (stmt);
3794
3795 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3796 && is_reassociable_op (binrhs, rhscode, loop));
3797
3798 gsi = gsi_for_stmt (stmt);
3799
3800 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3801 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3802 make_ssa_name (TREE_TYPE (lhs), NULL),
3803 gimple_assign_lhs (binlhs),
3804 gimple_assign_rhs2 (binrhs));
3805 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3806 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3807 gimple_set_uid (binrhs, gimple_uid (stmt));
3808
3809 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3810 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3811
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3813 {
3814 fprintf (dump_file, "Linearized: ");
3815 print_gimple_stmt (dump_file, stmt, 0, 0);
3816 }
3817
3818 reassociate_stats.linearized++;
3819 update_stmt (stmt);
3820
3821 gsi = gsi_for_stmt (oldbinrhs);
3822 reassoc_remove_stmt (&gsi);
3823 release_defs (oldbinrhs);
3824
3825 gimple_set_visited (stmt, true);
3826 gimple_set_visited (binlhs, true);
3827 gimple_set_visited (binrhs, true);
3828
3829 /* Tail recurse on the new rhs if it still needs reassociation. */
3830 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3831 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3832 want to change the algorithm while converting to tuples. */
3833 linearize_expr (stmt);
3834 }
3835
3836 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3837 it. Otherwise, return NULL. */
3838
3839 static gimple
3840 get_single_immediate_use (tree lhs)
3841 {
3842 use_operand_p immuse;
3843 gimple immusestmt;
3844
3845 if (TREE_CODE (lhs) == SSA_NAME
3846 && single_imm_use (lhs, &immuse, &immusestmt)
3847 && is_gimple_assign (immusestmt))
3848 return immusestmt;
3849
3850 return NULL;
3851 }
3852
3853 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3854 representing the negated value. Insertions of any necessary
3855 instructions go before GSI.
3856 This function is recursive in that, if you hand it "a_5" as the
3857 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3858 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3859
3860 static tree
3861 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3862 {
3863 gimple negatedefstmt = NULL;
3864 tree resultofnegate;
3865 gimple_stmt_iterator gsi;
3866 unsigned int uid;
3867
3868 /* If we are trying to negate a name, defined by an add, negate the
3869 add operands instead. */
3870 if (TREE_CODE (tonegate) == SSA_NAME)
3871 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3872 if (TREE_CODE (tonegate) == SSA_NAME
3873 && is_gimple_assign (negatedefstmt)
3874 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3875 && has_single_use (gimple_assign_lhs (negatedefstmt))
3876 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3877 {
3878 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3879 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3880 tree lhs = gimple_assign_lhs (negatedefstmt);
3881 gimple g;
3882
3883 gsi = gsi_for_stmt (negatedefstmt);
3884 rhs1 = negate_value (rhs1, &gsi);
3885
3886 gsi = gsi_for_stmt (negatedefstmt);
3887 rhs2 = negate_value (rhs2, &gsi);
3888
3889 gsi = gsi_for_stmt (negatedefstmt);
3890 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3891 gimple_set_visited (negatedefstmt, true);
3892 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3893 gimple_set_uid (g, gimple_uid (negatedefstmt));
3894 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3895 return lhs;
3896 }
3897
3898 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3899 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3900 NULL_TREE, true, GSI_SAME_STMT);
3901 gsi = *gsip;
3902 uid = gimple_uid (gsi_stmt (gsi));
3903 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3904 {
3905 gimple stmt = gsi_stmt (gsi);
3906 if (gimple_uid (stmt) != 0)
3907 break;
3908 gimple_set_uid (stmt, uid);
3909 }
3910 return resultofnegate;
3911 }
3912
3913 /* Return true if we should break up the subtract in STMT into an add
3914 with negate. This is true when we the subtract operands are really
3915 adds, or the subtract itself is used in an add expression. In
3916 either case, breaking up the subtract into an add with negate
3917 exposes the adds to reassociation. */
3918
3919 static bool
3920 should_break_up_subtract (gimple stmt)
3921 {
3922 tree lhs = gimple_assign_lhs (stmt);
3923 tree binlhs = gimple_assign_rhs1 (stmt);
3924 tree binrhs = gimple_assign_rhs2 (stmt);
3925 gimple immusestmt;
3926 struct loop *loop = loop_containing_stmt (stmt);
3927
3928 if (TREE_CODE (binlhs) == SSA_NAME
3929 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3930 return true;
3931
3932 if (TREE_CODE (binrhs) == SSA_NAME
3933 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3934 return true;
3935
3936 if (TREE_CODE (lhs) == SSA_NAME
3937 && (immusestmt = get_single_immediate_use (lhs))
3938 && is_gimple_assign (immusestmt)
3939 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3940 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3941 return true;
3942 return false;
3943 }
3944
3945 /* Transform STMT from A - B into A + -B. */
3946
3947 static void
3948 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3949 {
3950 tree rhs1 = gimple_assign_rhs1 (stmt);
3951 tree rhs2 = gimple_assign_rhs2 (stmt);
3952
3953 if (dump_file && (dump_flags & TDF_DETAILS))
3954 {
3955 fprintf (dump_file, "Breaking up subtract ");
3956 print_gimple_stmt (dump_file, stmt, 0, 0);
3957 }
3958
3959 rhs2 = negate_value (rhs2, gsip);
3960 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3961 update_stmt (stmt);
3962 }
3963
3964 /* Determine whether STMT is a builtin call that raises an SSA name
3965 to an integer power and has only one use. If so, and this is early
3966 reassociation and unsafe math optimizations are permitted, place
3967 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3968 If any of these conditions does not hold, return FALSE. */
3969
3970 static bool
3971 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3972 {
3973 tree fndecl, arg1;
3974 REAL_VALUE_TYPE c, cint;
3975
3976 if (!first_pass_instance
3977 || !flag_unsafe_math_optimizations
3978 || !is_gimple_call (stmt)
3979 || !has_single_use (gimple_call_lhs (stmt)))
3980 return false;
3981
3982 fndecl = gimple_call_fndecl (stmt);
3983
3984 if (!fndecl
3985 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3986 return false;
3987
3988 switch (DECL_FUNCTION_CODE (fndecl))
3989 {
3990 CASE_FLT_FN (BUILT_IN_POW):
3991 *base = gimple_call_arg (stmt, 0);
3992 arg1 = gimple_call_arg (stmt, 1);
3993
3994 if (TREE_CODE (arg1) != REAL_CST)
3995 return false;
3996
3997 c = TREE_REAL_CST (arg1);
3998
3999 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4000 return false;
4001
4002 *exponent = real_to_integer (&c);
4003 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4004 if (!real_identical (&c, &cint))
4005 return false;
4006
4007 break;
4008
4009 CASE_FLT_FN (BUILT_IN_POWI):
4010 *base = gimple_call_arg (stmt, 0);
4011 arg1 = gimple_call_arg (stmt, 1);
4012
4013 if (!tree_fits_shwi_p (arg1))
4014 return false;
4015
4016 *exponent = tree_to_shwi (arg1);
4017 break;
4018
4019 default:
4020 return false;
4021 }
4022
4023 /* Expanding negative exponents is generally unproductive, so we don't
4024 complicate matters with those. Exponents of zero and one should
4025 have been handled by expression folding. */
4026 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4027 return false;
4028
4029 return true;
4030 }
4031
4032 /* Recursively linearize a binary expression that is the RHS of STMT.
4033 Place the operands of the expression tree in the vector named OPS. */
4034
4035 static void
4036 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4037 bool is_associative, bool set_visited)
4038 {
4039 tree binlhs = gimple_assign_rhs1 (stmt);
4040 tree binrhs = gimple_assign_rhs2 (stmt);
4041 gimple binlhsdef = NULL, binrhsdef = NULL;
4042 bool binlhsisreassoc = false;
4043 bool binrhsisreassoc = false;
4044 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4045 struct loop *loop = loop_containing_stmt (stmt);
4046 tree base = NULL_TREE;
4047 HOST_WIDE_INT exponent = 0;
4048
4049 if (set_visited)
4050 gimple_set_visited (stmt, true);
4051
4052 if (TREE_CODE (binlhs) == SSA_NAME)
4053 {
4054 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4055 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4056 && !stmt_could_throw_p (binlhsdef));
4057 }
4058
4059 if (TREE_CODE (binrhs) == SSA_NAME)
4060 {
4061 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4062 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4063 && !stmt_could_throw_p (binrhsdef));
4064 }
4065
4066 /* If the LHS is not reassociable, but the RHS is, we need to swap
4067 them. If neither is reassociable, there is nothing we can do, so
4068 just put them in the ops vector. If the LHS is reassociable,
4069 linearize it. If both are reassociable, then linearize the RHS
4070 and the LHS. */
4071
4072 if (!binlhsisreassoc)
4073 {
4074 tree temp;
4075
4076 /* If this is not a associative operation like division, give up. */
4077 if (!is_associative)
4078 {
4079 add_to_ops_vec (ops, binrhs);
4080 return;
4081 }
4082
4083 if (!binrhsisreassoc)
4084 {
4085 if (rhscode == MULT_EXPR
4086 && TREE_CODE (binrhs) == SSA_NAME
4087 && acceptable_pow_call (binrhsdef, &base, &exponent))
4088 {
4089 add_repeat_to_ops_vec (ops, base, exponent);
4090 gimple_set_visited (binrhsdef, true);
4091 }
4092 else
4093 add_to_ops_vec (ops, binrhs);
4094
4095 if (rhscode == MULT_EXPR
4096 && TREE_CODE (binlhs) == SSA_NAME
4097 && acceptable_pow_call (binlhsdef, &base, &exponent))
4098 {
4099 add_repeat_to_ops_vec (ops, base, exponent);
4100 gimple_set_visited (binlhsdef, true);
4101 }
4102 else
4103 add_to_ops_vec (ops, binlhs);
4104
4105 return;
4106 }
4107
4108 if (dump_file && (dump_flags & TDF_DETAILS))
4109 {
4110 fprintf (dump_file, "swapping operands of ");
4111 print_gimple_stmt (dump_file, stmt, 0, 0);
4112 }
4113
4114 swap_ssa_operands (stmt,
4115 gimple_assign_rhs1_ptr (stmt),
4116 gimple_assign_rhs2_ptr (stmt));
4117 update_stmt (stmt);
4118
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4120 {
4121 fprintf (dump_file, " is now ");
4122 print_gimple_stmt (dump_file, stmt, 0, 0);
4123 }
4124
4125 /* We want to make it so the lhs is always the reassociative op,
4126 so swap. */
4127 temp = binlhs;
4128 binlhs = binrhs;
4129 binrhs = temp;
4130 }
4131 else if (binrhsisreassoc)
4132 {
4133 linearize_expr (stmt);
4134 binlhs = gimple_assign_rhs1 (stmt);
4135 binrhs = gimple_assign_rhs2 (stmt);
4136 }
4137
4138 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4139 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4140 rhscode, loop));
4141 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4142 is_associative, set_visited);
4143
4144 if (rhscode == MULT_EXPR
4145 && TREE_CODE (binrhs) == SSA_NAME
4146 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4147 {
4148 add_repeat_to_ops_vec (ops, base, exponent);
4149 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4150 }
4151 else
4152 add_to_ops_vec (ops, binrhs);
4153 }
4154
4155 /* Repropagate the negates back into subtracts, since no other pass
4156 currently does it. */
4157
4158 static void
4159 repropagate_negates (void)
4160 {
4161 unsigned int i = 0;
4162 tree negate;
4163
4164 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4165 {
4166 gimple user = get_single_immediate_use (negate);
4167
4168 if (!user || !is_gimple_assign (user))
4169 continue;
4170
4171 /* The negate operand can be either operand of a PLUS_EXPR
4172 (it can be the LHS if the RHS is a constant for example).
4173
4174 Force the negate operand to the RHS of the PLUS_EXPR, then
4175 transform the PLUS_EXPR into a MINUS_EXPR. */
4176 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4177 {
4178 /* If the negated operand appears on the LHS of the
4179 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4180 to force the negated operand to the RHS of the PLUS_EXPR. */
4181 if (gimple_assign_rhs1 (user) == negate)
4182 {
4183 swap_ssa_operands (user,
4184 gimple_assign_rhs1_ptr (user),
4185 gimple_assign_rhs2_ptr (user));
4186 }
4187
4188 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4189 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4190 if (gimple_assign_rhs2 (user) == negate)
4191 {
4192 tree rhs1 = gimple_assign_rhs1 (user);
4193 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4194 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4195 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4196 update_stmt (user);
4197 }
4198 }
4199 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4200 {
4201 if (gimple_assign_rhs1 (user) == negate)
4202 {
4203 /* We have
4204 x = -a
4205 y = x - b
4206 which we transform into
4207 x = a + b
4208 y = -x .
4209 This pushes down the negate which we possibly can merge
4210 into some other operation, hence insert it into the
4211 plus_negates vector. */
4212 gimple feed = SSA_NAME_DEF_STMT (negate);
4213 tree a = gimple_assign_rhs1 (feed);
4214 tree b = gimple_assign_rhs2 (user);
4215 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4216 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4217 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
4218 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
4219 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4220 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
4221 user = gsi_stmt (gsi2);
4222 update_stmt (user);
4223 reassoc_remove_stmt (&gsi);
4224 release_defs (feed);
4225 plus_negates.safe_push (gimple_assign_lhs (user));
4226 }
4227 else
4228 {
4229 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4230 rid of one operation. */
4231 gimple feed = SSA_NAME_DEF_STMT (negate);
4232 tree a = gimple_assign_rhs1 (feed);
4233 tree rhs1 = gimple_assign_rhs1 (user);
4234 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4235 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4236 update_stmt (gsi_stmt (gsi));
4237 }
4238 }
4239 }
4240 }
4241
4242 /* Returns true if OP is of a type for which we can do reassociation.
4243 That is for integral or non-saturating fixed-point types, and for
4244 floating point type when associative-math is enabled. */
4245
4246 static bool
4247 can_reassociate_p (tree op)
4248 {
4249 tree type = TREE_TYPE (op);
4250 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4251 || NON_SAT_FIXED_POINT_TYPE_P (type)
4252 || (flag_associative_math && FLOAT_TYPE_P (type)))
4253 return true;
4254 return false;
4255 }
4256
4257 /* Break up subtract operations in block BB.
4258
4259 We do this top down because we don't know whether the subtract is
4260 part of a possible chain of reassociation except at the top.
4261
4262 IE given
4263 d = f + g
4264 c = a + e
4265 b = c - d
4266 q = b - r
4267 k = t - q
4268
4269 we want to break up k = t - q, but we won't until we've transformed q
4270 = b - r, which won't be broken up until we transform b = c - d.
4271
4272 En passant, clear the GIMPLE visited flag on every statement
4273 and set UIDs within each basic block. */
4274
4275 static void
4276 break_up_subtract_bb (basic_block bb)
4277 {
4278 gimple_stmt_iterator gsi;
4279 basic_block son;
4280 unsigned int uid = 1;
4281
4282 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4283 {
4284 gimple stmt = gsi_stmt (gsi);
4285 gimple_set_visited (stmt, false);
4286 gimple_set_uid (stmt, uid++);
4287
4288 if (!is_gimple_assign (stmt)
4289 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4290 continue;
4291
4292 /* Look for simple gimple subtract operations. */
4293 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4294 {
4295 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4296 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4297 continue;
4298
4299 /* Check for a subtract used only in an addition. If this
4300 is the case, transform it into add of a negate for better
4301 reassociation. IE transform C = A-B into C = A + -B if C
4302 is only used in an addition. */
4303 if (should_break_up_subtract (stmt))
4304 break_up_subtract (stmt, &gsi);
4305 }
4306 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4307 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4308 plus_negates.safe_push (gimple_assign_lhs (stmt));
4309 }
4310 for (son = first_dom_son (CDI_DOMINATORS, bb);
4311 son;
4312 son = next_dom_son (CDI_DOMINATORS, son))
4313 break_up_subtract_bb (son);
4314 }
4315
4316 /* Used for repeated factor analysis. */
4317 struct repeat_factor_d
4318 {
4319 /* An SSA name that occurs in a multiply chain. */
4320 tree factor;
4321
4322 /* Cached rank of the factor. */
4323 unsigned rank;
4324
4325 /* Number of occurrences of the factor in the chain. */
4326 HOST_WIDE_INT count;
4327
4328 /* An SSA name representing the product of this factor and
4329 all factors appearing later in the repeated factor vector. */
4330 tree repr;
4331 };
4332
4333 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4334 typedef const struct repeat_factor_d *const_repeat_factor_t;
4335
4336
4337 static vec<repeat_factor> repeat_factor_vec;
4338
4339 /* Used for sorting the repeat factor vector. Sort primarily by
4340 ascending occurrence count, secondarily by descending rank. */
4341
4342 static int
4343 compare_repeat_factors (const void *x1, const void *x2)
4344 {
4345 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4346 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4347
4348 if (rf1->count != rf2->count)
4349 return rf1->count - rf2->count;
4350
4351 return rf2->rank - rf1->rank;
4352 }
4353
4354 /* Look for repeated operands in OPS in the multiply tree rooted at
4355 STMT. Replace them with an optimal sequence of multiplies and powi
4356 builtin calls, and remove the used operands from OPS. Return an
4357 SSA name representing the value of the replacement sequence. */
4358
4359 static tree
4360 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4361 {
4362 unsigned i, j, vec_len;
4363 int ii;
4364 operand_entry_t oe;
4365 repeat_factor_t rf1, rf2;
4366 repeat_factor rfnew;
4367 tree result = NULL_TREE;
4368 tree target_ssa, iter_result;
4369 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4370 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4371 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4372 gimple mul_stmt, pow_stmt;
4373
4374 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4375 target. */
4376 if (!powi_fndecl)
4377 return NULL_TREE;
4378
4379 /* Allocate the repeated factor vector. */
4380 repeat_factor_vec.create (10);
4381
4382 /* Scan the OPS vector for all SSA names in the product and build
4383 up a vector of occurrence counts for each factor. */
4384 FOR_EACH_VEC_ELT (*ops, i, oe)
4385 {
4386 if (TREE_CODE (oe->op) == SSA_NAME)
4387 {
4388 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4389 {
4390 if (rf1->factor == oe->op)
4391 {
4392 rf1->count += oe->count;
4393 break;
4394 }
4395 }
4396
4397 if (j >= repeat_factor_vec.length ())
4398 {
4399 rfnew.factor = oe->op;
4400 rfnew.rank = oe->rank;
4401 rfnew.count = oe->count;
4402 rfnew.repr = NULL_TREE;
4403 repeat_factor_vec.safe_push (rfnew);
4404 }
4405 }
4406 }
4407
4408 /* Sort the repeated factor vector by (a) increasing occurrence count,
4409 and (b) decreasing rank. */
4410 repeat_factor_vec.qsort (compare_repeat_factors);
4411
4412 /* It is generally best to combine as many base factors as possible
4413 into a product before applying __builtin_powi to the result.
4414 However, the sort order chosen for the repeated factor vector
4415 allows us to cache partial results for the product of the base
4416 factors for subsequent use. When we already have a cached partial
4417 result from a previous iteration, it is best to make use of it
4418 before looking for another __builtin_pow opportunity.
4419
4420 As an example, consider x * x * y * y * y * z * z * z * z.
4421 We want to first compose the product x * y * z, raise it to the
4422 second power, then multiply this by y * z, and finally multiply
4423 by z. This can be done in 5 multiplies provided we cache y * z
4424 for use in both expressions:
4425
4426 t1 = y * z
4427 t2 = t1 * x
4428 t3 = t2 * t2
4429 t4 = t1 * t3
4430 result = t4 * z
4431
4432 If we instead ignored the cached y * z and first multiplied by
4433 the __builtin_pow opportunity z * z, we would get the inferior:
4434
4435 t1 = y * z
4436 t2 = t1 * x
4437 t3 = t2 * t2
4438 t4 = z * z
4439 t5 = t3 * t4
4440 result = t5 * y */
4441
4442 vec_len = repeat_factor_vec.length ();
4443
4444 /* Repeatedly look for opportunities to create a builtin_powi call. */
4445 while (true)
4446 {
4447 HOST_WIDE_INT power;
4448
4449 /* First look for the largest cached product of factors from
4450 preceding iterations. If found, create a builtin_powi for
4451 it if the minimum occurrence count for its factors is at
4452 least 2, or just use this cached product as our next
4453 multiplicand if the minimum occurrence count is 1. */
4454 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4455 {
4456 if (rf1->repr && rf1->count > 0)
4457 break;
4458 }
4459
4460 if (j < vec_len)
4461 {
4462 power = rf1->count;
4463
4464 if (power == 1)
4465 {
4466 iter_result = rf1->repr;
4467
4468 if (dump_file && (dump_flags & TDF_DETAILS))
4469 {
4470 unsigned elt;
4471 repeat_factor_t rf;
4472 fputs ("Multiplying by cached product ", dump_file);
4473 for (elt = j; elt < vec_len; elt++)
4474 {
4475 rf = &repeat_factor_vec[elt];
4476 print_generic_expr (dump_file, rf->factor, 0);
4477 if (elt < vec_len - 1)
4478 fputs (" * ", dump_file);
4479 }
4480 fputs ("\n", dump_file);
4481 }
4482 }
4483 else
4484 {
4485 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4486 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4487 build_int_cst (integer_type_node,
4488 power));
4489 gimple_call_set_lhs (pow_stmt, iter_result);
4490 gimple_set_location (pow_stmt, gimple_location (stmt));
4491 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4492
4493 if (dump_file && (dump_flags & TDF_DETAILS))
4494 {
4495 unsigned elt;
4496 repeat_factor_t rf;
4497 fputs ("Building __builtin_pow call for cached product (",
4498 dump_file);
4499 for (elt = j; elt < vec_len; elt++)
4500 {
4501 rf = &repeat_factor_vec[elt];
4502 print_generic_expr (dump_file, rf->factor, 0);
4503 if (elt < vec_len - 1)
4504 fputs (" * ", dump_file);
4505 }
4506 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4507 power);
4508 }
4509 }
4510 }
4511 else
4512 {
4513 /* Otherwise, find the first factor in the repeated factor
4514 vector whose occurrence count is at least 2. If no such
4515 factor exists, there are no builtin_powi opportunities
4516 remaining. */
4517 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4518 {
4519 if (rf1->count >= 2)
4520 break;
4521 }
4522
4523 if (j >= vec_len)
4524 break;
4525
4526 power = rf1->count;
4527
4528 if (dump_file && (dump_flags & TDF_DETAILS))
4529 {
4530 unsigned elt;
4531 repeat_factor_t rf;
4532 fputs ("Building __builtin_pow call for (", dump_file);
4533 for (elt = j; elt < vec_len; elt++)
4534 {
4535 rf = &repeat_factor_vec[elt];
4536 print_generic_expr (dump_file, rf->factor, 0);
4537 if (elt < vec_len - 1)
4538 fputs (" * ", dump_file);
4539 }
4540 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4541 }
4542
4543 reassociate_stats.pows_created++;
4544
4545 /* Visit each element of the vector in reverse order (so that
4546 high-occurrence elements are visited first, and within the
4547 same occurrence count, lower-ranked elements are visited
4548 first). Form a linear product of all elements in this order
4549 whose occurrencce count is at least that of element J.
4550 Record the SSA name representing the product of each element
4551 with all subsequent elements in the vector. */
4552 if (j == vec_len - 1)
4553 rf1->repr = rf1->factor;
4554 else
4555 {
4556 for (ii = vec_len - 2; ii >= (int)j; ii--)
4557 {
4558 tree op1, op2;
4559
4560 rf1 = &repeat_factor_vec[ii];
4561 rf2 = &repeat_factor_vec[ii + 1];
4562
4563 /* Init the last factor's representative to be itself. */
4564 if (!rf2->repr)
4565 rf2->repr = rf2->factor;
4566
4567 op1 = rf1->factor;
4568 op2 = rf2->repr;
4569
4570 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4571 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4572 target_ssa,
4573 op1, op2);
4574 gimple_set_location (mul_stmt, gimple_location (stmt));
4575 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4576 rf1->repr = target_ssa;
4577
4578 /* Don't reprocess the multiply we just introduced. */
4579 gimple_set_visited (mul_stmt, true);
4580 }
4581 }
4582
4583 /* Form a call to __builtin_powi for the maximum product
4584 just formed, raised to the power obtained earlier. */
4585 rf1 = &repeat_factor_vec[j];
4586 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4587 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4588 build_int_cst (integer_type_node,
4589 power));
4590 gimple_call_set_lhs (pow_stmt, iter_result);
4591 gimple_set_location (pow_stmt, gimple_location (stmt));
4592 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4593 }
4594
4595 /* If we previously formed at least one other builtin_powi call,
4596 form the product of this one and those others. */
4597 if (result)
4598 {
4599 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4600 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4601 result, iter_result);
4602 gimple_set_location (mul_stmt, gimple_location (stmt));
4603 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4604 gimple_set_visited (mul_stmt, true);
4605 result = new_result;
4606 }
4607 else
4608 result = iter_result;
4609
4610 /* Decrement the occurrence count of each element in the product
4611 by the count found above, and remove this many copies of each
4612 factor from OPS. */
4613 for (i = j; i < vec_len; i++)
4614 {
4615 unsigned k = power;
4616 unsigned n;
4617
4618 rf1 = &repeat_factor_vec[i];
4619 rf1->count -= power;
4620
4621 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4622 {
4623 if (oe->op == rf1->factor)
4624 {
4625 if (oe->count <= k)
4626 {
4627 ops->ordered_remove (n);
4628 k -= oe->count;
4629
4630 if (k == 0)
4631 break;
4632 }
4633 else
4634 {
4635 oe->count -= k;
4636 break;
4637 }
4638 }
4639 }
4640 }
4641 }
4642
4643 /* At this point all elements in the repeated factor vector have a
4644 remaining occurrence count of 0 or 1, and those with a count of 1
4645 don't have cached representatives. Re-sort the ops vector and
4646 clean up. */
4647 ops->qsort (sort_by_operand_rank);
4648 repeat_factor_vec.release ();
4649
4650 /* Return the final product computed herein. Note that there may
4651 still be some elements with single occurrence count left in OPS;
4652 those will be handled by the normal reassociation logic. */
4653 return result;
4654 }
4655
4656 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4657
4658 static void
4659 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4660 {
4661 tree rhs1;
4662
4663 if (dump_file && (dump_flags & TDF_DETAILS))
4664 {
4665 fprintf (dump_file, "Transforming ");
4666 print_gimple_stmt (dump_file, stmt, 0, 0);
4667 }
4668
4669 rhs1 = gimple_assign_rhs1 (stmt);
4670 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4671 update_stmt (stmt);
4672 remove_visited_stmt_chain (rhs1);
4673
4674 if (dump_file && (dump_flags & TDF_DETAILS))
4675 {
4676 fprintf (dump_file, " into ");
4677 print_gimple_stmt (dump_file, stmt, 0, 0);
4678 }
4679 }
4680
4681 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4682
4683 static void
4684 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4685 tree rhs1, tree rhs2)
4686 {
4687 if (dump_file && (dump_flags & TDF_DETAILS))
4688 {
4689 fprintf (dump_file, "Transforming ");
4690 print_gimple_stmt (dump_file, stmt, 0, 0);
4691 }
4692
4693 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4694 update_stmt (gsi_stmt (*gsi));
4695 remove_visited_stmt_chain (rhs1);
4696
4697 if (dump_file && (dump_flags & TDF_DETAILS))
4698 {
4699 fprintf (dump_file, " into ");
4700 print_gimple_stmt (dump_file, stmt, 0, 0);
4701 }
4702 }
4703
4704 /* Reassociate expressions in basic block BB and its post-dominator as
4705 children. */
4706
4707 static void
4708 reassociate_bb (basic_block bb)
4709 {
4710 gimple_stmt_iterator gsi;
4711 basic_block son;
4712 gimple stmt = last_stmt (bb);
4713
4714 if (stmt && !gimple_visited_p (stmt))
4715 maybe_optimize_range_tests (stmt);
4716
4717 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4718 {
4719 stmt = gsi_stmt (gsi);
4720
4721 if (is_gimple_assign (stmt)
4722 && !stmt_could_throw_p (stmt))
4723 {
4724 tree lhs, rhs1, rhs2;
4725 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4726
4727 /* If this is not a gimple binary expression, there is
4728 nothing for us to do with it. */
4729 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4730 continue;
4731
4732 /* If this was part of an already processed statement,
4733 we don't need to touch it again. */
4734 if (gimple_visited_p (stmt))
4735 {
4736 /* This statement might have become dead because of previous
4737 reassociations. */
4738 if (has_zero_uses (gimple_get_lhs (stmt)))
4739 {
4740 reassoc_remove_stmt (&gsi);
4741 release_defs (stmt);
4742 /* We might end up removing the last stmt above which
4743 places the iterator to the end of the sequence.
4744 Reset it to the last stmt in this case which might
4745 be the end of the sequence as well if we removed
4746 the last statement of the sequence. In which case
4747 we need to bail out. */
4748 if (gsi_end_p (gsi))
4749 {
4750 gsi = gsi_last_bb (bb);
4751 if (gsi_end_p (gsi))
4752 break;
4753 }
4754 }
4755 continue;
4756 }
4757
4758 lhs = gimple_assign_lhs (stmt);
4759 rhs1 = gimple_assign_rhs1 (stmt);
4760 rhs2 = gimple_assign_rhs2 (stmt);
4761
4762 /* For non-bit or min/max operations we can't associate
4763 all types. Verify that here. */
4764 if (rhs_code != BIT_IOR_EXPR
4765 && rhs_code != BIT_AND_EXPR
4766 && rhs_code != BIT_XOR_EXPR
4767 && rhs_code != MIN_EXPR
4768 && rhs_code != MAX_EXPR
4769 && (!can_reassociate_p (lhs)
4770 || !can_reassociate_p (rhs1)
4771 || !can_reassociate_p (rhs2)))
4772 continue;
4773
4774 if (associative_tree_code (rhs_code))
4775 {
4776 auto_vec<operand_entry_t> ops;
4777 tree powi_result = NULL_TREE;
4778
4779 /* There may be no immediate uses left by the time we
4780 get here because we may have eliminated them all. */
4781 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4782 continue;
4783
4784 gimple_set_visited (stmt, true);
4785 linearize_expr_tree (&ops, stmt, true, true);
4786 ops.qsort (sort_by_operand_rank);
4787 optimize_ops_list (rhs_code, &ops);
4788 if (undistribute_ops_list (rhs_code, &ops,
4789 loop_containing_stmt (stmt)))
4790 {
4791 ops.qsort (sort_by_operand_rank);
4792 optimize_ops_list (rhs_code, &ops);
4793 }
4794
4795 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4796 optimize_range_tests (rhs_code, &ops);
4797
4798 if (first_pass_instance
4799 && rhs_code == MULT_EXPR
4800 && flag_unsafe_math_optimizations)
4801 powi_result = attempt_builtin_powi (stmt, &ops);
4802
4803 /* If the operand vector is now empty, all operands were
4804 consumed by the __builtin_powi optimization. */
4805 if (ops.length () == 0)
4806 transform_stmt_to_copy (&gsi, stmt, powi_result);
4807 else if (ops.length () == 1)
4808 {
4809 tree last_op = ops.last ()->op;
4810
4811 if (powi_result)
4812 transform_stmt_to_multiply (&gsi, stmt, last_op,
4813 powi_result);
4814 else
4815 transform_stmt_to_copy (&gsi, stmt, last_op);
4816 }
4817 else
4818 {
4819 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4820 int ops_num = ops.length ();
4821 int width = get_reassociation_width (ops_num, rhs_code, mode);
4822 tree new_lhs = lhs;
4823
4824 if (dump_file && (dump_flags & TDF_DETAILS))
4825 fprintf (dump_file,
4826 "Width = %d was chosen for reassociation\n", width);
4827
4828 if (width > 1
4829 && ops.length () > 3)
4830 rewrite_expr_tree_parallel (stmt, width, ops);
4831 else
4832 {
4833 /* When there are three operands left, we want
4834 to make sure the ones that get the double
4835 binary op are chosen wisely. */
4836 int len = ops.length ();
4837 if (len >= 3)
4838 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4839
4840 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4841 powi_result != NULL);
4842 }
4843
4844 /* If we combined some repeated factors into a
4845 __builtin_powi call, multiply that result by the
4846 reassociated operands. */
4847 if (powi_result)
4848 {
4849 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4850 tree type = TREE_TYPE (lhs);
4851 tree target_ssa = make_temp_ssa_name (type, NULL,
4852 "reassocpow");
4853 gimple_set_lhs (lhs_stmt, target_ssa);
4854 update_stmt (lhs_stmt);
4855 if (lhs != new_lhs)
4856 target_ssa = new_lhs;
4857 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4858 powi_result,
4859 target_ssa);
4860 gimple_set_location (mul_stmt, gimple_location (stmt));
4861 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4862 }
4863 }
4864 }
4865 }
4866 }
4867 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4868 son;
4869 son = next_dom_son (CDI_POST_DOMINATORS, son))
4870 reassociate_bb (son);
4871 }
4872
4873 /* Add jumps around shifts for range tests turned into bit tests.
4874 For each SSA_NAME VAR we have code like:
4875 VAR = ...; // final stmt of range comparison
4876 // bit test here...;
4877 OTHERVAR = ...; // final stmt of the bit test sequence
4878 RES = VAR | OTHERVAR;
4879 Turn the above into:
4880 VAR = ...;
4881 if (VAR != 0)
4882 goto <l3>;
4883 else
4884 goto <l2>;
4885 <l2>:
4886 // bit test here...;
4887 OTHERVAR = ...;
4888 <l3>:
4889 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4890
4891 static void
4892 branch_fixup (void)
4893 {
4894 tree var;
4895 unsigned int i;
4896
4897 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4898 {
4899 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4900 gimple use_stmt;
4901 use_operand_p use;
4902 bool ok = single_imm_use (var, &use, &use_stmt);
4903 gcc_assert (ok
4904 && is_gimple_assign (use_stmt)
4905 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4906 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4907
4908 basic_block cond_bb = gimple_bb (def_stmt);
4909 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4910 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4911
4912 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4913 gimple g = gimple_build_cond (NE_EXPR, var,
4914 build_zero_cst (TREE_TYPE (var)),
4915 NULL_TREE, NULL_TREE);
4916 location_t loc = gimple_location (use_stmt);
4917 gimple_set_location (g, loc);
4918 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4919
4920 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4921 etrue->probability = REG_BR_PROB_BASE / 2;
4922 etrue->count = cond_bb->count / 2;
4923 edge efalse = find_edge (cond_bb, then_bb);
4924 efalse->flags = EDGE_FALSE_VALUE;
4925 efalse->probability -= etrue->probability;
4926 efalse->count -= etrue->count;
4927 then_bb->count -= etrue->count;
4928
4929 tree othervar = NULL_TREE;
4930 if (gimple_assign_rhs1 (use_stmt) == var)
4931 othervar = gimple_assign_rhs2 (use_stmt);
4932 else if (gimple_assign_rhs2 (use_stmt) == var)
4933 othervar = gimple_assign_rhs1 (use_stmt);
4934 else
4935 gcc_unreachable ();
4936 tree lhs = gimple_assign_lhs (use_stmt);
4937 gimple phi = create_phi_node (lhs, merge_bb);
4938 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4939 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4940 gsi = gsi_for_stmt (use_stmt);
4941 gsi_remove (&gsi, true);
4942
4943 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4944 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4945 }
4946 reassoc_branch_fixups.release ();
4947 }
4948
4949 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4950 void debug_ops_vector (vec<operand_entry_t> ops);
4951
4952 /* Dump the operand entry vector OPS to FILE. */
4953
4954 void
4955 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4956 {
4957 operand_entry_t oe;
4958 unsigned int i;
4959
4960 FOR_EACH_VEC_ELT (ops, i, oe)
4961 {
4962 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4963 print_generic_expr (file, oe->op, 0);
4964 }
4965 }
4966
4967 /* Dump the operand entry vector OPS to STDERR. */
4968
4969 DEBUG_FUNCTION void
4970 debug_ops_vector (vec<operand_entry_t> ops)
4971 {
4972 dump_ops_vector (stderr, ops);
4973 }
4974
4975 static void
4976 do_reassoc (void)
4977 {
4978 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4979 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4980 }
4981
4982 /* Initialize the reassociation pass. */
4983
4984 static void
4985 init_reassoc (void)
4986 {
4987 int i;
4988 long rank = 2;
4989 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4990
4991 /* Find the loops, so that we can prevent moving calculations in
4992 them. */
4993 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4994
4995 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4996
4997 operand_entry_pool = create_alloc_pool ("operand entry pool",
4998 sizeof (struct operand_entry), 30);
4999 next_operand_entry_id = 0;
5000
5001 /* Reverse RPO (Reverse Post Order) will give us something where
5002 deeper loops come later. */
5003 pre_and_rev_post_order_compute (NULL, bbs, false);
5004 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5005 operand_rank = new hash_map<tree, long>;
5006
5007 /* Give each default definition a distinct rank. This includes
5008 parameters and the static chain. Walk backwards over all
5009 SSA names so that we get proper rank ordering according
5010 to tree_swap_operands_p. */
5011 for (i = num_ssa_names - 1; i > 0; --i)
5012 {
5013 tree name = ssa_name (i);
5014 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5015 insert_operand_rank (name, ++rank);
5016 }
5017
5018 /* Set up rank for each BB */
5019 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5020 bb_rank[bbs[i]] = ++rank << 16;
5021
5022 free (bbs);
5023 calculate_dominance_info (CDI_POST_DOMINATORS);
5024 plus_negates = vNULL;
5025 }
5026
5027 /* Cleanup after the reassociation pass, and print stats if
5028 requested. */
5029
5030 static void
5031 fini_reassoc (void)
5032 {
5033 statistics_counter_event (cfun, "Linearized",
5034 reassociate_stats.linearized);
5035 statistics_counter_event (cfun, "Constants eliminated",
5036 reassociate_stats.constants_eliminated);
5037 statistics_counter_event (cfun, "Ops eliminated",
5038 reassociate_stats.ops_eliminated);
5039 statistics_counter_event (cfun, "Statements rewritten",
5040 reassociate_stats.rewritten);
5041 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5042 reassociate_stats.pows_encountered);
5043 statistics_counter_event (cfun, "Built-in powi calls created",
5044 reassociate_stats.pows_created);
5045
5046 delete operand_rank;
5047 free_alloc_pool (operand_entry_pool);
5048 free (bb_rank);
5049 plus_negates.release ();
5050 free_dominance_info (CDI_POST_DOMINATORS);
5051 loop_optimizer_finalize ();
5052 }
5053
5054 /* Gate and execute functions for Reassociation. */
5055
5056 static unsigned int
5057 execute_reassoc (void)
5058 {
5059 init_reassoc ();
5060
5061 do_reassoc ();
5062 repropagate_negates ();
5063 branch_fixup ();
5064
5065 fini_reassoc ();
5066 return 0;
5067 }
5068
5069 namespace {
5070
5071 const pass_data pass_data_reassoc =
5072 {
5073 GIMPLE_PASS, /* type */
5074 "reassoc", /* name */
5075 OPTGROUP_NONE, /* optinfo_flags */
5076 TV_TREE_REASSOC, /* tv_id */
5077 ( PROP_cfg | PROP_ssa ), /* properties_required */
5078 0, /* properties_provided */
5079 0, /* properties_destroyed */
5080 0, /* todo_flags_start */
5081 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5082 };
5083
5084 class pass_reassoc : public gimple_opt_pass
5085 {
5086 public:
5087 pass_reassoc (gcc::context *ctxt)
5088 : gimple_opt_pass (pass_data_reassoc, ctxt)
5089 {}
5090
5091 /* opt_pass methods: */
5092 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5093 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5094 virtual unsigned int execute (function *) { return execute_reassoc (); }
5095
5096 }; // class pass_reassoc
5097
5098 } // anon namespace
5099
5100 gimple_opt_pass *
5101 make_pass_reassoc (gcc::context *ctxt)
5102 {
5103 return new pass_reassoc (ctxt);
5104 }