[gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "predict.h"
33 #include "hard-reg-set.h"
34 #include "function.h"
35 #include "dominance.h"
36 #include "cfg.h"
37 #include "cfganal.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-inline.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "gimple-fold.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "gimple.h"
47 #include "gimple-iterator.h"
48 #include "gimplify-me.h"
49 #include "gimple-ssa.h"
50 #include "tree-cfg.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "stringpool.h"
54 #include "tree-ssanames.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "tree-ssa-loop.h"
57 #include "flags.h"
58 #include "insn-config.h"
59 #include "expmed.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "calls.h"
63 #include "emit-rtl.h"
64 #include "varasm.h"
65 #include "stmt.h"
66 #include "expr.h"
67 #include "tree-dfa.h"
68 #include "tree-ssa.h"
69 #include "tree-iterator.h"
70 #include "tree-pass.h"
71 #include "alloc-pool.h"
72 #include "langhooks.h"
73 #include "cfgloop.h"
74 #include "target.h"
75 #include "params.h"
76 #include "diagnostic-core.h"
77 #include "builtins.h"
78 #include "gimplify.h"
79 #include "insn-codes.h"
80 #include "optabs.h"
81
82 /* This is a simple global reassociation pass. It is, in part, based
83 on the LLVM pass of the same name (they do some things more/less
84 than we do, in different orders, etc.).
85
86 It consists of five steps:
87
88 1. Breaking up subtract operations into addition + negate, where
89 it would promote the reassociation of adds.
90
91 2. Left linearization of the expression trees, so that (A+B)+(C+D)
92 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
93 During linearization, we place the operands of the binary
94 expressions into a vector of operand_entry_t
95
96 3. Optimization of the operand lists, eliminating things like a +
97 -a, a & a, etc.
98
99 3a. Combine repeated factors with the same occurrence counts
100 into a __builtin_powi call that will later be optimized into
101 an optimal number of multiplies.
102
103 4. Rewrite the expression trees we linearized and optimized so
104 they are in proper rank order.
105
106 5. Repropagate negates, as nothing else will clean it up ATM.
107
108 A bit of theory on #4, since nobody seems to write anything down
109 about why it makes sense to do it the way they do it:
110
111 We could do this much nicer theoretically, but don't (for reasons
112 explained after how to do it theoretically nice :P).
113
114 In order to promote the most redundancy elimination, you want
115 binary expressions whose operands are the same rank (or
116 preferably, the same value) exposed to the redundancy eliminator,
117 for possible elimination.
118
119 So the way to do this, if we really cared, is to build the new op
120 tree from the leaves to the roots, merging as you go, and putting the
121 new op on the end of the worklist, until you are left with one
122 thing on the worklist.
123
124 IE if you have to rewrite the following set of operands (listed with
125 rank in parentheses), with opcode PLUS_EXPR:
126
127 a (1), b (1), c (1), d (2), e (2)
128
129
130 We start with our merge worklist empty, and the ops list with all of
131 those on it.
132
133 You want to first merge all leaves of the same rank, as much as
134 possible.
135
136 So first build a binary op of
137
138 mergetmp = a + b, and put "mergetmp" on the merge worklist.
139
140 Because there is no three operand form of PLUS_EXPR, c is not going to
141 be exposed to redundancy elimination as a rank 1 operand.
142
143 So you might as well throw it on the merge worklist (you could also
144 consider it to now be a rank two operand, and merge it with d and e,
145 but in this case, you then have evicted e from a binary op. So at
146 least in this situation, you can't win.)
147
148 Then build a binary op of d + e
149 mergetmp2 = d + e
150
151 and put mergetmp2 on the merge worklist.
152
153 so merge worklist = {mergetmp, c, mergetmp2}
154
155 Continue building binary ops of these operations until you have only
156 one operation left on the worklist.
157
158 So we have
159
160 build binary op
161 mergetmp3 = mergetmp + c
162
163 worklist = {mergetmp2, mergetmp3}
164
165 mergetmp4 = mergetmp2 + mergetmp3
166
167 worklist = {mergetmp4}
168
169 because we have one operation left, we can now just set the original
170 statement equal to the result of that operation.
171
172 This will at least expose a + b and d + e to redundancy elimination
173 as binary operations.
174
175 For extra points, you can reuse the old statements to build the
176 mergetmps, since you shouldn't run out.
177
178 So why don't we do this?
179
180 Because it's expensive, and rarely will help. Most trees we are
181 reassociating have 3 or fewer ops. If they have 2 ops, they will
182 already be written into a nice single binary op. If you have 3 ops, a
183 single simple check suffices to tell you whether the first two are of
184 the same rank. If so, you know to order it
185
186 mergetmp = op1 + op2
187 newstmt = mergetmp + op3
188
189 instead of
190 mergetmp = op2 + op3
191 newstmt = mergetmp + op1
192
193 If all three are of the same rank, you can't expose them all in a
194 single binary operator anyway, so the above is *still* the best you
195 can do.
196
197 Thus, this is what we do. When we have three ops left, we check to see
198 what order to put them in, and call it a day. As a nod to vector sum
199 reduction, we check if any of the ops are really a phi node that is a
200 destructive update for the associating op, and keep the destructive
201 update together for vector sum reduction recognition. */
202
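/* As a small end-to-end illustration of the five steps above (an
   editorial sketch, not an excerpt from an actual dump): given

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 + b;

   step 1 rewrites the subtraction as t1 = a + (-b); step 2 linearizes
   the chain for t3 into the operand list {a, -b, c, b}; step 3 cancels
   -b against b; step 4 rewrites the remaining operands in rank order,
   leaving effectively t3 = a + c; and step 5 folds any surviving
   negate back into a subtraction.  */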
203
204 /* Statistics */
205 static struct
206 {
207 int linearized;
208 int constants_eliminated;
209 int ops_eliminated;
210 int rewritten;
211 int pows_encountered;
212 int pows_created;
213 } reassociate_stats;
214
215 /* Operator, rank pair. */
216 typedef struct operand_entry
217 {
218 unsigned int rank;
219 int id;
220 tree op;
221 unsigned int count;
222 } *operand_entry_t;
223
224 static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
225 30);
226
227 /* This is used to assign a unique ID to each struct operand_entry
228 so that qsort results are identical on different hosts. */
229 static int next_operand_entry_id;
230
231 /* Starting rank number for a given basic block, so that we can rank
232 operations using unmovable instructions in that BB based on the bb
233 depth. */
234 static long *bb_rank;
235
236 /* Operand->rank hashtable. */
237 static hash_map<tree, long> *operand_rank;
238
239 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
240 all basic blocks, the CFG should be adjusted: each such basic block
241 is split right after the SSA_NAME's definition statement and before
242 its only use, which must be a bit ior. */
243 static vec<tree> reassoc_branch_fixups;
244
245 /* Forward decls. */
246 static long get_rank (tree);
247 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
248
249 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
250 possibly added by gsi_remove. */
251
252 bool
253 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
254 {
255 gimple stmt = gsi_stmt (*gsi);
256
257 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
258 return gsi_remove (gsi, true);
259
260 gimple_stmt_iterator prev = *gsi;
261 gsi_prev (&prev);
262 unsigned uid = gimple_uid (stmt);
263 basic_block bb = gimple_bb (stmt);
264 bool ret = gsi_remove (gsi, true);
265 if (!gsi_end_p (prev))
266 gsi_next (&prev);
267 else
268 prev = gsi_start_bb (bb);
269 gimple end_stmt = gsi_stmt (*gsi);
270 while ((stmt = gsi_stmt (prev)) != end_stmt)
271 {
272 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
273 gimple_set_uid (stmt, uid);
274 gsi_next (&prev);
275 }
276 return ret;
277 }
278
279 /* Bias amount for loop-carried phis. We want this to be larger than
280 the depth of any reassociation tree we can see, but not larger than
281 the rank difference between two blocks. */
282 #define PHI_LOOP_BIAS (1 << 15)
283
284 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
285 an innermost loop, and the phi has only a single use which is inside
286 the loop, then the rank is the block rank of the loop latch plus an
287 extra bias for the loop-carried dependence. This causes expressions
288 calculated into an accumulator variable to be independent for each
289 iteration of the loop. If STMT is some other phi, the rank is the
290 block rank of its containing block. */
291 static long
292 phi_rank (gimple stmt)
293 {
294 basic_block bb = gimple_bb (stmt);
295 struct loop *father = bb->loop_father;
296 tree res;
297 unsigned i;
298 use_operand_p use;
299 gimple use_stmt;
300
301 /* We only care about real loops (those with a latch). */
302 if (!father->latch)
303 return bb_rank[bb->index];
304
305 /* Interesting phis must be in headers of innermost loops. */
306 if (bb != father->header
307 || father->inner)
308 return bb_rank[bb->index];
309
310 /* Ignore virtual SSA_NAMEs. */
311 res = gimple_phi_result (stmt);
312 if (virtual_operand_p (res))
313 return bb_rank[bb->index];
314
315 /* The phi definition must have a single use, and that use must be
316 within the loop. Otherwise this isn't an accumulator pattern. */
317 if (!single_imm_use (res, &use, &use_stmt)
318 || gimple_bb (use_stmt)->loop_father != father)
319 return bb_rank[bb->index];
320
321 /* Look for phi arguments from within the loop. If found, bias this phi. */
322 for (i = 0; i < gimple_phi_num_args (stmt); i++)
323 {
324 tree arg = gimple_phi_arg_def (stmt, i);
325 if (TREE_CODE (arg) == SSA_NAME
326 && !SSA_NAME_IS_DEFAULT_DEF (arg))
327 {
328 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
329 if (gimple_bb (def_stmt)->loop_father == father)
330 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
331 }
332 }
333
334 /* Must be an uninteresting phi. */
335 return bb_rank[bb->index];
336 }
337
338 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
339 loop-carried dependence of an innermost loop, return TRUE; else
340 return FALSE. */
341 static bool
342 loop_carried_phi (tree exp)
343 {
344 gimple phi_stmt;
345 long block_rank;
346
347 if (TREE_CODE (exp) != SSA_NAME
348 || SSA_NAME_IS_DEFAULT_DEF (exp))
349 return false;
350
351 phi_stmt = SSA_NAME_DEF_STMT (exp);
352
353 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
354 return false;
355
356 /* Non-loop-carried phis have block rank. Loop-carried phis have
357 an additional bias added in. If this phi doesn't have block rank,
358 it's biased and should not be propagated. */
359 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
360
361 if (phi_rank (phi_stmt) != block_rank)
362 return true;
363
364 return false;
365 }
366
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static long
372 propagate_rank (long rank, tree op)
373 {
374 long op_rank;
375
376 if (loop_carried_phi (op))
377 return rank;
378
379 op_rank = get_rank (op);
380
381 return MAX (rank, op_rank);
382 }
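
/* For example, with the accumulator loop shown in get_rank below, the
   biased rank of the loop-carried phi x_1 is not propagated into
   b = a + x_1; only x_1 itself keeps the artificially high rank, so it
   is the operand that ends up being added last when the chain is
   rewritten.  */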
383
384 /* Look up the operand rank structure for expression E. */
385
386 static inline long
387 find_operand_rank (tree e)
388 {
389 long *slot = operand_rank->get (e);
390 return slot ? *slot : -1;
391 }
392
393 /* Insert {E,RANK} into the operand rank hashtable. */
394
395 static inline void
396 insert_operand_rank (tree e, long rank)
397 {
398 gcc_assert (rank > 0);
399 gcc_assert (!operand_rank->put (e, rank));
400 }
401
402 /* Given an expression E, return the rank of the expression. */
403
404 static long
405 get_rank (tree e)
406 {
407 /* SSA_NAMEs have the rank of the expression they are the result
408 of.
409 For globals and uninitialized values, the rank is 0.
410 For function arguments, use the pre-setup rank.
411 For PHI nodes, stores, asm statements, etc, we use the rank of
412 the BB.
413 For simple operations, the rank is the maximum rank of any of
414 its operands, or the bb_rank, whichever is less.
415 I make no claims that this is optimal; however, it gives good
416 results. */
417
418 /* We make an exception to the normal ranking system to break
419 dependences of accumulator variables in loops. Suppose we
420 have a simple one-block loop containing:
421
422 x_1 = phi(x_0, x_2)
423 b = a + x_1
424 c = b + d
425 x_2 = c + e
426
427 As shown, each iteration of the calculation into x is fully
428 dependent upon the iteration before it. We would prefer to
429 see this in the form:
430
431 x_1 = phi(x_0, x_2)
432 b = a + d
433 c = b + e
434 x_2 = c + x_1
435
436 If the loop is unrolled, the calculations of b and c from
437 different iterations can be interleaved.
438
439 To obtain this result during reassociation, we bias the rank
440 of the phi definition x_1 upward, when it is recognized as an
441 accumulator pattern. The artificial rank causes it to be
442 added last, providing the desired independence. */
443
444 if (TREE_CODE (e) == SSA_NAME)
445 {
446 ssa_op_iter iter;
447 gimple stmt;
448 long rank;
449 tree op;
450
451 if (SSA_NAME_IS_DEFAULT_DEF (e))
452 return find_operand_rank (e);
453
454 stmt = SSA_NAME_DEF_STMT (e);
455 if (gimple_code (stmt) == GIMPLE_PHI)
456 return phi_rank (stmt);
457
458 if (!is_gimple_assign (stmt))
459 return bb_rank[gimple_bb (stmt)->index];
460
461 /* If we already have a rank for this expression, use that. */
462 rank = find_operand_rank (e);
463 if (rank != -1)
464 return rank;
465
466 /* Otherwise, find the maximum rank for the operands. As an
467 exception, remove the bias from loop-carried phis when propagating
468 the rank so that dependent operations are not also biased. */
469 /* Simply walk over all SSA uses - this takes advantage of the
470 fact that non-SSA operands are is_gimple_min_invariant and
471 thus have rank 0. */
472 rank = 0;
473 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
474 rank = propagate_rank (rank, op);
475
476 if (dump_file && (dump_flags & TDF_DETAILS))
477 {
478 fprintf (dump_file, "Rank for ");
479 print_generic_expr (dump_file, e, 0);
480 fprintf (dump_file, " is %ld\n", (rank + 1));
481 }
482
483 /* Note the rank in the hashtable so we don't recompute it. */
484 insert_operand_rank (e, (rank + 1));
485 return (rank + 1);
486 }
487
488 /* Constants, globals, etc., are rank 0 */
489 return 0;
490 }
491
492
493 /* We want integer ones to end up last no matter what, since they are
494 the ones we can do the most with. */
495 #define INTEGER_CONST_TYPE 1 << 3
496 #define FLOAT_CONST_TYPE 1 << 2
497 #define OTHER_CONST_TYPE 1 << 1
498
499 /* Classify an invariant tree into integer, float, or other, so that
500 we can sort them to be near other constants of the same type. */
501 static inline int
502 constant_type (tree t)
503 {
504 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
505 return INTEGER_CONST_TYPE;
506 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
507 return FLOAT_CONST_TYPE;
508 else
509 return OTHER_CONST_TYPE;
510 }
511
512 /* qsort comparison function to sort operand entries PA and PB by rank
513 so that the sorted array is ordered by rank in decreasing order. */
514 static int
515 sort_by_operand_rank (const void *pa, const void *pb)
516 {
517 const operand_entry_t oea = *(const operand_entry_t *)pa;
518 const operand_entry_t oeb = *(const operand_entry_t *)pb;
519
520 /* It's nicer for optimize_expression if constants that are likely
521 to fold when added/multiplied/whatever are put next to each
522 other. Since all constants have rank 0, order them by type. */
523 if (oeb->rank == 0 && oea->rank == 0)
524 {
525 if (constant_type (oeb->op) != constant_type (oea->op))
526 return constant_type (oeb->op) - constant_type (oea->op);
527 else
528 /* To make sorting result stable, we use unique IDs to determine
529 order. */
530 return oeb->id - oea->id;
531 }
532
533 /* Lastly, make sure the versions that are the same go next to each
534 other. */
535 if ((oeb->rank - oea->rank == 0)
536 && TREE_CODE (oea->op) == SSA_NAME
537 && TREE_CODE (oeb->op) == SSA_NAME)
538 {
539 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
540 versions of removed SSA_NAMEs, so if possible prefer to sort
541 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
542 See PR60418. */
543 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
544 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
545 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 {
547 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
548 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
549 basic_block bba = gimple_bb (stmta);
550 basic_block bbb = gimple_bb (stmtb);
551 if (bbb != bba)
552 {
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return bb_rank[bbb->index] - bb_rank[bba->index];
555 }
556 else
557 {
558 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
559 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
560 if (da != db)
561 return da ? 1 : -1;
562 }
563 }
564
565 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
566 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
567 else
568 return oeb->id - oea->id;
569 }
570
571 if (oeb->rank != oea->rank)
572 return oeb->rank - oea->rank;
573 else
574 return oeb->id - oea->id;
575 }
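
/* For illustration: in an integer chain a + 3 + b + 5, the two
   constants both have rank 0 and constant_type INTEGER_CONST_TYPE, so
   this comparison places them next to each other at the end of the
   sorted vector, where optimize_ops_list can later merge them into a
   single constant 8.  */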
576
577 /* Add an operand entry to *OPS for the tree operand OP. */
578
579 static void
580 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
581 {
582 operand_entry_t oe = operand_entry_pool.allocate ();
583
584 oe->op = op;
585 oe->rank = get_rank (op);
586 oe->id = next_operand_entry_id++;
587 oe->count = 1;
588 ops->safe_push (oe);
589 }
590
591 /* Add an operand entry to *OPS for the tree operand OP with repeat
592 count REPEAT. */
593
594 static void
595 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
596 HOST_WIDE_INT repeat)
597 {
598 operand_entry_t oe = operand_entry_pool.allocate ();
599
600 oe->op = op;
601 oe->rank = get_rank (op);
602 oe->id = next_operand_entry_id++;
603 oe->count = repeat;
604 ops->safe_push (oe);
605
606 reassociate_stats.pows_encountered++;
607 }
608
609 /* Return true if STMT is a reassociable operation containing a binary
610 operation with tree code CODE, and is inside LOOP. */
611
612 static bool
613 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
614 {
615 basic_block bb = gimple_bb (stmt);
616
617 if (gimple_bb (stmt) == NULL)
618 return false;
619
620 if (!flow_bb_inside_loop_p (loop, bb))
621 return false;
622
623 if (is_gimple_assign (stmt)
624 && gimple_assign_rhs_code (stmt) == code
625 && has_single_use (gimple_assign_lhs (stmt)))
626 return true;
627
628 return false;
629 }
630
631
632 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
633 operand of that operation. Otherwise, return NULL_TREE. */
634
635 static tree
636 get_unary_op (tree name, enum tree_code opcode)
637 {
638 gimple stmt = SSA_NAME_DEF_STMT (name);
639
640 if (!is_gimple_assign (stmt))
641 return NULL_TREE;
642
643 if (gimple_assign_rhs_code (stmt) == opcode)
644 return gimple_assign_rhs1 (stmt);
645 return NULL_TREE;
646 }
647
648 /* If CURR and LAST are a pair of ops that OPCODE allows us to
649 eliminate through equivalences, do so, remove them from OPS, and
650 return true. Otherwise, return false. */
651
652 static bool
653 eliminate_duplicate_pair (enum tree_code opcode,
654 vec<operand_entry_t> *ops,
655 bool *all_done,
656 unsigned int i,
657 operand_entry_t curr,
658 operand_entry_t last)
659 {
660
661 /* If we have two of the same op, and the opcode is &, |, min, or max,
662 we can eliminate one of them.
663 If we have two of the same op, and the opcode is ^, we can
664 eliminate both of them. */
665
666 if (last && last->op == curr->op)
667 {
668 switch (opcode)
669 {
670 case MAX_EXPR:
671 case MIN_EXPR:
672 case BIT_IOR_EXPR:
673 case BIT_AND_EXPR:
674 if (dump_file && (dump_flags & TDF_DETAILS))
675 {
676 fprintf (dump_file, "Equivalence: ");
677 print_generic_expr (dump_file, curr->op, 0);
678 fprintf (dump_file, " [&|minmax] ");
679 print_generic_expr (dump_file, last->op, 0);
680 fprintf (dump_file, " -> ");
681 print_generic_stmt (dump_file, last->op, 0);
682 }
683
684 ops->ordered_remove (i);
685 reassociate_stats.ops_eliminated ++;
686
687 return true;
688
689 case BIT_XOR_EXPR:
690 if (dump_file && (dump_flags & TDF_DETAILS))
691 {
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, curr->op, 0);
694 fprintf (dump_file, " ^ ");
695 print_generic_expr (dump_file, last->op, 0);
696 fprintf (dump_file, " -> nothing\n");
697 }
698
699 reassociate_stats.ops_eliminated += 2;
700
701 if (ops->length () == 2)
702 {
703 ops->create (0);
704 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
705 *all_done = true;
706 }
707 else
708 {
709 ops->ordered_remove (i-1);
710 ops->ordered_remove (i-1);
711 }
712
713 return true;
714
715 default:
716 break;
717 }
718 }
719 return false;
720 }
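
/* For example, in a chain a & a & b one copy of the duplicated a is
   removed, leaving a & b; in a chain a ^ a ^ b both copies of a cancel,
   leaving just b (or a single 0 if nothing else remains).  */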
721
722 static vec<tree> plus_negates;
723
724 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
725 not expression, look in OPS for a corresponding positive operation to
726 cancel it out. If we find one, remove the other from OPS, replace
727 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
728 return false. */
729
730 static bool
731 eliminate_plus_minus_pair (enum tree_code opcode,
732 vec<operand_entry_t> *ops,
733 unsigned int currindex,
734 operand_entry_t curr)
735 {
736 tree negateop;
737 tree notop;
738 unsigned int i;
739 operand_entry_t oe;
740
741 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
742 return false;
743
744 negateop = get_unary_op (curr->op, NEGATE_EXPR);
745 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
746 if (negateop == NULL_TREE && notop == NULL_TREE)
747 return false;
748
749 /* Any non-negated version will have a rank that is one less than
750 the current rank. So once we hit those ranks, if we don't find
751 one, we can stop. */
752
753 for (i = currindex + 1;
754 ops->iterate (i, &oe)
755 && oe->rank >= curr->rank - 1 ;
756 i++)
757 {
758 if (oe->op == negateop)
759 {
760
761 if (dump_file && (dump_flags & TDF_DETAILS))
762 {
763 fprintf (dump_file, "Equivalence: ");
764 print_generic_expr (dump_file, negateop, 0);
765 fprintf (dump_file, " + -");
766 print_generic_expr (dump_file, oe->op, 0);
767 fprintf (dump_file, " -> 0\n");
768 }
769
770 ops->ordered_remove (i);
771 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
772 ops->ordered_remove (currindex);
773 reassociate_stats.ops_eliminated ++;
774
775 return true;
776 }
777 else if (oe->op == notop)
778 {
779 tree op_type = TREE_TYPE (oe->op);
780
781 if (dump_file && (dump_flags & TDF_DETAILS))
782 {
783 fprintf (dump_file, "Equivalence: ");
784 print_generic_expr (dump_file, notop, 0);
785 fprintf (dump_file, " + ~");
786 print_generic_expr (dump_file, oe->op, 0);
787 fprintf (dump_file, " -> -1\n");
788 }
789
790 ops->ordered_remove (i);
791 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
792 ops->ordered_remove (currindex);
793 reassociate_stats.ops_eliminated ++;
794
795 return true;
796 }
797 }
798
799 /* CURR->OP is a negate expr in a plus expr: save it for later
800 inspection in repropagate_negates(). */
801 if (negateop != NULL_TREE)
802 plus_negates.safe_push (curr->op);
803
804 return false;
805 }
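
/* For example, in the chain t + c + (-t) the negate cancels against t
   and a 0 is pushed instead, so the list effectively becomes c + 0;
   likewise t + c + ~t becomes c + (-1), since t + ~t is all ones in
   two's complement.  */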
806
807 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
808 bitwise not expression, look in OPS for a corresponding operand to
809 cancel it out. If we find one, replace the whole operand list with
810 the single constant -1 or 0, respectively, and return true.
811 Otherwise, return false. */
812
813 static bool
814 eliminate_not_pairs (enum tree_code opcode,
815 vec<operand_entry_t> *ops,
816 unsigned int currindex,
817 operand_entry_t curr)
818 {
819 tree notop;
820 unsigned int i;
821 operand_entry_t oe;
822
823 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
824 || TREE_CODE (curr->op) != SSA_NAME)
825 return false;
826
827 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
828 if (notop == NULL_TREE)
829 return false;
830
831 /* Any non-not version will have a rank that is one less than
832 the current rank. So once we hit those ranks, if we don't find
833 one, we can stop. */
834
835 for (i = currindex + 1;
836 ops->iterate (i, &oe)
837 && oe->rank >= curr->rank - 1;
838 i++)
839 {
840 if (oe->op == notop)
841 {
842 if (dump_file && (dump_flags & TDF_DETAILS))
843 {
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop, 0);
846 if (opcode == BIT_AND_EXPR)
847 fprintf (dump_file, " & ~");
848 else if (opcode == BIT_IOR_EXPR)
849 fprintf (dump_file, " | ~");
850 print_generic_expr (dump_file, oe->op, 0);
851 if (opcode == BIT_AND_EXPR)
852 fprintf (dump_file, " -> 0\n");
853 else if (opcode == BIT_IOR_EXPR)
854 fprintf (dump_file, " -> -1\n");
855 }
856
857 if (opcode == BIT_AND_EXPR)
858 oe->op = build_zero_cst (TREE_TYPE (oe->op));
859 else if (opcode == BIT_IOR_EXPR)
860 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
861
862 reassociate_stats.ops_eliminated += ops->length () - 1;
863 ops->truncate (0);
864 ops->quick_push (oe);
865 return true;
866 }
867 }
868
869 return false;
870 }
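
/* For example, a chain a & b & ~a collapses to 0 and a chain
   a | b | ~a collapses to -1 (all ones); unlike the other
   eliminations, the whole operand list is replaced by the single
   constant, since it absorbs everything else.  */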
871
872 /* Use a constant value that may be present in OPS to try to eliminate
873 operands. Note that this function is only really used when we've
874 eliminated ops for other reasons, or merged constants. Across
875 single statements, fold already does all of this, plus more. There
876 is little point in duplicating logic, so I've only included the
877 identities I could construct testcases to trigger. */
878
879 static void
880 eliminate_using_constants (enum tree_code opcode,
881 vec<operand_entry_t> *ops)
882 {
883 operand_entry_t oelast = ops->last ();
884 tree type = TREE_TYPE (oelast->op);
885
886 if (oelast->rank == 0
887 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
888 {
889 switch (opcode)
890 {
891 case BIT_AND_EXPR:
892 if (integer_zerop (oelast->op))
893 {
894 if (ops->length () != 1)
895 {
896 if (dump_file && (dump_flags & TDF_DETAILS))
897 fprintf (dump_file, "Found & 0, removing all other ops\n");
898
899 reassociate_stats.ops_eliminated += ops->length () - 1;
900
901 ops->truncate (0);
902 ops->quick_push (oelast);
903 return;
904 }
905 }
906 else if (integer_all_onesp (oelast->op))
907 {
908 if (ops->length () != 1)
909 {
910 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Found & -1, removing\n");
912 ops->pop ();
913 reassociate_stats.ops_eliminated++;
914 }
915 }
916 break;
917 case BIT_IOR_EXPR:
918 if (integer_all_onesp (oelast->op))
919 {
920 if (ops->length () != 1)
921 {
922 if (dump_file && (dump_flags & TDF_DETAILS))
923 fprintf (dump_file, "Found | -1, removing all other ops\n");
924
925 reassociate_stats.ops_eliminated += ops->length () - 1;
926
927 ops->truncate (0);
928 ops->quick_push (oelast);
929 return;
930 }
931 }
932 else if (integer_zerop (oelast->op))
933 {
934 if (ops->length () != 1)
935 {
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found | 0, removing\n");
938 ops->pop ();
939 reassociate_stats.ops_eliminated++;
940 }
941 }
942 break;
943 case MULT_EXPR:
944 if (integer_zerop (oelast->op)
945 || (FLOAT_TYPE_P (type)
946 && !HONOR_NANS (type)
947 && !HONOR_SIGNED_ZEROS (type)
948 && real_zerop (oelast->op)))
949 {
950 if (ops->length () != 1)
951 {
952 if (dump_file && (dump_flags & TDF_DETAILS))
953 fprintf (dump_file, "Found * 0, removing all other ops\n");
954
955 reassociate_stats.ops_eliminated += ops->length () - 1;
956 ops->truncate (1);
957 ops->quick_push (oelast);
958 return;
959 }
960 }
961 else if (integer_onep (oelast->op)
962 || (FLOAT_TYPE_P (type)
963 && !HONOR_SNANS (type)
964 && real_onep (oelast->op)))
965 {
966 if (ops->length () != 1)
967 {
968 if (dump_file && (dump_flags & TDF_DETAILS))
969 fprintf (dump_file, "Found * 1, removing\n");
970 ops->pop ();
971 reassociate_stats.ops_eliminated++;
972 return;
973 }
974 }
975 break;
976 case BIT_XOR_EXPR:
977 case PLUS_EXPR:
978 case MINUS_EXPR:
979 if (integer_zerop (oelast->op)
980 || (FLOAT_TYPE_P (type)
981 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
982 && fold_real_zero_addition_p (type, oelast->op,
983 opcode == MINUS_EXPR)))
984 {
985 if (ops->length () != 1)
986 {
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found [|^+] 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
991 return;
992 }
993 }
994 break;
995 default:
996 break;
997 }
998 }
999 }
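
/* For example, once constant merging has produced a chain such as
   a & b & 0, the whole list is reduced to 0; a chain a * b * 1 simply
   drops the 1; and a chain a + b + 0 drops the 0. The floating-point
   variants are only applied when the checks above (no NaNs, signed
   zeros or signaling NaNs to honor) make them safe.  */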
1000
1001
1002 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1003 bool, bool);
1004
1005 /* Structure for tracking and counting operands. */
1006 typedef struct oecount_s {
1007 int cnt;
1008 int id;
1009 enum tree_code oecode;
1010 tree op;
1011 } oecount;
1012
1013
1014 /* The heap for the oecount hashtable and the sorted list of operands. */
1015 static vec<oecount> cvec;
1016
1017
1018 /* Oecount hashtable helpers. */
1019
1020 struct oecount_hasher : int_hash <int, 0, 1>
1021 {
1022 static inline hashval_t hash (int);
1023 static inline bool equal (int, int);
1024 };
1025
1026 /* Hash function for oecount. */
1027
1028 inline hashval_t
1029 oecount_hasher::hash (int p)
1030 {
1031 const oecount *c = &cvec[p - 42];
1032 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1033 }
1034
1035 /* Comparison function for oecount. */
1036
1037 inline bool
1038 oecount_hasher::equal (int p1, int p2)
1039 {
1040 const oecount *c1 = &cvec[p1 - 42];
1041 const oecount *c2 = &cvec[p2 - 42];
1042 return (c1->oecode == c2->oecode
1043 && c1->op == c2->op);
1044 }
1045
1046 /* Comparison function for qsort sorting oecount elements by count. */
1047
1048 static int
1049 oecount_cmp (const void *p1, const void *p2)
1050 {
1051 const oecount *c1 = (const oecount *)p1;
1052 const oecount *c2 = (const oecount *)p2;
1053 if (c1->cnt != c2->cnt)
1054 return c1->cnt - c2->cnt;
1055 else
1056 /* If counts are identical, use unique IDs to stabilize qsort. */
1057 return c1->id - c2->id;
1058 }
1059
1060 /* Return TRUE iff STMT represents a builtin call that raises OP
1061 to some exponent. */
1062
1063 static bool
1064 stmt_is_power_of_op (gimple stmt, tree op)
1065 {
1066 tree fndecl;
1067
1068 if (!is_gimple_call (stmt))
1069 return false;
1070
1071 fndecl = gimple_call_fndecl (stmt);
1072
1073 if (!fndecl
1074 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1075 return false;
1076
1077 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1078 {
1079 CASE_FLT_FN (BUILT_IN_POW):
1080 CASE_FLT_FN (BUILT_IN_POWI):
1081 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1082
1083 default:
1084 return false;
1085 }
1086 }
1087
1088 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1089 in place and return the new exponent. Assumes that stmt_is_power_of_op
1090 was previously called for STMT and returned TRUE. */
1091
1092 static HOST_WIDE_INT
1093 decrement_power (gimple stmt)
1094 {
1095 REAL_VALUE_TYPE c, cint;
1096 HOST_WIDE_INT power;
1097 tree arg1;
1098
1099 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1100 {
1101 CASE_FLT_FN (BUILT_IN_POW):
1102 arg1 = gimple_call_arg (stmt, 1);
1103 c = TREE_REAL_CST (arg1);
1104 power = real_to_integer (&c) - 1;
1105 real_from_integer (&cint, VOIDmode, power, SIGNED);
1106 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1107 return power;
1108
1109 CASE_FLT_FN (BUILT_IN_POWI):
1110 arg1 = gimple_call_arg (stmt, 1);
1111 power = TREE_INT_CST_LOW (arg1) - 1;
1112 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1113 return power;
1114
1115 default:
1116 gcc_unreachable ();
1117 }
1118 }
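
/* For example, if STMT is t = __builtin_powi (x, 5), decrement_power
   rewrites it in place to t = __builtin_powi (x, 4) and returns 4;
   zero_one_operation below uses this to peel one factor of x out of a
   repeated-factor chain, and once the decremented exponent reaches 1
   the call is replaced by its argument entirely.  */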
1119
1120 /* Find the single immediate use of STMT's LHS, and replace it
1121 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1122 replace *DEF with OP as well. */
1123
1124 static void
1125 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1126 {
1127 tree lhs;
1128 gimple use_stmt;
1129 use_operand_p use;
1130 gimple_stmt_iterator gsi;
1131
1132 if (is_gimple_call (stmt))
1133 lhs = gimple_call_lhs (stmt);
1134 else
1135 lhs = gimple_assign_lhs (stmt);
1136
1137 gcc_assert (has_single_use (lhs));
1138 single_imm_use (lhs, &use, &use_stmt);
1139 if (lhs == *def)
1140 *def = op;
1141 SET_USE (use, op);
1142 if (TREE_CODE (op) != SSA_NAME)
1143 update_stmt (use_stmt);
1144 gsi = gsi_for_stmt (stmt);
1145 unlink_stmt_vdef (stmt);
1146 reassoc_remove_stmt (&gsi);
1147 release_defs (stmt);
1148 }
1149
1150 /* Walks the linear chain with result *DEF searching for an operation
1151 with operand OP and code OPCODE, removing that operand from the chain.
1152 *DEF is updated if there is only one operand but no operation left. */
1153
1154 static void
1155 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1156 {
1157 gimple stmt = SSA_NAME_DEF_STMT (*def);
1158
1159 do
1160 {
1161 tree name;
1162
1163 if (opcode == MULT_EXPR
1164 && stmt_is_power_of_op (stmt, op))
1165 {
1166 if (decrement_power (stmt) == 1)
1167 propagate_op_to_single_use (op, stmt, def);
1168 return;
1169 }
1170
1171 name = gimple_assign_rhs1 (stmt);
1172
1173 /* If this is the operation we look for and one of the operands
1174 is ours, simply propagate the other operand into the stmt's
1175 single use. */
1176 if (gimple_assign_rhs_code (stmt) == opcode
1177 && (name == op
1178 || gimple_assign_rhs2 (stmt) == op))
1179 {
1180 if (name == op)
1181 name = gimple_assign_rhs2 (stmt);
1182 propagate_op_to_single_use (name, stmt, def);
1183 return;
1184 }
1185
1186 /* We might have a multiply of two __builtin_pow* calls, and
1187 the operand might be hiding in the rightmost one. */
1188 if (opcode == MULT_EXPR
1189 && gimple_assign_rhs_code (stmt) == opcode
1190 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1191 && has_single_use (gimple_assign_rhs2 (stmt)))
1192 {
1193 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1194 if (stmt_is_power_of_op (stmt2, op))
1195 {
1196 if (decrement_power (stmt2) == 1)
1197 propagate_op_to_single_use (op, stmt2, def);
1198 return;
1199 }
1200 }
1201
1202 /* Continue walking the chain. */
1203 gcc_assert (name != op
1204 && TREE_CODE (name) == SSA_NAME);
1205 stmt = SSA_NAME_DEF_STMT (name);
1206 }
1207 while (1);
1208 }
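
/* For example, with *DEF being t3 in the chain t1 = a * b;
   t2 = t1 * c; t3 = t2 * d; a call with OPCODE == MULT_EXPR and
   OP == c walks up from t3, finds c as an operand of t2, and forwards
   t1 into t2's single use, leaving effectively t3 = (a * b) * d.  */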
1209
1210 /* Returns true if statement S1 dominates statement S2. Like
1211 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1212
1213 static bool
1214 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1215 {
1216 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1217
1218 /* If bb1 is NULL, S1 should be the GIMPLE_NOP def stmt of a default-def
1219 (D) SSA_NAME. Assume it lives at the beginning of the function and
1220 thus dominates everything. */
1221 if (!bb1 || s1 == s2)
1222 return true;
1223
1224 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1225 if (!bb2)
1226 return false;
1227
1228 if (bb1 == bb2)
1229 {
1230 /* PHIs in the same basic block are assumed to be
1231 executed all in parallel; if only one stmt is a PHI,
1232 it dominates the other stmt in the same basic block. */
1233 if (gimple_code (s1) == GIMPLE_PHI)
1234 return true;
1235
1236 if (gimple_code (s2) == GIMPLE_PHI)
1237 return false;
1238
1239 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1240
1241 if (gimple_uid (s1) < gimple_uid (s2))
1242 return true;
1243
1244 if (gimple_uid (s1) > gimple_uid (s2))
1245 return false;
1246
1247 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1248 unsigned int uid = gimple_uid (s1);
1249 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1250 {
1251 gimple s = gsi_stmt (gsi);
1252 if (gimple_uid (s) != uid)
1253 break;
1254 if (s == s2)
1255 return true;
1256 }
1257
1258 return false;
1259 }
1260
1261 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1262 }
1263
1264 /* Insert STMT after INSERT_POINT. */
1265
1266 static void
1267 insert_stmt_after (gimple stmt, gimple insert_point)
1268 {
1269 gimple_stmt_iterator gsi;
1270 basic_block bb;
1271
1272 if (gimple_code (insert_point) == GIMPLE_PHI)
1273 bb = gimple_bb (insert_point);
1274 else if (!stmt_ends_bb_p (insert_point))
1275 {
1276 gsi = gsi_for_stmt (insert_point);
1277 gimple_set_uid (stmt, gimple_uid (insert_point));
1278 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1279 return;
1280 }
1281 else
1282 /* We assume INSERT_POINT is the SSA_NAME_DEF_STMT of some SSA_NAME;
1283 thus, if it must end a basic block, it should be a call or an
1284 assignment that can throw. If it throws, its LHS will not be
1285 initialized, so the only valid places using the SSA_NAME should be
1286 dominated by the fallthru edge. */
1287 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1288 gsi = gsi_after_labels (bb);
1289 if (gsi_end_p (gsi))
1290 {
1291 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1292 gimple_set_uid (stmt,
1293 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1294 }
1295 else
1296 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1297 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1298 }
1299
1300 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1301 of type TYPE for the result. Places the statement after the definition
1302 of either OP1 or OP2. Returns the new statement. */
1303
1304 static gimple
1305 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1306 {
1307 gimple op1def = NULL, op2def = NULL;
1308 gimple_stmt_iterator gsi;
1309 tree op;
1310 gassign *sum;
1311
1312 /* Create the addition statement. */
1313 op = make_ssa_name (type);
1314 sum = gimple_build_assign (op, opcode, op1, op2);
1315
1316 /* Find an insertion place and insert. */
1317 if (TREE_CODE (op1) == SSA_NAME)
1318 op1def = SSA_NAME_DEF_STMT (op1);
1319 if (TREE_CODE (op2) == SSA_NAME)
1320 op2def = SSA_NAME_DEF_STMT (op2);
1321 if ((!op1def || gimple_nop_p (op1def))
1322 && (!op2def || gimple_nop_p (op2def)))
1323 {
1324 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1325 if (gsi_end_p (gsi))
1326 {
1327 gimple_stmt_iterator gsi2
1328 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1329 gimple_set_uid (sum,
1330 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1331 }
1332 else
1333 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1334 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1335 }
1336 else
1337 {
1338 gimple insert_point;
1339 if ((!op1def || gimple_nop_p (op1def))
1340 || (op2def && !gimple_nop_p (op2def)
1341 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1342 insert_point = op2def;
1343 else
1344 insert_point = op1def;
1345 insert_stmt_after (sum, insert_point);
1346 }
1347 update_stmt (sum);
1348
1349 return sum;
1350 }
1351
1352 /* Perform un-distribution of divisions and multiplications.
1353 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1354 to (A + B) / X for real X.
1355
1356 The algorithm is organized as follows.
1357
1358 - First we walk the addition chain *OPS looking for summands that
1359 are defined by a multiplication or a real division. This results
1360 in the candidates bitmap with relevant indices into *OPS.
1361
1362 - Second we build the chains of multiplications or divisions for
1363 these candidates, counting the number of occurrences of (operand, code)
1364 pairs in all of the candidates chains.
1365
1366 - Third we sort the (operand, code) pairs by number of occurrence and
1367 process them starting with the pair with the most uses.
1368
1369 * For each such pair we walk the candidates again to build a
1370 second candidate bitmap noting all multiplication/division chains
1371 that have at least one occurrence of (operand, code).
1372
1373 * We build an alternate addition chain only covering these
1374 candidates with one (operand, code) operation removed from their
1375 multiplication/division chain.
1376
1377 * The first candidate gets replaced by the alternate addition chain
1378 multiplied/divided by the operand.
1379
1380 * All candidate chains get disabled for further processing and
1381 processing of (operand, code) pairs continues.
1382
1383 The alternate addition chains built are re-processed by the main
1384 reassociation algorithm which allows optimizing a * x * y + b * y * x
1385 to (a + b) * x * y in one invocation of the reassociation pass. */
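
/* A compact worked example (a sketch of the bookkeeping, not a dump):
   for the chain a * x + b * x + c, the candidates are the first two
   summands, and the counting table records (x, MULT_EXPR) with a count
   of two (and a and b with a count of one each), so x is processed
   first: one x is stripped from each candidate chain, the stripped
   summands are re-added as (a + b), the first candidate is replaced by
   (a + b) * x and the other is zeroed out, giving (a + b) * x + c
   overall.  */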
1386
1387 static bool
1388 undistribute_ops_list (enum tree_code opcode,
1389 vec<operand_entry_t> *ops, struct loop *loop)
1390 {
1391 unsigned int length = ops->length ();
1392 operand_entry_t oe1;
1393 unsigned i, j;
1394 sbitmap candidates, candidates2;
1395 unsigned nr_candidates, nr_candidates2;
1396 sbitmap_iterator sbi0;
1397 vec<operand_entry_t> *subops;
1398 bool changed = false;
1399 int next_oecount_id = 0;
1400
1401 if (length <= 1
1402 || opcode != PLUS_EXPR)
1403 return false;
1404
1405 /* Build a list of candidates to process. */
1406 candidates = sbitmap_alloc (length);
1407 bitmap_clear (candidates);
1408 nr_candidates = 0;
1409 FOR_EACH_VEC_ELT (*ops, i, oe1)
1410 {
1411 enum tree_code dcode;
1412 gimple oe1def;
1413
1414 if (TREE_CODE (oe1->op) != SSA_NAME)
1415 continue;
1416 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1417 if (!is_gimple_assign (oe1def))
1418 continue;
1419 dcode = gimple_assign_rhs_code (oe1def);
1420 if ((dcode != MULT_EXPR
1421 && dcode != RDIV_EXPR)
1422 || !is_reassociable_op (oe1def, dcode, loop))
1423 continue;
1424
1425 bitmap_set_bit (candidates, i);
1426 nr_candidates++;
1427 }
1428
1429 if (nr_candidates < 2)
1430 {
1431 sbitmap_free (candidates);
1432 return false;
1433 }
1434
1435 if (dump_file && (dump_flags & TDF_DETAILS))
1436 {
1437 fprintf (dump_file, "searching for un-distribute opportunities ");
1438 print_generic_expr (dump_file,
1439 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1440 fprintf (dump_file, " %d\n", nr_candidates);
1441 }
1442
1443 /* Build linearized sub-operand lists and the counting table. */
1444 cvec.create (0);
1445
1446 hash_table<oecount_hasher> ctable (15);
1447
1448 /* ??? Macro arguments cannot have multi-argument template types in
1449 them. This typedef is needed to work around that limitation. */
1450 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1451 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1452 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1453 {
1454 gimple oedef;
1455 enum tree_code oecode;
1456 unsigned j;
1457
1458 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1459 oecode = gimple_assign_rhs_code (oedef);
1460 linearize_expr_tree (&subops[i], oedef,
1461 associative_tree_code (oecode), false);
1462
1463 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1464 {
1465 oecount c;
1466 int *slot;
1467 int idx;
1468 c.oecode = oecode;
1469 c.cnt = 1;
1470 c.id = next_oecount_id++;
1471 c.op = oe1->op;
1472 cvec.safe_push (c);
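/* Bias the vector index by 42 so the value stored in the hash table
   can never be 0 or 1, which int_hash <int, 0, 1> reserves for the
   empty and deleted markers; hash and equal subtract 42 again to get
   back at the oecount.  */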
1473 idx = cvec.length () + 41;
1474 slot = ctable.find_slot (idx, INSERT);
1475 if (!*slot)
1476 {
1477 *slot = idx;
1478 }
1479 else
1480 {
1481 cvec.pop ();
1482 cvec[*slot - 42].cnt++;
1483 }
1484 }
1485 }
1486
1487 /* Sort the counting table. */
1488 cvec.qsort (oecount_cmp);
1489
1490 if (dump_file && (dump_flags & TDF_DETAILS))
1491 {
1492 oecount *c;
1493 fprintf (dump_file, "Candidates:\n");
1494 FOR_EACH_VEC_ELT (cvec, j, c)
1495 {
1496 fprintf (dump_file, " %u %s: ", c->cnt,
1497 c->oecode == MULT_EXPR
1498 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1499 print_generic_expr (dump_file, c->op, 0);
1500 fprintf (dump_file, "\n");
1501 }
1502 }
1503
1504 /* Process the (operand, code) pairs in order of most occurrence. */
1505 candidates2 = sbitmap_alloc (length);
1506 while (!cvec.is_empty ())
1507 {
1508 oecount *c = &cvec.last ();
1509 if (c->cnt < 2)
1510 break;
1511
1512 /* Now collect the operands in the outer chain that contain
1513 the common operand in their inner chain. */
1514 bitmap_clear (candidates2);
1515 nr_candidates2 = 0;
1516 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1517 {
1518 gimple oedef;
1519 enum tree_code oecode;
1520 unsigned j;
1521 tree op = (*ops)[i]->op;
1522
1523 /* If we already undistributed in this chain, this may be
1524 a constant. */
1525 if (TREE_CODE (op) != SSA_NAME)
1526 continue;
1527
1528 oedef = SSA_NAME_DEF_STMT (op);
1529 oecode = gimple_assign_rhs_code (oedef);
1530 if (oecode != c->oecode)
1531 continue;
1532
1533 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1534 {
1535 if (oe1->op == c->op)
1536 {
1537 bitmap_set_bit (candidates2, i);
1538 ++nr_candidates2;
1539 break;
1540 }
1541 }
1542 }
1543
1544 if (nr_candidates2 >= 2)
1545 {
1546 operand_entry_t oe1, oe2;
1547 gimple prod;
1548 int first = bitmap_first_set_bit (candidates2);
1549
1550 /* Build the new addition chain. */
1551 oe1 = (*ops)[first];
1552 if (dump_file && (dump_flags & TDF_DETAILS))
1553 {
1554 fprintf (dump_file, "Building (");
1555 print_generic_expr (dump_file, oe1->op, 0);
1556 }
1557 zero_one_operation (&oe1->op, c->oecode, c->op);
1558 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1559 {
1560 gimple sum;
1561 oe2 = (*ops)[i];
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1563 {
1564 fprintf (dump_file, " + ");
1565 print_generic_expr (dump_file, oe2->op, 0);
1566 }
1567 zero_one_operation (&oe2->op, c->oecode, c->op);
1568 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1569 oe1->op, oe2->op, opcode);
1570 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1571 oe2->rank = 0;
1572 oe1->op = gimple_get_lhs (sum);
1573 }
1574
1575 /* Apply the multiplication/division. */
1576 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1577 oe1->op, c->op, c->oecode);
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 {
1580 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1581 print_generic_expr (dump_file, c->op, 0);
1582 fprintf (dump_file, "\n");
1583 }
1584
1585 /* Record it in the addition chain and disable further
1586 undistribution with this op. */
1587 oe1->op = gimple_assign_lhs (prod);
1588 oe1->rank = get_rank (oe1->op);
1589 subops[first].release ();
1590
1591 changed = true;
1592 }
1593
1594 cvec.pop ();
1595 }
1596
1597 for (i = 0; i < ops->length (); ++i)
1598 subops[i].release ();
1599 free (subops);
1600 cvec.release ();
1601 sbitmap_free (candidates);
1602 sbitmap_free (candidates2);
1603
1604 return changed;
1605 }
1606
1607 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1608 expression, examine the other OPS to see if any of them are comparisons
1609 of the same values, which we may be able to combine or eliminate.
1610 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1611
1612 static bool
1613 eliminate_redundant_comparison (enum tree_code opcode,
1614 vec<operand_entry_t> *ops,
1615 unsigned int currindex,
1616 operand_entry_t curr)
1617 {
1618 tree op1, op2;
1619 enum tree_code lcode, rcode;
1620 gimple def1, def2;
1621 int i;
1622 operand_entry_t oe;
1623
1624 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1625 return false;
1626
1627 /* Check that CURR is a comparison. */
1628 if (TREE_CODE (curr->op) != SSA_NAME)
1629 return false;
1630 def1 = SSA_NAME_DEF_STMT (curr->op);
1631 if (!is_gimple_assign (def1))
1632 return false;
1633 lcode = gimple_assign_rhs_code (def1);
1634 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1635 return false;
1636 op1 = gimple_assign_rhs1 (def1);
1637 op2 = gimple_assign_rhs2 (def1);
1638
1639 /* Now look for a similar comparison in the remaining OPS. */
1640 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1641 {
1642 tree t;
1643
1644 if (TREE_CODE (oe->op) != SSA_NAME)
1645 continue;
1646 def2 = SSA_NAME_DEF_STMT (oe->op);
1647 if (!is_gimple_assign (def2))
1648 continue;
1649 rcode = gimple_assign_rhs_code (def2);
1650 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1651 continue;
1652
1653 /* If we got here, we have a match. See if we can combine the
1654 two comparisons. */
1655 if (opcode == BIT_IOR_EXPR)
1656 t = maybe_fold_or_comparisons (lcode, op1, op2,
1657 rcode, gimple_assign_rhs1 (def2),
1658 gimple_assign_rhs2 (def2));
1659 else
1660 t = maybe_fold_and_comparisons (lcode, op1, op2,
1661 rcode, gimple_assign_rhs1 (def2),
1662 gimple_assign_rhs2 (def2));
1663 if (!t)
1664 continue;
1665
1666 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1667 always give us a boolean_type_node value back. If the original
1668 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1669 we need to convert. */
1670 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1671 t = fold_convert (TREE_TYPE (curr->op), t);
1672
1673 if (TREE_CODE (t) != INTEGER_CST
1674 && !operand_equal_p (t, curr->op, 0))
1675 {
1676 enum tree_code subcode;
1677 tree newop1, newop2;
1678 if (!COMPARISON_CLASS_P (t))
1679 continue;
1680 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1681 STRIP_USELESS_TYPE_CONVERSION (newop1);
1682 STRIP_USELESS_TYPE_CONVERSION (newop2);
1683 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1684 continue;
1685 }
1686
1687 if (dump_file && (dump_flags & TDF_DETAILS))
1688 {
1689 fprintf (dump_file, "Equivalence: ");
1690 print_generic_expr (dump_file, curr->op, 0);
1691 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1692 print_generic_expr (dump_file, oe->op, 0);
1693 fprintf (dump_file, " -> ");
1694 print_generic_expr (dump_file, t, 0);
1695 fprintf (dump_file, "\n");
1696 }
1697
1698 /* Now we can delete oe, as it has been subsumed by the new combined
1699 expression t. */
1700 ops->ordered_remove (i);
1701 reassociate_stats.ops_eliminated ++;
1702
1703 /* If t is the same as curr->op, we're done. Otherwise we must
1704 replace curr->op with t. Special case is if we got a constant
1705 back, in which case we add it to the end instead of in place of
1706 the current entry. */
1707 if (TREE_CODE (t) == INTEGER_CST)
1708 {
1709 ops->ordered_remove (currindex);
1710 add_to_ops_vec (ops, t);
1711 }
1712 else if (!operand_equal_p (t, curr->op, 0))
1713 {
1714 gimple sum;
1715 enum tree_code subcode;
1716 tree newop1;
1717 tree newop2;
1718 gcc_assert (COMPARISON_CLASS_P (t));
1719 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1720 STRIP_USELESS_TYPE_CONVERSION (newop1);
1721 STRIP_USELESS_TYPE_CONVERSION (newop2);
1722 gcc_checking_assert (is_gimple_val (newop1)
1723 && is_gimple_val (newop2));
1724 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1725 curr->op = gimple_get_lhs (sum);
1726 }
1727 return true;
1728 }
1729
1730 return false;
1731 }
1732
1733 /* Perform various identities and other optimizations on the list of
1734 operand entries, stored in OPS. The tree code for the binary
1735 operation between all the operands is OPCODE. */
1736
1737 static void
1738 optimize_ops_list (enum tree_code opcode,
1739 vec<operand_entry_t> *ops)
1740 {
1741 unsigned int length = ops->length ();
1742 unsigned int i;
1743 operand_entry_t oe;
1744 operand_entry_t oelast = NULL;
1745 bool iterate = false;
1746
1747 if (length == 1)
1748 return;
1749
1750 oelast = ops->last ();
1751
1752 /* If the last two are constants, pop the constants off, merge them
1753 and try the next two. */
1754 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1755 {
1756 operand_entry_t oelm1 = (*ops)[length - 2];
1757
1758 if (oelm1->rank == 0
1759 && is_gimple_min_invariant (oelm1->op)
1760 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1761 TREE_TYPE (oelast->op)))
1762 {
1763 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1764 oelm1->op, oelast->op);
1765
1766 if (folded && is_gimple_min_invariant (folded))
1767 {
1768 if (dump_file && (dump_flags & TDF_DETAILS))
1769 fprintf (dump_file, "Merging constants\n");
1770
1771 ops->pop ();
1772 ops->pop ();
1773
1774 add_to_ops_vec (ops, folded);
1775 reassociate_stats.constants_eliminated++;
1776
1777 optimize_ops_list (opcode, ops);
1778 return;
1779 }
1780 }
1781 }
1782
1783 eliminate_using_constants (opcode, ops);
1784 oelast = NULL;
1785
1786 for (i = 0; ops->iterate (i, &oe);)
1787 {
1788 bool done = false;
1789
1790 if (eliminate_not_pairs (opcode, ops, i, oe))
1791 return;
1792 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1793 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1794 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1795 {
1796 if (done)
1797 return;
1798 iterate = true;
1799 oelast = NULL;
1800 continue;
1801 }
1802 oelast = oe;
1803 i++;
1804 }
1805
1806 length = ops->length ();
1807 oelast = ops->last ();
1808
1809 if (iterate)
1810 optimize_ops_list (opcode, ops);
1811 }
1812
1813 /* The following functions are subroutines to optimize_range_tests and allow
1814 it to try to change a logical combination of comparisons into a range
1815 test.
1816
1817 For example, both
1818 X == 2 || X == 5 || X == 3 || X == 4
1819 and
1820 X >= 2 && X <= 5
1821 are converted to
1822 (unsigned) (X - 2) <= 3
1823
1824 For more information see comments above fold_test_range in fold-const.c,
1825 this implementation is for GIMPLE. */
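
/* The unsigned-subtraction trick above works because subtracting the
   low bound maps the wanted range 2..5 onto 0..3, while any value
   below 2 wraps around to a very large unsigned number, so a single
   unsigned comparison against the range width covers both bounds.  */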
1826
1827 struct range_entry
1828 {
1829 tree exp;
1830 tree low;
1831 tree high;
1832 bool in_p;
1833 bool strict_overflow_p;
1834 unsigned int idx, next;
1835 };
1836
1837 /* This is similar to make_range in fold-const.c, but on top of
1838 GIMPLE instead of trees. If EXP is non-NULL, it should be
1839 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1840 argument should be a GIMPLE_COND. */
1841
1842 static void
1843 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1844 {
1845 int in_p;
1846 tree low, high;
1847 bool is_bool, strict_overflow_p;
1848
1849 r->exp = NULL_TREE;
1850 r->in_p = false;
1851 r->strict_overflow_p = false;
1852 r->low = NULL_TREE;
1853 r->high = NULL_TREE;
1854 if (exp != NULL_TREE
1855 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1856 return;
1857
1858 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1859 and see if we can refine the range. Some of the cases below may not
1860 happen, but it doesn't seem worth worrying about this. We "continue"
1861 the outer loop when we've changed something; otherwise we "break"
1862 the switch, which will "break" the while. */
1863 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1864 high = low;
1865 in_p = 0;
1866 strict_overflow_p = false;
1867 is_bool = false;
1868 if (exp == NULL_TREE)
1869 is_bool = true;
1870 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1871 {
1872 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1873 is_bool = true;
1874 else
1875 return;
1876 }
1877 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1878 is_bool = true;
1879
1880 while (1)
1881 {
1882 enum tree_code code;
1883 tree arg0, arg1, exp_type;
1884 tree nexp;
1885 location_t loc;
1886
1887 if (exp != NULL_TREE)
1888 {
1889 if (TREE_CODE (exp) != SSA_NAME
1890 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1891 break;
1892
1893 stmt = SSA_NAME_DEF_STMT (exp);
1894 if (!is_gimple_assign (stmt))
1895 break;
1896
1897 code = gimple_assign_rhs_code (stmt);
1898 arg0 = gimple_assign_rhs1 (stmt);
1899 arg1 = gimple_assign_rhs2 (stmt);
1900 exp_type = TREE_TYPE (exp);
1901 }
1902 else
1903 {
1904 code = gimple_cond_code (stmt);
1905 arg0 = gimple_cond_lhs (stmt);
1906 arg1 = gimple_cond_rhs (stmt);
1907 exp_type = boolean_type_node;
1908 }
1909
1910 if (TREE_CODE (arg0) != SSA_NAME)
1911 break;
1912 loc = gimple_location (stmt);
1913 switch (code)
1914 {
1915 case BIT_NOT_EXPR:
1916 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1917 /* Ensure the range is either +[-,0], +[0,0],
1918 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1919 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1920 or similar expression of unconditional true or
1921 false, it should not be negated. */
1922 && ((high && integer_zerop (high))
1923 || (low && integer_onep (low))))
1924 {
1925 in_p = !in_p;
1926 exp = arg0;
1927 continue;
1928 }
1929 break;
1930 case SSA_NAME:
1931 exp = arg0;
1932 continue;
1933 CASE_CONVERT:
1934 if (is_bool)
1935 goto do_default;
1936 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1937 {
1938 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1939 is_bool = true;
1940 else
1941 return;
1942 }
1943 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1944 is_bool = true;
1945 goto do_default;
1946 case EQ_EXPR:
1947 case NE_EXPR:
1948 case LT_EXPR:
1949 case LE_EXPR:
1950 case GE_EXPR:
1951 case GT_EXPR:
1952 is_bool = true;
1953 /* FALLTHRU */
1954 default:
1955 if (!is_bool)
1956 return;
1957 do_default:
1958 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1959 &low, &high, &in_p,
1960 &strict_overflow_p);
1961 if (nexp != NULL_TREE)
1962 {
1963 exp = nexp;
1964 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1965 continue;
1966 }
1967 break;
1968 }
1969 break;
1970 }
1971 if (is_bool)
1972 {
1973 r->exp = exp;
1974 r->in_p = in_p;
1975 r->low = low;
1976 r->high = high;
1977 r->strict_overflow_p = strict_overflow_p;
1978 }
1979 }
1980
1981 /* Comparison function for qsort. Sort entries
1982 without SSA_NAME exp first, then with SSA_NAMEs sorted
1983 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1984 by increasing ->low and if ->low is the same, by increasing
1985 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1986 maximum. */
1987
1988 static int
1989 range_entry_cmp (const void *a, const void *b)
1990 {
1991 const struct range_entry *p = (const struct range_entry *) a;
1992 const struct range_entry *q = (const struct range_entry *) b;
1993
1994 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1995 {
1996 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1997 {
1998 /* Group range_entries for the same SSA_NAME together. */
1999 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2000 return -1;
2001 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2002 return 1;
2003 /* If ->low is different, NULL low goes first, then by
2004 ascending low. */
2005 if (p->low != NULL_TREE)
2006 {
2007 if (q->low != NULL_TREE)
2008 {
2009 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2010 p->low, q->low);
2011 if (tem && integer_onep (tem))
2012 return -1;
2013 tem = fold_binary (GT_EXPR, boolean_type_node,
2014 p->low, q->low);
2015 if (tem && integer_onep (tem))
2016 return 1;
2017 }
2018 else
2019 return 1;
2020 }
2021 else if (q->low != NULL_TREE)
2022 return -1;
2023 /* If ->high is different, NULL high goes last, before that by
2024 ascending high. */
2025 if (p->high != NULL_TREE)
2026 {
2027 if (q->high != NULL_TREE)
2028 {
2029 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2030 p->high, q->high);
2031 if (tem && integer_onep (tem))
2032 return -1;
2033 tem = fold_binary (GT_EXPR, boolean_type_node,
2034 p->high, q->high);
2035 if (tem && integer_onep (tem))
2036 return 1;
2037 }
2038 else
2039 return -1;
2040 }
2041 else if (q->high != NULL_TREE)
2042 return 1;
2043 /* If both ranges are the same, sort below by ascending idx. */
2044 }
2045 else
2046 return 1;
2047 }
2048 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2049 return -1;
2050
2051 if (p->idx < q->idx)
2052 return -1;
2053 else
2054 {
2055 gcc_checking_assert (p->idx > q->idx);
2056 return 1;
2057 }
2058 }
2059
2060 /* Helper routine of optimize_range_tests.
2061 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2062 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2063 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2064 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2065 an array of COUNT pointers to other ranges. Return
2066 true if the range merge has been successful.
2067 If OPCODE is ERROR_MARK, this is called from within
2068 maybe_optimize_range_tests and is performing inter-bb range optimization.
2069 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2070 oe->rank. */
2071
2072 static bool
2073 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2074 struct range_entry **otherrangep,
2075 unsigned int count, enum tree_code opcode,
2076 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2077 bool in_p, tree low, tree high, bool strict_overflow_p)
2078 {
2079 operand_entry_t oe = (*ops)[range->idx];
2080 tree op = oe->op;
2081 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2082 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2083 location_t loc = gimple_location (stmt);
2084 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2085 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2086 in_p, low, high);
2087 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2088 gimple_stmt_iterator gsi;
2089 unsigned int i;
2090
2091 if (tem == NULL_TREE)
2092 return false;
2093
2094 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2095 warning_at (loc, OPT_Wstrict_overflow,
2096 "assuming signed overflow does not occur "
2097 "when simplifying range test");
2098
2099 if (dump_file && (dump_flags & TDF_DETAILS))
2100 {
2101 struct range_entry *r;
2102 fprintf (dump_file, "Optimizing range tests ");
2103 print_generic_expr (dump_file, range->exp, 0);
2104 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2105 print_generic_expr (dump_file, range->low, 0);
2106 fprintf (dump_file, ", ");
2107 print_generic_expr (dump_file, range->high, 0);
2108 fprintf (dump_file, "]");
2109 for (i = 0; i < count; i++)
2110 {
2111 if (otherrange)
2112 r = otherrange + i;
2113 else
2114 r = otherrangep[i];
2115 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2116 print_generic_expr (dump_file, r->low, 0);
2117 fprintf (dump_file, ", ");
2118 print_generic_expr (dump_file, r->high, 0);
2119 fprintf (dump_file, "]");
2120 }
2121 fprintf (dump_file, "\n into ");
2122 print_generic_expr (dump_file, tem, 0);
2123 fprintf (dump_file, "\n");
2124 }
2125
2126 if (opcode == BIT_IOR_EXPR
2127 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2128 tem = invert_truthvalue_loc (loc, tem);
2129
2130 tem = fold_convert_loc (loc, optype, tem);
2131 gsi = gsi_for_stmt (stmt);
2132 unsigned int uid = gimple_uid (stmt);
2133 /* In rare cases range->exp can be equal to lhs of stmt.
2134 In that case we have to insert after the stmt rather than before
2135 it. If stmt is a PHI, insert it at the start of the basic block. */
2136 if (op != range->exp)
2137 {
2138 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2139 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2140 GSI_SAME_STMT);
2141 gsi_prev (&gsi);
2142 }
2143 else if (gimple_code (stmt) != GIMPLE_PHI)
2144 {
2145 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2146 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2147 GSI_CONTINUE_LINKING);
2148 }
2149 else
2150 {
2151 gsi = gsi_after_labels (gimple_bb (stmt));
2152 if (!gsi_end_p (gsi))
2153 uid = gimple_uid (gsi_stmt (gsi));
2154 else
2155 {
2156 gsi = gsi_start_bb (gimple_bb (stmt));
2157 uid = 1;
2158 while (!gsi_end_p (gsi))
2159 {
2160 uid = gimple_uid (gsi_stmt (gsi));
2161 gsi_next (&gsi);
2162 }
2163 }
2164 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2165 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2166 GSI_SAME_STMT);
2167 if (gsi_end_p (gsi))
2168 gsi = gsi_last_bb (gimple_bb (stmt));
2169 else
2170 gsi_prev (&gsi);
2171 }
2172 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2173 if (gimple_uid (gsi_stmt (gsi)))
2174 break;
2175 else
2176 gimple_set_uid (gsi_stmt (gsi), uid);
2177
2178 oe->op = tem;
2179 range->exp = exp;
2180 range->low = low;
2181 range->high = high;
2182 range->in_p = in_p;
2183 range->strict_overflow_p = false;
2184
2185 for (i = 0; i < count; i++)
2186 {
2187 if (otherrange)
2188 range = otherrange + i;
2189 else
2190 range = otherrangep[i];
2191 oe = (*ops)[range->idx];
2192 /* Now change all the other range test immediate uses, so that
2193 those tests will be optimized away. */
2194 if (opcode == ERROR_MARK)
2195 {
2196 if (oe->op)
2197 oe->op = build_int_cst (TREE_TYPE (oe->op),
2198 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2199 else
2200 oe->op = (oe->rank == BIT_IOR_EXPR
2201 ? boolean_false_node : boolean_true_node);
2202 }
2203 else
2204 oe->op = error_mark_node;
2205 range->exp = NULL_TREE;
2206 }
2207 return true;
2208 }
2209
2210 /* Optimize X == CST1 || X == CST2
2211 if popcount (CST1 ^ CST2) == 1 into
2212 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2213 Similarly for ranges. E.g.
2214 X != 2 && X != 3 && X != 10 && X != 11
2215 will be transformed by the previous optimization into
2216 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2217 and this loop can transform that into
2218 !(((X & ~8) - 2U) <= 1U). */
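
/* A worked instance of the transformation above, for illustration only:
   for X == 2 || X == 6 we have CST1 ^ CST2 == 4 and popcount (4) == 1,
   so the test becomes (X & ~4) == (2 & ~4), i.e.

     int
     is_2_or_6 (unsigned int x)
     {
       return (x & ~4u) == 2u;
     }

   which is equivalent because 2 and 6 differ only in the bit that the
   mask clears.  */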
2219
2220 static bool
2221 optimize_range_tests_xor (enum tree_code opcode, tree type,
2222 tree lowi, tree lowj, tree highi, tree highj,
2223 vec<operand_entry_t> *ops,
2224 struct range_entry *rangei,
2225 struct range_entry *rangej)
2226 {
2227 tree lowxor, highxor, tem, exp;
2228 /* Check lowi ^ lowj == highi ^ highj and
2229 popcount (lowi ^ lowj) == 1. */
2230 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2231 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2232 return false;
2233 if (!integer_pow2p (lowxor))
2234 return false;
2235 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2236 if (!tree_int_cst_equal (lowxor, highxor))
2237 return false;
2238
2239 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2240 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2241 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2242 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2243 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2244 NULL, rangei->in_p, lowj, highj,
2245 rangei->strict_overflow_p
2246 || rangej->strict_overflow_p))
2247 return true;
2248 return false;
2249 }
2250
2251 /* Optimize X == CST1 || X == CST2
2252 if popcount (CST2 - CST1) == 1 into
2253 ((X - CST1) & ~(CST2 - CST1)) == 0.
2254 Similarly for ranges. E.g.
2255 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2256 || X == 75 || X == 45
2257 will be transformed by the previous optimization into
2258 (X - 43U) <= 3U || (X - 75U) <= 3U
2259 and this loop can transform that into
2260 ((X - 43U) & ~(75U - 43U)) <= 3U. */
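
/* The arithmetic behind the example above, for illustration only:
   the two subranges produced earlier are [43, 46] and [75, 78]; both have
   width 3, and 75U - 43U == 32 has popcount 1.  Subtracting 43 maps them
   to [0, 3] and [32, 35], and clearing bit 5 with & ~32U folds the second
   interval onto the first, so a single <= 3U comparison remains.  */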
2261 static bool
2262 optimize_range_tests_diff (enum tree_code opcode, tree type,
2263 tree lowi, tree lowj, tree highi, tree highj,
2264 vec<operand_entry_t> *ops,
2265 struct range_entry *rangei,
2266 struct range_entry *rangej)
2267 {
2268 tree tem1, tem2, mask;
2269 /* Check highi - lowi == highj - lowj. */
2270 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2271 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2272 return false;
2273 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2274 if (!tree_int_cst_equal (tem1, tem2))
2275 return false;
2276 /* Check popcount (lowj - lowi) == 1. */
2277 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2278 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2279 return false;
2280 if (!integer_pow2p (tem1))
2281 return false;
2282
2283 type = unsigned_type_for (type);
2284 tem1 = fold_convert (type, tem1);
2285 tem2 = fold_convert (type, tem2);
2286 lowi = fold_convert (type, lowi);
2287 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2288 tem1 = fold_binary (MINUS_EXPR, type,
2289 fold_convert (type, rangei->exp), lowi);
2290 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2291 lowj = build_int_cst (type, 0);
2292 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2293 NULL, rangei->in_p, lowj, tem2,
2294 rangei->strict_overflow_p
2295 || rangej->strict_overflow_p))
2296 return true;
2297 return false;
2298 }
2299
2300 /* Perform the checks common to optimize_range_tests_xor and
2301 optimize_range_tests_diff.
2302 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
2303 otherwise call optimize_range_tests_diff. */
2304
2305 static bool
2306 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2307 bool optimize_xor, vec<operand_entry_t> *ops,
2308 struct range_entry *ranges)
2309 {
2310 int i, j;
2311 bool any_changes = false;
2312 for (i = first; i < length; i++)
2313 {
2314 tree lowi, highi, lowj, highj, type, tem;
2315
2316 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2317 continue;
2318 type = TREE_TYPE (ranges[i].exp);
2319 if (!INTEGRAL_TYPE_P (type))
2320 continue;
2321 lowi = ranges[i].low;
2322 if (lowi == NULL_TREE)
2323 lowi = TYPE_MIN_VALUE (type);
2324 highi = ranges[i].high;
2325 if (highi == NULL_TREE)
2326 continue;
2327 for (j = i + 1; j < length && j < i + 64; j++)
2328 {
2329 bool changes;
2330 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2331 continue;
2332 lowj = ranges[j].low;
2333 if (lowj == NULL_TREE)
2334 continue;
2335 highj = ranges[j].high;
2336 if (highj == NULL_TREE)
2337 highj = TYPE_MAX_VALUE (type);
2338 /* Check lowj > highi. */
2339 tem = fold_binary (GT_EXPR, boolean_type_node,
2340 lowj, highi);
2341 if (tem == NULL_TREE || !integer_onep (tem))
2342 continue;
2343 if (optimize_xor)
2344 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2345 highi, highj, ops,
2346 ranges + i, ranges + j);
2347 else
2348 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2349 highi, highj, ops,
2350 ranges + i, ranges + j);
2351 if (changes)
2352 {
2353 any_changes = true;
2354 break;
2355 }
2356 }
2357 }
2358 return any_changes;
2359 }
2360
2361 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2362 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2363 EXP on success, NULL otherwise. */
2364
2365 static tree
2366 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2367 wide_int *mask, tree *totallowp)
2368 {
2369 tree tem = int_const_binop (MINUS_EXPR, high, low);
2370 if (tem == NULL_TREE
2371 || TREE_CODE (tem) != INTEGER_CST
2372 || TREE_OVERFLOW (tem)
2373 || tree_int_cst_sgn (tem) == -1
2374 || compare_tree_int (tem, prec) != -1)
2375 return NULL_TREE;
2376
2377 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2378 *mask = wi::shifted_mask (0, max, false, prec);
2379 if (TREE_CODE (exp) == BIT_AND_EXPR
2380 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2381 {
2382 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2383 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2384 if (wi::popcount (msk) == 1
2385 && wi::ltu_p (msk, prec - max))
2386 {
2387 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2388 max += msk.to_uhwi ();
2389 exp = TREE_OPERAND (exp, 0);
2390 if (integer_zerop (low)
2391 && TREE_CODE (exp) == PLUS_EXPR
2392 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2393 {
2394 tree ret = TREE_OPERAND (exp, 0);
2395 STRIP_NOPS (ret);
2396 widest_int bias
2397 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2398 TYPE_PRECISION (TREE_TYPE (low))));
2399 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2400 if (totallowp)
2401 {
2402 *totallowp = tbias;
2403 return ret;
2404 }
2405 else if (!tree_int_cst_lt (totallow, tbias))
2406 return NULL_TREE;
2407 bias = wi::to_widest (tbias);
2408 bias -= wi::to_widest (totallow);
2409 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2410 {
2411 *mask = wi::lshift (*mask, bias);
2412 return ret;
2413 }
2414 }
2415 }
2416 }
2417 if (totallowp)
2418 return exp;
2419 if (!tree_int_cst_lt (totallow, low))
2420 return exp;
2421 tem = int_const_binop (MINUS_EXPR, low, totallow);
2422 if (tem == NULL_TREE
2423 || TREE_CODE (tem) != INTEGER_CST
2424 || TREE_OVERFLOW (tem)
2425 || compare_tree_int (tem, prec - max) == 1)
2426 return NULL_TREE;
2427
2428 *mask = wi::lshift (*mask, wi::to_widest (tem));
2429 return exp;
2430 }
2431
2432 /* Attempt to optimize small range tests using a bit test.
2433 E.g.
2434 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2435 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2436 has been optimized by earlier optimizations into:
2437 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2438 As the whole 43 through 82 range covers fewer than 64 values,
2439 for 64-bit word targets optimize that into:
2440 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
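
/* Where the constant in the example comes from, for illustration only:
   subtracting 43 from the tested values 43, 44, 45, 46, 49, 75, 76, 77,
   78 and 82 yields the bit positions 0, 1, 2, 3, 6, 32, 33, 34, 35 and
   39; setting exactly those bits in a 64-bit word gives 0x8F0000004FULL,
   so the second comparison tests the single bit selected by X - 43U.  */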
2441
2442 static bool
2443 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2444 vec<operand_entry_t> *ops,
2445 struct range_entry *ranges)
2446 {
2447 int i, j;
2448 bool any_changes = false;
2449 int prec = GET_MODE_BITSIZE (word_mode);
2450 auto_vec<struct range_entry *, 64> candidates;
2451
2452 for (i = first; i < length - 2; i++)
2453 {
2454 tree lowi, highi, lowj, highj, type;
2455
2456 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2457 continue;
2458 type = TREE_TYPE (ranges[i].exp);
2459 if (!INTEGRAL_TYPE_P (type))
2460 continue;
2461 lowi = ranges[i].low;
2462 if (lowi == NULL_TREE)
2463 lowi = TYPE_MIN_VALUE (type);
2464 highi = ranges[i].high;
2465 if (highi == NULL_TREE)
2466 continue;
2467 wide_int mask;
2468 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2469 highi, &mask, &lowi);
2470 if (exp == NULL_TREE)
2471 continue;
2472 bool strict_overflow_p = ranges[i].strict_overflow_p;
2473 candidates.truncate (0);
2474 int end = MIN (i + 64, length);
2475 for (j = i + 1; j < end; j++)
2476 {
2477 tree exp2;
2478 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2479 continue;
2480 if (ranges[j].exp == exp)
2481 ;
2482 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2483 {
2484 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2485 if (exp2 == exp)
2486 ;
2487 else if (TREE_CODE (exp2) == PLUS_EXPR)
2488 {
2489 exp2 = TREE_OPERAND (exp2, 0);
2490 STRIP_NOPS (exp2);
2491 if (exp2 != exp)
2492 continue;
2493 }
2494 else
2495 continue;
2496 }
2497 else
2498 continue;
2499 lowj = ranges[j].low;
2500 if (lowj == NULL_TREE)
2501 continue;
2502 highj = ranges[j].high;
2503 if (highj == NULL_TREE)
2504 highj = TYPE_MAX_VALUE (type);
2505 wide_int mask2;
2506 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2507 highj, &mask2, NULL);
2508 if (exp2 != exp)
2509 continue;
2510 mask |= mask2;
2511 strict_overflow_p |= ranges[j].strict_overflow_p;
2512 candidates.safe_push (&ranges[j]);
2513 }
2514
2515 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2516 if (candidates.length () >= 2)
2517 {
2518 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2519 wi::to_widest (lowi)
2520 + prec - 1 - wi::clz (mask));
2521 operand_entry_t oe = (*ops)[ranges[i].idx];
2522 tree op = oe->op;
2523 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2524 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2525 location_t loc = gimple_location (stmt);
2526 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2527
2528 /* See if it is cheaper to pretend the minimum value of the
2529 range is 0, if the maximum value is small enough.
2530 We can then avoid the subtraction of the minimum value, but the
2531 mask constant could perhaps be more expensive. */
2532 if (compare_tree_int (lowi, 0) > 0
2533 && compare_tree_int (high, prec) < 0)
2534 {
2535 int cost_diff;
2536 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2537 rtx reg = gen_raw_REG (word_mode, 10000);
2538 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2539 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2540 GEN_INT (-m)), speed_p);
2541 rtx r = immed_wide_int_const (mask, word_mode);
2542 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2543 speed_p);
2544 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2545 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2546 speed_p);
2547 if (cost_diff > 0)
2548 {
2549 mask = wi::lshift (mask, m);
2550 lowi = build_zero_cst (TREE_TYPE (lowi));
2551 }
2552 }
2553
2554 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2555 false, lowi, high);
2556 if (tem == NULL_TREE || is_gimple_val (tem))
2557 continue;
2558 tree etype = unsigned_type_for (TREE_TYPE (exp));
2559 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2560 fold_convert_loc (loc, etype, exp),
2561 fold_convert_loc (loc, etype, lowi));
2562 exp = fold_convert_loc (loc, integer_type_node, exp);
2563 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2564 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2565 build_int_cst (word_type, 1), exp);
2566 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2567 wide_int_to_tree (word_type, mask));
2568 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2569 build_zero_cst (word_type));
2570 if (is_gimple_val (exp))
2571 continue;
2572
2573 /* The shift might have undefined behavior if TEM is true,
2574 but reassociate_bb isn't prepared to have basic blocks
2575 split when it is running. So, temporarily emit code
2576 with BIT_IOR_EXPR instead of &&, and fix it up in
2577 branch_fixup. */
2578 gimple_seq seq;
2579 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2580 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2581 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2582 gimple_seq seq2;
2583 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2584 gimple_seq_add_seq_without_update (&seq, seq2);
2585 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2586 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2587 gimple g = gimple_build_assign (make_ssa_name (optype),
2588 BIT_IOR_EXPR, tem, exp);
2589 gimple_set_location (g, loc);
2590 gimple_seq_add_stmt_without_update (&seq, g);
2591 exp = gimple_assign_lhs (g);
2592 tree val = build_zero_cst (optype);
2593 if (update_range_test (&ranges[i], NULL, candidates.address (),
2594 candidates.length (), opcode, ops, exp,
2595 seq, false, val, val, strict_overflow_p))
2596 {
2597 any_changes = true;
2598 reassoc_branch_fixups.safe_push (tem);
2599 }
2600 else
2601 gimple_seq_discard (seq);
2602 }
2603 }
2604 return any_changes;
2605 }
2606
2607 /* Optimize range tests, similarly to how fold_range_test optimizes
2608 them on trees. The tree code for the binary
2609 operation between all the operands is OPCODE.
2610 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2611 maybe_optimize_range_tests for inter-bb range optimization.
2612 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2613 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2614 the actual opcode. */
2615
2616 static bool
2617 optimize_range_tests (enum tree_code opcode,
2618 vec<operand_entry_t> *ops)
2619 {
2620 unsigned int length = ops->length (), i, j, first;
2621 operand_entry_t oe;
2622 struct range_entry *ranges;
2623 bool any_changes = false;
2624
2625 if (length == 1)
2626 return false;
2627
2628 ranges = XNEWVEC (struct range_entry, length);
2629 for (i = 0; i < length; i++)
2630 {
2631 oe = (*ops)[i];
2632 ranges[i].idx = i;
2633 init_range_entry (ranges + i, oe->op,
2634 oe->op ? NULL :
2635 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2636 /* For | invert it now, we will invert it again before emitting
2637 the optimized expression. */
2638 if (opcode == BIT_IOR_EXPR
2639 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2640 ranges[i].in_p = !ranges[i].in_p;
2641 }
2642
2643 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2644 for (i = 0; i < length; i++)
2645 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2646 break;
2647
2648 /* Try to merge ranges. */
2649 for (first = i; i < length; i++)
2650 {
2651 tree low = ranges[i].low;
2652 tree high = ranges[i].high;
2653 int in_p = ranges[i].in_p;
2654 bool strict_overflow_p = ranges[i].strict_overflow_p;
2655 int update_fail_count = 0;
2656
2657 for (j = i + 1; j < length; j++)
2658 {
2659 if (ranges[i].exp != ranges[j].exp)
2660 break;
2661 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2662 ranges[j].in_p, ranges[j].low, ranges[j].high))
2663 break;
2664 strict_overflow_p |= ranges[j].strict_overflow_p;
2665 }
2666
2667 if (j == i + 1)
2668 continue;
2669
2670 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2671 opcode, ops, ranges[i].exp, NULL, in_p,
2672 low, high, strict_overflow_p))
2673 {
2674 i = j - 1;
2675 any_changes = true;
2676 }
2677 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2678 while update_range_test would fail. */
2679 else if (update_fail_count == 64)
2680 i = j - 1;
2681 else
2682 ++update_fail_count;
2683 }
2684
2685 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2686 ops, ranges);
2687
2688 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2689 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2690 ops, ranges);
2691 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2692 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2693 ops, ranges);
2694
2695 if (any_changes && opcode != ERROR_MARK)
2696 {
2697 j = 0;
2698 FOR_EACH_VEC_ELT (*ops, i, oe)
2699 {
2700 if (oe->op == error_mark_node)
2701 continue;
2702 else if (i != j)
2703 (*ops)[j] = oe;
2704 j++;
2705 }
2706 ops->truncate (j);
2707 }
2708
2709 XDELETEVEC (ranges);
2710 return any_changes;
2711 }
2712
2713 /* Return true if STMT is a cast like:
2714 <bb N>:
2715 ...
2716 _123 = (int) _234;
2717
2718 <bb M>:
2719 # _345 = PHI <_123(N), 1(...), 1(...)>
2720 where _234 has bool type, _123 has a single use and
2721 bb N has a single successor M. This is commonly used in
2722 the last block of a range test. */
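
/* For illustration (an assumed source fragment, not taken from this file),
   such a block typically comes from the last term of a short-circuit test
   like

     int
     f (int a, int b)
     {
       return a == 1 || b == 2;
     }

   where the final comparison's boolean result is widened to int and fed
   into a PHI together with the constant 1 arriving from the edges on which
   an earlier term was already true.  */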
2723
2724 static bool
2725 final_range_test_p (gimple stmt)
2726 {
2727 basic_block bb, rhs_bb;
2728 edge e;
2729 tree lhs, rhs;
2730 use_operand_p use_p;
2731 gimple use_stmt;
2732
2733 if (!gimple_assign_cast_p (stmt))
2734 return false;
2735 bb = gimple_bb (stmt);
2736 if (!single_succ_p (bb))
2737 return false;
2738 e = single_succ_edge (bb);
2739 if (e->flags & EDGE_COMPLEX)
2740 return false;
2741
2742 lhs = gimple_assign_lhs (stmt);
2743 rhs = gimple_assign_rhs1 (stmt);
2744 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2745 || TREE_CODE (rhs) != SSA_NAME
2746 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2747 return false;
2748
2749 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2750 if (!single_imm_use (lhs, &use_p, &use_stmt))
2751 return false;
2752
2753 if (gimple_code (use_stmt) != GIMPLE_PHI
2754 || gimple_bb (use_stmt) != e->dest)
2755 return false;
2756
2757 /* And that the rhs is defined in the same loop. */
2758 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2759 if (rhs_bb == NULL
2760 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2761 return false;
2762
2763 return true;
2764 }
2765
2766 /* Return true if BB is a suitable basic block for inter-bb range test
2767 optimization. If BACKWARD is true, BB should be the only predecessor
2768 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2769 or compared against to find a common basic block to which all conditions
2770 branch if true resp. false. If BACKWARD is false, TEST_BB should
2771 be the only predecessor of BB. */
2772
2773 static bool
2774 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2775 bool backward)
2776 {
2777 edge_iterator ei, ei2;
2778 edge e, e2;
2779 gimple stmt;
2780 gphi_iterator gsi;
2781 bool other_edge_seen = false;
2782 bool is_cond;
2783
2784 if (test_bb == bb)
2785 return false;
2786 /* Check last stmt first. */
2787 stmt = last_stmt (bb);
2788 if (stmt == NULL
2789 || (gimple_code (stmt) != GIMPLE_COND
2790 && (backward || !final_range_test_p (stmt)))
2791 || gimple_visited_p (stmt)
2792 || stmt_could_throw_p (stmt)
2793 || *other_bb == bb)
2794 return false;
2795 is_cond = gimple_code (stmt) == GIMPLE_COND;
2796 if (is_cond)
2797 {
2798 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2799 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2800 to *OTHER_BB (if not set yet, try to find it out). */
2801 if (EDGE_COUNT (bb->succs) != 2)
2802 return false;
2803 FOR_EACH_EDGE (e, ei, bb->succs)
2804 {
2805 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2806 return false;
2807 if (e->dest == test_bb)
2808 {
2809 if (backward)
2810 continue;
2811 else
2812 return false;
2813 }
2814 if (e->dest == bb)
2815 return false;
2816 if (*other_bb == NULL)
2817 {
2818 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2819 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2820 return false;
2821 else if (e->dest == e2->dest)
2822 *other_bb = e->dest;
2823 if (*other_bb == NULL)
2824 return false;
2825 }
2826 if (e->dest == *other_bb)
2827 other_edge_seen = true;
2828 else if (backward)
2829 return false;
2830 }
2831 if (*other_bb == NULL || !other_edge_seen)
2832 return false;
2833 }
2834 else if (single_succ (bb) != *other_bb)
2835 return false;
2836
2837 /* Now check all PHIs of *OTHER_BB. */
2838 e = find_edge (bb, *other_bb);
2839 e2 = find_edge (test_bb, *other_bb);
2840 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2841 {
2842 gphi *phi = gsi.phi ();
2843 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2844 corresponding to BB and TEST_BB predecessor must be the same. */
2845 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2846 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2847 {
2848 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2849 one of the PHIs should have the lhs of the last stmt in
2850 that block as PHI arg and that PHI should have 0 or 1
2851 corresponding to it in all other range test basic blocks
2852 considered. */
2853 if (!is_cond)
2854 {
2855 if (gimple_phi_arg_def (phi, e->dest_idx)
2856 == gimple_assign_lhs (stmt)
2857 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2858 || integer_onep (gimple_phi_arg_def (phi,
2859 e2->dest_idx))))
2860 continue;
2861 }
2862 else
2863 {
2864 gimple test_last = last_stmt (test_bb);
2865 if (gimple_code (test_last) != GIMPLE_COND
2866 && gimple_phi_arg_def (phi, e2->dest_idx)
2867 == gimple_assign_lhs (test_last)
2868 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2869 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2870 continue;
2871 }
2872
2873 return false;
2874 }
2875 }
2876 return true;
2877 }
2878
2879 /* Return true if BB doesn't have side-effects that would disallow
2880 range test optimization, all SSA_NAMEs set in the bb are consumed
2881 in the bb and there are no PHIs. */
2882
2883 static bool
2884 no_side_effect_bb (basic_block bb)
2885 {
2886 gimple_stmt_iterator gsi;
2887 gimple last;
2888
2889 if (!gimple_seq_empty_p (phi_nodes (bb)))
2890 return false;
2891 last = last_stmt (bb);
2892 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2893 {
2894 gimple stmt = gsi_stmt (gsi);
2895 tree lhs;
2896 imm_use_iterator imm_iter;
2897 use_operand_p use_p;
2898
2899 if (is_gimple_debug (stmt))
2900 continue;
2901 if (gimple_has_side_effects (stmt))
2902 return false;
2903 if (stmt == last)
2904 return true;
2905 if (!is_gimple_assign (stmt))
2906 return false;
2907 lhs = gimple_assign_lhs (stmt);
2908 if (TREE_CODE (lhs) != SSA_NAME)
2909 return false;
2910 if (gimple_assign_rhs_could_trap_p (stmt))
2911 return false;
2912 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2913 {
2914 gimple use_stmt = USE_STMT (use_p);
2915 if (is_gimple_debug (use_stmt))
2916 continue;
2917 if (gimple_bb (use_stmt) != bb)
2918 return false;
2919 }
2920 }
2921 return false;
2922 }
2923
2924 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2925 return true and fill in *OPS recursively. */
2926
2927 static bool
2928 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2929 struct loop *loop)
2930 {
2931 gimple stmt = SSA_NAME_DEF_STMT (var);
2932 tree rhs[2];
2933 int i;
2934
2935 if (!is_reassociable_op (stmt, code, loop))
2936 return false;
2937
2938 rhs[0] = gimple_assign_rhs1 (stmt);
2939 rhs[1] = gimple_assign_rhs2 (stmt);
2940 gimple_set_visited (stmt, true);
2941 for (i = 0; i < 2; i++)
2942 if (TREE_CODE (rhs[i]) == SSA_NAME
2943 && !get_ops (rhs[i], code, ops, loop)
2944 && has_single_use (rhs[i]))
2945 {
2946 operand_entry_t oe = operand_entry_pool.allocate ();
2947
2948 oe->op = rhs[i];
2949 oe->rank = code;
2950 oe->id = 0;
2951 oe->count = 1;
2952 ops->safe_push (oe);
2953 }
2954 return true;
2955 }
2956
2957 /* Find the ops that were added by get_ops starting from VAR, see if
2958 they were changed during update_range_test and, if so, create new
2959 stmts. */
2960
2961 static tree
2962 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2963 unsigned int *pidx, struct loop *loop)
2964 {
2965 gimple stmt = SSA_NAME_DEF_STMT (var);
2966 tree rhs[4];
2967 int i;
2968
2969 if (!is_reassociable_op (stmt, code, loop))
2970 return NULL;
2971
2972 rhs[0] = gimple_assign_rhs1 (stmt);
2973 rhs[1] = gimple_assign_rhs2 (stmt);
2974 rhs[2] = rhs[0];
2975 rhs[3] = rhs[1];
2976 for (i = 0; i < 2; i++)
2977 if (TREE_CODE (rhs[i]) == SSA_NAME)
2978 {
2979 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2980 if (rhs[2 + i] == NULL_TREE)
2981 {
2982 if (has_single_use (rhs[i]))
2983 rhs[2 + i] = ops[(*pidx)++]->op;
2984 else
2985 rhs[2 + i] = rhs[i];
2986 }
2987 }
2988 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2989 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2990 {
2991 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2992 var = make_ssa_name (TREE_TYPE (var));
2993 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2994 rhs[2], rhs[3]);
2995 gimple_set_uid (g, gimple_uid (stmt));
2996 gimple_set_visited (g, true);
2997 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2998 }
2999 return var;
3000 }
3001
3002 /* Structure to track the initial value passed to get_ops and
3003 the range in the ops vector for each basic block. */
3004
3005 struct inter_bb_range_test_entry
3006 {
3007 tree op;
3008 unsigned int first_idx, last_idx;
3009 };
3010
3011 /* Inter-bb range test optimization. */
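
/* As a rough illustration (an assumed source fragment, not taken from this
   file), this handles range tests whose terms ended up in separate basic
   blocks, e.g.

     int
     g (int x)
     {
       if (x == 2) return 1;
       if (x == 3) return 1;
       if (x == 4) return 1;
       if (x == 5) return 1;
       return 0;
     }

   where each comparison lives in its own block branching to a common
   destination, and the whole chain can be collapsed into a single
   (unsigned) (x - 2) <= 3 test.  */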
3012
3013 static void
3014 maybe_optimize_range_tests (gimple stmt)
3015 {
3016 basic_block first_bb = gimple_bb (stmt);
3017 basic_block last_bb = first_bb;
3018 basic_block other_bb = NULL;
3019 basic_block bb;
3020 edge_iterator ei;
3021 edge e;
3022 auto_vec<operand_entry_t> ops;
3023 auto_vec<inter_bb_range_test_entry> bbinfo;
3024 bool any_changes = false;
3025
3026 /* Consider only basic blocks that end with GIMPLE_COND or
3027 a cast statement satisfying final_range_test_p. All
3028 but the last bb in the first_bb .. last_bb range
3029 should end with GIMPLE_COND. */
3030 if (gimple_code (stmt) == GIMPLE_COND)
3031 {
3032 if (EDGE_COUNT (first_bb->succs) != 2)
3033 return;
3034 }
3035 else if (final_range_test_p (stmt))
3036 other_bb = single_succ (first_bb);
3037 else
3038 return;
3039
3040 if (stmt_could_throw_p (stmt))
3041 return;
3042
3043 /* As relative ordering of post-dominator sons isn't fixed,
3044 maybe_optimize_range_tests can be called first on any
3045 bb in the range we want to optimize. So, start searching
3046 backwards, if first_bb can be set to a predecessor. */
3047 while (single_pred_p (first_bb))
3048 {
3049 basic_block pred_bb = single_pred (first_bb);
3050 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3051 break;
3052 if (!no_side_effect_bb (first_bb))
3053 break;
3054 first_bb = pred_bb;
3055 }
3056 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3057 Before starting forward search in last_bb successors, find
3058 out the other_bb. */
3059 if (first_bb == last_bb)
3060 {
3061 other_bb = NULL;
3062 /* As non-GIMPLE_COND last stmt always terminates the range,
3063 if forward search didn't discover anything, just give up. */
3064 if (gimple_code (stmt) != GIMPLE_COND)
3065 return;
3066 /* Look at both successors. Either it ends with a GIMPLE_COND
3067 and satisfies suitable_cond_bb, or ends with a cast and
3068 other_bb is that cast's successor. */
3069 FOR_EACH_EDGE (e, ei, first_bb->succs)
3070 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3071 || e->dest == first_bb)
3072 return;
3073 else if (single_pred_p (e->dest))
3074 {
3075 stmt = last_stmt (e->dest);
3076 if (stmt
3077 && gimple_code (stmt) == GIMPLE_COND
3078 && EDGE_COUNT (e->dest->succs) == 2)
3079 {
3080 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3081 break;
3082 else
3083 other_bb = NULL;
3084 }
3085 else if (stmt
3086 && final_range_test_p (stmt)
3087 && find_edge (first_bb, single_succ (e->dest)))
3088 {
3089 other_bb = single_succ (e->dest);
3090 if (other_bb == first_bb)
3091 other_bb = NULL;
3092 }
3093 }
3094 if (other_bb == NULL)
3095 return;
3096 }
3097 /* Now do the forward search, moving last_bb to successor bbs
3098 that aren't other_bb. */
3099 while (EDGE_COUNT (last_bb->succs) == 2)
3100 {
3101 FOR_EACH_EDGE (e, ei, last_bb->succs)
3102 if (e->dest != other_bb)
3103 break;
3104 if (e == NULL)
3105 break;
3106 if (!single_pred_p (e->dest))
3107 break;
3108 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3109 break;
3110 if (!no_side_effect_bb (e->dest))
3111 break;
3112 last_bb = e->dest;
3113 }
3114 if (first_bb == last_bb)
3115 return;
3116 /* Here basic blocks first_bb through last_bb's predecessor
3117 end with GIMPLE_COND, all of them have one edge to
3118 other_bb and another to another block in the range,
3119 all blocks except first_bb have no side-effects, and
3120 last_bb ends with either a GIMPLE_COND or a cast satisfying
3121 final_range_test_p. */
3122 for (bb = last_bb; ; bb = single_pred (bb))
3123 {
3124 enum tree_code code;
3125 tree lhs, rhs;
3126 inter_bb_range_test_entry bb_ent;
3127
3128 bb_ent.op = NULL_TREE;
3129 bb_ent.first_idx = ops.length ();
3130 bb_ent.last_idx = bb_ent.first_idx;
3131 e = find_edge (bb, other_bb);
3132 stmt = last_stmt (bb);
3133 gimple_set_visited (stmt, true);
3134 if (gimple_code (stmt) != GIMPLE_COND)
3135 {
3136 use_operand_p use_p;
3137 gimple phi;
3138 edge e2;
3139 unsigned int d;
3140
3141 lhs = gimple_assign_lhs (stmt);
3142 rhs = gimple_assign_rhs1 (stmt);
3143 gcc_assert (bb == last_bb);
3144
3145 /* stmt is
3146 _123 = (int) _234;
3147
3148 followed by:
3149 <bb M>:
3150 # _345 = PHI <_123(N), 1(...), 1(...)>
3151
3152 or 0 instead of 1. If it is 0, the _234
3153 range test is anded together with all the
3154 other range tests, if it is 1, it is ored with
3155 them. */
3156 single_imm_use (lhs, &use_p, &phi);
3157 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3158 e2 = find_edge (first_bb, other_bb);
3159 d = e2->dest_idx;
3160 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3161 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3162 code = BIT_AND_EXPR;
3163 else
3164 {
3165 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3166 code = BIT_IOR_EXPR;
3167 }
3168
3169 /* If _234 SSA_NAME_DEF_STMT is
3170 _234 = _567 | _789;
3171 (or &, corresponding to 1/0 in the phi arguments),
3172 push into ops the individual range test arguments
3173 of the bitwise or resp. and, recursively. */
3174 if (!get_ops (rhs, code, &ops,
3175 loop_containing_stmt (stmt))
3176 && has_single_use (rhs))
3177 {
3178 /* Otherwise, push the _234 range test itself. */
3179 operand_entry_t oe = operand_entry_pool.allocate ();
3180
3181 oe->op = rhs;
3182 oe->rank = code;
3183 oe->id = 0;
3184 oe->count = 1;
3185 ops.safe_push (oe);
3186 bb_ent.last_idx++;
3187 }
3188 else
3189 bb_ent.last_idx = ops.length ();
3190 bb_ent.op = rhs;
3191 bbinfo.safe_push (bb_ent);
3192 continue;
3193 }
3194 /* Otherwise stmt is GIMPLE_COND. */
3195 code = gimple_cond_code (stmt);
3196 lhs = gimple_cond_lhs (stmt);
3197 rhs = gimple_cond_rhs (stmt);
3198 if (TREE_CODE (lhs) == SSA_NAME
3199 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3200 && ((code != EQ_EXPR && code != NE_EXPR)
3201 || rhs != boolean_false_node
3202 /* Either push into ops the individual bitwise
3203 or resp. and operands, depending on which
3204 edge is other_bb. */
3205 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3206 ^ (code == EQ_EXPR))
3207 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3208 loop_containing_stmt (stmt))))
3209 {
3210 /* Or push the GIMPLE_COND stmt itself. */
3211 operand_entry_t oe = operand_entry_pool.allocate ();
3212
3213 oe->op = NULL;
3214 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3215 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3216 /* oe->op = NULL signals that there is no SSA_NAME
3217 for the range test, and oe->id instead is the
3218 number of the basic block at whose end the GIMPLE_COND
3219 is. */
3220 oe->id = bb->index;
3221 oe->count = 1;
3222 ops.safe_push (oe);
3223 bb_ent.op = NULL;
3224 bb_ent.last_idx++;
3225 }
3226 else if (ops.length () > bb_ent.first_idx)
3227 {
3228 bb_ent.op = lhs;
3229 bb_ent.last_idx = ops.length ();
3230 }
3231 bbinfo.safe_push (bb_ent);
3232 if (bb == first_bb)
3233 break;
3234 }
3235 if (ops.length () > 1)
3236 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3237 if (any_changes)
3238 {
3239 unsigned int idx;
3240 /* update_ops relies on has_single_use predicates returning the
3241 same values as they did during get_ops earlier. Additionally it
3242 never removes statements, only adds new ones, and it should walk
3243 from the single imm use and check the predicate already before
3244 making those changes.
3245 On the other hand, the handling of GIMPLE_COND directly can turn
3246 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3247 it needs to be done in a separate loop afterwards. */
3248 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3249 {
3250 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3251 && bbinfo[idx].op != NULL_TREE)
3252 {
3253 tree new_op;
3254
3255 stmt = last_stmt (bb);
3256 new_op = update_ops (bbinfo[idx].op,
3257 (enum tree_code)
3258 ops[bbinfo[idx].first_idx]->rank,
3259 ops, &bbinfo[idx].first_idx,
3260 loop_containing_stmt (stmt));
3261 if (new_op == NULL_TREE)
3262 {
3263 gcc_assert (bb == last_bb);
3264 new_op = ops[bbinfo[idx].first_idx++]->op;
3265 }
3266 if (bbinfo[idx].op != new_op)
3267 {
3268 imm_use_iterator iter;
3269 use_operand_p use_p;
3270 gimple use_stmt, cast_stmt = NULL;
3271
3272 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3273 if (is_gimple_debug (use_stmt))
3274 continue;
3275 else if (gimple_code (use_stmt) == GIMPLE_COND
3276 || gimple_code (use_stmt) == GIMPLE_PHI)
3277 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3278 SET_USE (use_p, new_op);
3279 else if (gimple_assign_cast_p (use_stmt))
3280 cast_stmt = use_stmt;
3281 else
3282 gcc_unreachable ();
3283 if (cast_stmt)
3284 {
3285 gcc_assert (bb == last_bb);
3286 tree lhs = gimple_assign_lhs (cast_stmt);
3287 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3288 enum tree_code rhs_code
3289 = gimple_assign_rhs_code (cast_stmt);
3290 gassign *g;
3291 if (is_gimple_min_invariant (new_op))
3292 {
3293 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3294 g = gimple_build_assign (new_lhs, new_op);
3295 }
3296 else
3297 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3298 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3299 gimple_set_uid (g, gimple_uid (cast_stmt));
3300 gimple_set_visited (g, true);
3301 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3302 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3303 if (is_gimple_debug (use_stmt))
3304 continue;
3305 else if (gimple_code (use_stmt) == GIMPLE_COND
3306 || gimple_code (use_stmt) == GIMPLE_PHI)
3307 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3308 SET_USE (use_p, new_lhs);
3309 else
3310 gcc_unreachable ();
3311 }
3312 }
3313 }
3314 if (bb == first_bb)
3315 break;
3316 }
3317 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3318 {
3319 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3320 && bbinfo[idx].op == NULL_TREE
3321 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3322 {
3323 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3324 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3325 gimple_cond_make_false (cond_stmt);
3326 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3327 gimple_cond_make_true (cond_stmt);
3328 else
3329 {
3330 gimple_cond_set_code (cond_stmt, NE_EXPR);
3331 gimple_cond_set_lhs (cond_stmt,
3332 ops[bbinfo[idx].first_idx]->op);
3333 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3334 }
3335 update_stmt (cond_stmt);
3336 }
3337 if (bb == first_bb)
3338 break;
3339 }
3340 }
3341 }
3342
3343 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3344 of STMT in its operands. This is also known as a "destructive
3345 update" operation. */
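
/* A minimal GIMPLE-style sketch of the pattern, for illustration only
   (the SSA names are made up):

     # a_5 = PHI <a_9 (latch), 0 (entry)>
     a_9 = a_5 + c_3;

   Here the addition defining a_9 destructively updates the accumulator
   carried by the PHI, so is_phi_for_stmt returns true for that statement
   and operand a_5.  */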
3346
3347 static bool
3348 is_phi_for_stmt (gimple stmt, tree operand)
3349 {
3350 gimple def_stmt;
3351 gphi *def_phi;
3352 tree lhs;
3353 use_operand_p arg_p;
3354 ssa_op_iter i;
3355
3356 if (TREE_CODE (operand) != SSA_NAME)
3357 return false;
3358
3359 lhs = gimple_assign_lhs (stmt);
3360
3361 def_stmt = SSA_NAME_DEF_STMT (operand);
3362 def_phi = dyn_cast <gphi *> (def_stmt);
3363 if (!def_phi)
3364 return false;
3365
3366 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3367 if (lhs == USE_FROM_PTR (arg_p))
3368 return true;
3369 return false;
3370 }
3371
3372 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
3373 on its rhs1 operand if so. */
3374
3375 static void
3376 remove_visited_stmt_chain (tree var)
3377 {
3378 gimple stmt;
3379 gimple_stmt_iterator gsi;
3380
3381 while (1)
3382 {
3383 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3384 return;
3385 stmt = SSA_NAME_DEF_STMT (var);
3386 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3387 {
3388 var = gimple_assign_rhs1 (stmt);
3389 gsi = gsi_for_stmt (stmt);
3390 reassoc_remove_stmt (&gsi);
3391 release_defs (stmt);
3392 }
3393 else
3394 return;
3395 }
3396 }
3397
3398 /* This function checks three consecutive operands in
3399 the passed operands vector OPS starting from OPINDEX and
3400 swaps two operands if it is profitable for the binary operation
3401 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3402
3403 We pair ops with the same rank if possible.
3404
3405 The alternative we try is to see if STMT is a destructive
3406 update style statement, which is like:
3407 b = phi (a, ...)
3408 a = c + b;
3409 In that case, we want to use the destructive update form to
3410 expose the possible vectorizer sum reduction opportunity.
3411 In that case, the third operand will be the phi node. This
3412 check is not performed if STMT is null.
3413
3414 We could, of course, try to be better as noted above, and do a
3415 lot of work to try to find these opportunities in >3 operand
3416 cases, but it is unlikely to be worth it. */
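
/* For illustration (an assumed source fragment, not taken from this file),
   the destructive-update case corresponds to a reduction such as

     int
     sum_plus_n (const int *a, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s = s + a[i] + 1;
       return s;
     }

   where the accumulator operand is defined by a loop PHI, and the swap
   above keeps the reassociated chain in the destructive-update shape that
   the vectorizer recognizes as a sum reduction.  */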
3417
3418 static void
3419 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3420 unsigned int opindex, gimple stmt)
3421 {
3422 operand_entry_t oe1, oe2, oe3;
3423
3424 oe1 = ops[opindex];
3425 oe2 = ops[opindex + 1];
3426 oe3 = ops[opindex + 2];
3427
3428 if ((oe1->rank == oe2->rank
3429 && oe2->rank != oe3->rank)
3430 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3431 && !is_phi_for_stmt (stmt, oe1->op)
3432 && !is_phi_for_stmt (stmt, oe2->op)))
3433 {
3434 struct operand_entry temp = *oe3;
3435 oe3->op = oe1->op;
3436 oe3->rank = oe1->rank;
3437 oe1->op = temp.op;
3438 oe1->rank= temp.rank;
3439 }
3440 else if ((oe1->rank == oe3->rank
3441 && oe2->rank != oe3->rank)
3442 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3443 && !is_phi_for_stmt (stmt, oe1->op)
3444 && !is_phi_for_stmt (stmt, oe3->op)))
3445 {
3446 struct operand_entry temp = *oe2;
3447 oe2->op = oe1->op;
3448 oe2->rank = oe1->rank;
3449 oe1->op = temp.op;
3450 oe1->rank = temp.rank;
3451 }
3452 }
3453
3454 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3455 two definitions, otherwise return STMT. */
3456
3457 static inline gimple
3458 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3459 {
3460 if (TREE_CODE (rhs1) == SSA_NAME
3461 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3462 stmt = SSA_NAME_DEF_STMT (rhs1);
3463 if (TREE_CODE (rhs2) == SSA_NAME
3464 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3465 stmt = SSA_NAME_DEF_STMT (rhs2);
3466 return stmt;
3467 }
3468
3469 /* Recursively rewrite our linearized statements so that the operators
3470 match those in OPS[OPINDEX], putting the computation in rank
3471 order. Return new lhs. */
3472
3473 static tree
3474 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3475 vec<operand_entry_t> ops, bool changed)
3476 {
3477 tree rhs1 = gimple_assign_rhs1 (stmt);
3478 tree rhs2 = gimple_assign_rhs2 (stmt);
3479 tree lhs = gimple_assign_lhs (stmt);
3480 operand_entry_t oe;
3481
3482 /* The final recursion case for this function is that you have
3483 exactly two operations left.
3484 If we had exactly one op in the entire list to start with, we
3485 would have never called this function, and the tail recursion
3486 rewrites them one at a time. */
3487 if (opindex + 2 == ops.length ())
3488 {
3489 operand_entry_t oe1, oe2;
3490
3491 oe1 = ops[opindex];
3492 oe2 = ops[opindex + 1];
3493
3494 if (rhs1 != oe1->op || rhs2 != oe2->op)
3495 {
3496 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3497 unsigned int uid = gimple_uid (stmt);
3498
3499 if (dump_file && (dump_flags & TDF_DETAILS))
3500 {
3501 fprintf (dump_file, "Transforming ");
3502 print_gimple_stmt (dump_file, stmt, 0, 0);
3503 }
3504
3505 /* Even when changed is false, reassociation could have e.g. removed
3506 some redundant operations, so unless we are just swapping the
3507 arguments or unless there is no change at all (then we just
3508 return lhs), force creation of a new SSA_NAME. */
3509 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3510 {
3511 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3512 lhs = make_ssa_name (TREE_TYPE (lhs));
3513 stmt
3514 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3515 oe1->op, oe2->op);
3516 gimple_set_uid (stmt, uid);
3517 gimple_set_visited (stmt, true);
3518 if (insert_point == gsi_stmt (gsi))
3519 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3520 else
3521 insert_stmt_after (stmt, insert_point);
3522 }
3523 else
3524 {
3525 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3526 == stmt);
3527 gimple_assign_set_rhs1 (stmt, oe1->op);
3528 gimple_assign_set_rhs2 (stmt, oe2->op);
3529 update_stmt (stmt);
3530 }
3531
3532 if (rhs1 != oe1->op && rhs1 != oe2->op)
3533 remove_visited_stmt_chain (rhs1);
3534
3535 if (dump_file && (dump_flags & TDF_DETAILS))
3536 {
3537 fprintf (dump_file, " into ");
3538 print_gimple_stmt (dump_file, stmt, 0, 0);
3539 }
3540 }
3541 return lhs;
3542 }
3543
3544 /* If we hit here, we should have 3 or more ops left. */
3545 gcc_assert (opindex + 2 < ops.length ());
3546
3547 /* Rewrite the next operator. */
3548 oe = ops[opindex];
3549
3550 /* Recurse on the LHS of the binary operator, which is guaranteed to
3551 be the non-leaf side. */
3552 tree new_rhs1
3553 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3554 changed || oe->op != rhs2);
3555
3556 if (oe->op != rhs2 || new_rhs1 != rhs1)
3557 {
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3559 {
3560 fprintf (dump_file, "Transforming ");
3561 print_gimple_stmt (dump_file, stmt, 0, 0);
3562 }
3563
3564 /* If changed is false, this is either opindex == 0
3565 or all outer rhs2's were equal to corresponding oe->op,
3566 and powi_result is NULL.
3567 That means lhs is equivalent before and after reassociation.
3568 Otherwise ensure the old lhs SSA_NAME is not reused and
3569 create a new stmt as well, so that any debug stmts will be
3570 properly adjusted. */
3571 if (changed)
3572 {
3573 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3574 unsigned int uid = gimple_uid (stmt);
3575 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3576
3577 lhs = make_ssa_name (TREE_TYPE (lhs));
3578 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3579 new_rhs1, oe->op);
3580 gimple_set_uid (stmt, uid);
3581 gimple_set_visited (stmt, true);
3582 if (insert_point == gsi_stmt (gsi))
3583 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3584 else
3585 insert_stmt_after (stmt, insert_point);
3586 }
3587 else
3588 {
3589 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3590 == stmt);
3591 gimple_assign_set_rhs1 (stmt, new_rhs1);
3592 gimple_assign_set_rhs2 (stmt, oe->op);
3593 update_stmt (stmt);
3594 }
3595
3596 if (dump_file && (dump_flags & TDF_DETAILS))
3597 {
3598 fprintf (dump_file, " into ");
3599 print_gimple_stmt (dump_file, stmt, 0, 0);
3600 }
3601 }
3602 return lhs;
3603 }
3604
3605 /* Find out how many cycles we need to compute the statement chain.
3606 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3607 maximum number of independent statements we may execute per cycle. */
3608
3609 static int
3610 get_required_cycles (int ops_num, int cpu_width)
3611 {
3612 int res;
3613 int elog;
3614 unsigned int rest;
3615
3616 /* While we have more than 2 * cpu_width operands
3617 we may reduce the number of operands by cpu_width
3618 per cycle. */
3619 res = ops_num / (2 * cpu_width);
3620
3621 /* The remaining operand count may be halved each cycle
3622 until we have only one operand. */
3623 rest = (unsigned)(ops_num - res * cpu_width);
3624 elog = exact_log2 (rest);
3625 if (elog >= 0)
3626 res += elog;
3627 else
3628 res += floor_log2 (rest) + 1;
3629
3630 return res;
3631 }
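
/* A worked instance of the computation above, for illustration only:
   with OPS_NUM == 8 and CPU_WIDTH == 2, res starts as 8 / 4 == 2, the
   remaining operand count is 8 - 2 * 2 == 4, and exact_log2 (4) == 2,
   so get_required_cycles returns 4.  */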
3632
3633 /* Returns an optimal number of registers to use for computation of
3634 given statements. */
3635
3636 static int
3637 get_reassociation_width (int ops_num, enum tree_code opc,
3638 machine_mode mode)
3639 {
3640 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3641 int width;
3642 int width_min;
3643 int cycles_best;
3644
3645 if (param_width > 0)
3646 width = param_width;
3647 else
3648 width = targetm.sched.reassociation_width (opc, mode);
3649
3650 if (width == 1)
3651 return width;
3652
3653 /* Get the minimal time required for sequence computation. */
3654 cycles_best = get_required_cycles (ops_num, width);
3655
3656 /* Check if we may use a smaller width and still compute the sequence in
3657 the same time. That would allow us to reduce register usage.
3658 get_required_cycles is monotonically increasing as the width decreases,
3659 so we can perform a binary search for the minimal width that still
3660 results in the optimal cycle count. */
3661 width_min = 1;
3662 while (width > width_min)
3663 {
3664 int width_mid = (width + width_min) / 2;
3665
3666 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3667 width = width_mid;
3668 else if (width_min < width_mid)
3669 width_min = width_mid;
3670 else
3671 break;
3672 }
3673
3674 return width;
3675 }
3676
3677 /* Recursively rewrite our linearized statements so that the operators
3678 match those in OPS[OPINDEX], putting the computation in rank
3679 order and trying to allow operations to be executed in
3680 parallel. */
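
/* As an illustration of the intent (an assumed source fragment, not taken
   from this file), with width 2 a chain such as

     s = a + b + c + d;

   is emitted roughly as the balanced form

     t1 = c + d;
     t2 = a + b;
     s = t1 + t2;

   so the two independent additions can execute in the same cycle.  */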
3681
3682 static void
3683 rewrite_expr_tree_parallel (gassign *stmt, int width,
3684 vec<operand_entry_t> ops)
3685 {
3686 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3687 int op_num = ops.length ();
3688 int stmt_num = op_num - 1;
3689 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3690 int op_index = op_num - 1;
3691 int stmt_index = 0;
3692 int ready_stmts_end = 0;
3693 int i = 0;
3694 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3695
3696 /* We start expression rewriting from the top statements.
3697 So, in this loop we create a full list of statements
3698 we will work with. */
3699 stmts[stmt_num - 1] = stmt;
3700 for (i = stmt_num - 2; i >= 0; i--)
3701 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3702
3703 for (i = 0; i < stmt_num; i++)
3704 {
3705 tree op1, op2;
3706
3707 /* Determine whether we should use results of
3708 already handled statements or not. */
3709 if (ready_stmts_end == 0
3710 && (i - stmt_index >= width || op_index < 1))
3711 ready_stmts_end = i;
3712
3713 /* Now we choose operands for the next statement. A nonzero
3714 value in ready_stmts_end means here that we should use
3715 the result of already generated statements as a new operand. */
3716 if (ready_stmts_end > 0)
3717 {
3718 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3719 if (ready_stmts_end > stmt_index)
3720 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3721 else if (op_index >= 0)
3722 op2 = ops[op_index--]->op;
3723 else
3724 {
3725 gcc_assert (stmt_index < i);
3726 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3727 }
3728
3729 if (stmt_index >= ready_stmts_end)
3730 ready_stmts_end = 0;
3731 }
3732 else
3733 {
3734 if (op_index > 1)
3735 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3736 op2 = ops[op_index--]->op;
3737 op1 = ops[op_index--]->op;
3738 }
3739
3740 /* If we are emitting the last statement, put the remaining
3741 operands into it.  This will also terminate
3742 the loop. */
3743 if (op_index < 0 && stmt_index == i)
3744 i = stmt_num - 1;
3745
3746 if (dump_file && (dump_flags & TDF_DETAILS))
3747 {
3748 fprintf (dump_file, "Transforming ");
3749 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3750 }
3751
3752 /* We keep the original statement only for the last one; all
3753 others are recreated. */
3754 if (i == stmt_num - 1)
3755 {
3756 gimple_assign_set_rhs1 (stmts[i], op1);
3757 gimple_assign_set_rhs2 (stmts[i], op2);
3758 update_stmt (stmts[i]);
3759 }
3760 else
3761 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3762
3763 if (dump_file && (dump_flags & TDF_DETAILS))
3764 {
3765 fprintf (dump_file, " into ");
3766 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3767 }
3768 }
3769
3770 remove_visited_stmt_chain (last_rhs1);
3771 }
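
/* A purely illustrative example of the effect (statement names are
   hypothetical): for a chain such as
     x = ((a + b) + c) + d
   with OPS = {a, b, c, d} and WIDTH = 2, one possible rewrite is
     _1 = c + d;
     _2 = a + b;
     x  = _1 + _2;
   where _1 and _2 do not depend on each other and can issue in
   parallel, instead of the three serially dependent additions of the
   original chain.  Exact operand placement depends on the ranks and
   on swap_ops_for_binary_stmt.  */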
3772
3773 /* Transform STMT, which is really (A + B) + (C + D), into the left
3774 linear form ((A+B)+C)+D.
3775 Recurse on D if necessary. */
3776
3777 static void
3778 linearize_expr (gimple stmt)
3779 {
3780 gimple_stmt_iterator gsi;
3781 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3782 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3783 gimple oldbinrhs = binrhs;
3784 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3785 gimple newbinrhs = NULL;
3786 struct loop *loop = loop_containing_stmt (stmt);
3787 tree lhs = gimple_assign_lhs (stmt);
3788
3789 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3790 && is_reassociable_op (binrhs, rhscode, loop));
3791
3792 gsi = gsi_for_stmt (stmt);
3793
3794 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3795 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3796 gimple_assign_rhs_code (binrhs),
3797 gimple_assign_lhs (binlhs),
3798 gimple_assign_rhs2 (binrhs));
3799 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3800 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3801 gimple_set_uid (binrhs, gimple_uid (stmt));
3802
3803 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3804 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3805
3806 if (dump_file && (dump_flags & TDF_DETAILS))
3807 {
3808 fprintf (dump_file, "Linearized: ");
3809 print_gimple_stmt (dump_file, stmt, 0, 0);
3810 }
3811
3812 reassociate_stats.linearized++;
3813 update_stmt (stmt);
3814
3815 gsi = gsi_for_stmt (oldbinrhs);
3816 reassoc_remove_stmt (&gsi);
3817 release_defs (oldbinrhs);
3818
3819 gimple_set_visited (stmt, true);
3820 gimple_set_visited (binlhs, true);
3821 gimple_set_visited (binrhs, true);
3822
3823 /* Tail recurse on the new rhs if it still needs reassociation. */
3824 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3825 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3826 want to change the algorithm while converting to tuples. */
3827 linearize_expr (stmt);
3828 }
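
/* A small illustration (statement names are hypothetical): given
     t1 = a + b;
     t2 = c + d;
     x  = t1 + t2;
   the code above rewrites the tree rooted at x into
     t1 = a + b;
     t3 = t1 + d;
     x  = t3 + c;
   and removes the now-dead t2, leaving a left-linear chain.  If c were
   itself defined by a reassociable add, the tail recursion would
   linearize it as well.  */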
3829
3830 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3831 it. Otherwise, return NULL. */
3832
3833 static gimple
3834 get_single_immediate_use (tree lhs)
3835 {
3836 use_operand_p immuse;
3837 gimple immusestmt;
3838
3839 if (TREE_CODE (lhs) == SSA_NAME
3840 && single_imm_use (lhs, &immuse, &immusestmt)
3841 && is_gimple_assign (immusestmt))
3842 return immusestmt;
3843
3844 return NULL;
3845 }
3846
3847 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3848 representing the negated value. Insertions of any necessary
3849 instructions go before GSI.
3850 This function is recursive in that, if you hand it "a_5" as the
3851 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3852 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3853
3854 static tree
3855 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3856 {
3857 gimple negatedefstmt = NULL;
3858 tree resultofnegate;
3859 gimple_stmt_iterator gsi;
3860 unsigned int uid;
3861
3862 /* If we are trying to negate a name defined by an add, negate the
3863 add's operands instead. */
3864 if (TREE_CODE (tonegate) == SSA_NAME)
3865 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3866 if (TREE_CODE (tonegate) == SSA_NAME
3867 && is_gimple_assign (negatedefstmt)
3868 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3869 && has_single_use (gimple_assign_lhs (negatedefstmt))
3870 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3871 {
3872 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3873 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3874 tree lhs = gimple_assign_lhs (negatedefstmt);
3875 gimple g;
3876
3877 gsi = gsi_for_stmt (negatedefstmt);
3878 rhs1 = negate_value (rhs1, &gsi);
3879
3880 gsi = gsi_for_stmt (negatedefstmt);
3881 rhs2 = negate_value (rhs2, &gsi);
3882
3883 gsi = gsi_for_stmt (negatedefstmt);
3884 lhs = make_ssa_name (TREE_TYPE (lhs));
3885 gimple_set_visited (negatedefstmt, true);
3886 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3887 gimple_set_uid (g, gimple_uid (negatedefstmt));
3888 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3889 return lhs;
3890 }
3891
3892 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3893 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3894 NULL_TREE, true, GSI_SAME_STMT);
3895 gsi = *gsip;
3896 uid = gimple_uid (gsi_stmt (gsi));
3897 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3898 {
3899 gimple stmt = gsi_stmt (gsi);
3900 if (gimple_uid (stmt) != 0)
3901 break;
3902 gimple_set_uid (stmt, uid);
3903 }
3904 return resultofnegate;
3905 }
3906
3907 /* Return true if we should break up the subtract in STMT into an add
3908 with negate.  This is true when the subtract's operands are really
3909 adds, or when the subtract itself is used in an add expression.  In
3910 either case, breaking up the subtract into an add with negate
3911 exposes the adds to reassociation. */
3912
3913 static bool
3914 should_break_up_subtract (gimple stmt)
3915 {
3916 tree lhs = gimple_assign_lhs (stmt);
3917 tree binlhs = gimple_assign_rhs1 (stmt);
3918 tree binrhs = gimple_assign_rhs2 (stmt);
3919 gimple immusestmt;
3920 struct loop *loop = loop_containing_stmt (stmt);
3921
3922 if (TREE_CODE (binlhs) == SSA_NAME
3923 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3924 return true;
3925
3926 if (TREE_CODE (binrhs) == SSA_NAME
3927 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3928 return true;
3929
3930 if (TREE_CODE (lhs) == SSA_NAME
3931 && (immusestmt = get_single_immediate_use (lhs))
3932 && is_gimple_assign (immusestmt)
3933 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3934 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3935 return true;
3936 return false;
3937 }
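
/* For instance (illustrative, names hypothetical): given
     c = a - b;
     x = c + e;
   the subtract feeds an addition, so this predicate returns true and
   break_up_subtract will rewrite it as
     t = -b;
     c = a + t;
   exposing the additions to reassociation.  */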
3938
3939 /* Transform STMT from A - B into A + -B. */
3940
3941 static void
3942 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3943 {
3944 tree rhs1 = gimple_assign_rhs1 (stmt);
3945 tree rhs2 = gimple_assign_rhs2 (stmt);
3946
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3948 {
3949 fprintf (dump_file, "Breaking up subtract ");
3950 print_gimple_stmt (dump_file, stmt, 0, 0);
3951 }
3952
3953 rhs2 = negate_value (rhs2, gsip);
3954 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3955 update_stmt (stmt);
3956 }
3957
3958 /* Determine whether STMT is a builtin call that raises an SSA name
3959 to an integer power and has only one use. If so, and this is early
3960 reassociation and unsafe math optimizations are permitted, place
3961 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3962 If any of these conditions does not hold, return FALSE. */
3963
3964 static bool
3965 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3966 {
3967 tree fndecl, arg1;
3968 REAL_VALUE_TYPE c, cint;
3969
3970 if (!first_pass_instance
3971 || !flag_unsafe_math_optimizations
3972 || !is_gimple_call (stmt)
3973 || !has_single_use (gimple_call_lhs (stmt)))
3974 return false;
3975
3976 fndecl = gimple_call_fndecl (stmt);
3977
3978 if (!fndecl
3979 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3980 return false;
3981
3982 switch (DECL_FUNCTION_CODE (fndecl))
3983 {
3984 CASE_FLT_FN (BUILT_IN_POW):
3985 if (flag_errno_math)
3986 return false;
3987
3988 *base = gimple_call_arg (stmt, 0);
3989 arg1 = gimple_call_arg (stmt, 1);
3990
3991 if (TREE_CODE (arg1) != REAL_CST)
3992 return false;
3993
3994 c = TREE_REAL_CST (arg1);
3995
3996 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3997 return false;
3998
3999 *exponent = real_to_integer (&c);
4000 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4001 if (!real_identical (&c, &cint))
4002 return false;
4003
4004 break;
4005
4006 CASE_FLT_FN (BUILT_IN_POWI):
4007 *base = gimple_call_arg (stmt, 0);
4008 arg1 = gimple_call_arg (stmt, 1);
4009
4010 if (!tree_fits_shwi_p (arg1))
4011 return false;
4012
4013 *exponent = tree_to_shwi (arg1);
4014 break;
4015
4016 default:
4017 return false;
4018 }
4019
4020 /* Expanding negative exponents is generally unproductive, so we don't
4021 complicate matters with those. Exponents of zero and one should
4022 have been handled by expression folding. */
4023 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4024 return false;
4025
4026 return true;
4027 }
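
/* Illustrative outcomes, assuming the early reassociation pass with
   -funsafe-math-optimizations (and -fno-math-errno for pow), a
   single-use result and an SSA name base:
     t = __builtin_pow (x, 3.0)   -> *base = x, *exponent = 3, true
     t = __builtin_powi (x, 5)    -> *base = x, *exponent = 5, true
     t = __builtin_pow (x, 2.5)   -> false (non-integral exponent)
     t = __builtin_powi (x, 1)    -> false (exponent < 2)  */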
4028
4029 /* Recursively linearize a binary expression that is the RHS of STMT.
4030 Place the operands of the expression tree in the vector named OPS. */
4031
4032 static void
4033 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4034 bool is_associative, bool set_visited)
4035 {
4036 tree binlhs = gimple_assign_rhs1 (stmt);
4037 tree binrhs = gimple_assign_rhs2 (stmt);
4038 gimple binlhsdef = NULL, binrhsdef = NULL;
4039 bool binlhsisreassoc = false;
4040 bool binrhsisreassoc = false;
4041 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4042 struct loop *loop = loop_containing_stmt (stmt);
4043 tree base = NULL_TREE;
4044 HOST_WIDE_INT exponent = 0;
4045
4046 if (set_visited)
4047 gimple_set_visited (stmt, true);
4048
4049 if (TREE_CODE (binlhs) == SSA_NAME)
4050 {
4051 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4052 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4053 && !stmt_could_throw_p (binlhsdef));
4054 }
4055
4056 if (TREE_CODE (binrhs) == SSA_NAME)
4057 {
4058 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4059 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4060 && !stmt_could_throw_p (binrhsdef));
4061 }
4062
4063 /* If the LHS is not reassociable, but the RHS is, we need to swap
4064 them. If neither is reassociable, there is nothing we can do, so
4065 just put them in the ops vector. If the LHS is reassociable,
4066 linearize it. If both are reassociable, then linearize the RHS
4067 and the LHS. */
4068
4069 if (!binlhsisreassoc)
4070 {
4071 /* If this is not an associative operation, like division, give up. */
4072 if (!is_associative)
4073 {
4074 add_to_ops_vec (ops, binrhs);
4075 return;
4076 }
4077
4078 if (!binrhsisreassoc)
4079 {
4080 if (rhscode == MULT_EXPR
4081 && TREE_CODE (binrhs) == SSA_NAME
4082 && acceptable_pow_call (binrhsdef, &base, &exponent))
4083 {
4084 add_repeat_to_ops_vec (ops, base, exponent);
4085 gimple_set_visited (binrhsdef, true);
4086 }
4087 else
4088 add_to_ops_vec (ops, binrhs);
4089
4090 if (rhscode == MULT_EXPR
4091 && TREE_CODE (binlhs) == SSA_NAME
4092 && acceptable_pow_call (binlhsdef, &base, &exponent))
4093 {
4094 add_repeat_to_ops_vec (ops, base, exponent);
4095 gimple_set_visited (binlhsdef, true);
4096 }
4097 else
4098 add_to_ops_vec (ops, binlhs);
4099
4100 return;
4101 }
4102
4103 if (dump_file && (dump_flags & TDF_DETAILS))
4104 {
4105 fprintf (dump_file, "swapping operands of ");
4106 print_gimple_stmt (dump_file, stmt, 0, 0);
4107 }
4108
4109 swap_ssa_operands (stmt,
4110 gimple_assign_rhs1_ptr (stmt),
4111 gimple_assign_rhs2_ptr (stmt));
4112 update_stmt (stmt);
4113
4114 if (dump_file && (dump_flags & TDF_DETAILS))
4115 {
4116 fprintf (dump_file, " is now ");
4117 print_gimple_stmt (dump_file, stmt, 0, 0);
4118 }
4119
4120 /* We want to make it so the lhs is always the reassociative op,
4121 so swap. */
4122 std::swap (binlhs, binrhs);
4123 }
4124 else if (binrhsisreassoc)
4125 {
4126 linearize_expr (stmt);
4127 binlhs = gimple_assign_rhs1 (stmt);
4128 binrhs = gimple_assign_rhs2 (stmt);
4129 }
4130
4131 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4132 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4133 rhscode, loop));
4134 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4135 is_associative, set_visited);
4136
4137 if (rhscode == MULT_EXPR
4138 && TREE_CODE (binrhs) == SSA_NAME
4139 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4140 {
4141 add_repeat_to_ops_vec (ops, base, exponent);
4142 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4143 }
4144 else
4145 add_to_ops_vec (ops, binrhs);
4146 }
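
/* As a small illustration (names hypothetical): for the multiply chain
     t1 = a * b;
     t2 = t1 * c;
     x  = t2 * d;
   linearizing the tree rooted at x collects a, b, c and d into OPS,
   each with a count of 1; they are later sorted by rank and rewritten.
   If one factor is instead a single-use call accepted by
   acceptable_pow_call, e.g. t1 = __builtin_powi (a, 3), it is recorded
   via add_repeat_to_ops_vec as the repeated factor (a, 3).  */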
4147
4148 /* Repropagate the negates back into subtracts, since no other pass
4149 currently does it. */
4150
4151 static void
4152 repropagate_negates (void)
4153 {
4154 unsigned int i = 0;
4155 tree negate;
4156
4157 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4158 {
4159 gimple user = get_single_immediate_use (negate);
4160
4161 if (!user || !is_gimple_assign (user))
4162 continue;
4163
4164 /* The negate operand can be either operand of a PLUS_EXPR
4165 (it can be the LHS if the RHS is a constant for example).
4166
4167 Force the negate operand to the RHS of the PLUS_EXPR, then
4168 transform the PLUS_EXPR into a MINUS_EXPR. */
4169 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4170 {
4171 /* If the negated operand appears on the LHS of the
4172 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4173 to force the negated operand to the RHS of the PLUS_EXPR. */
4174 if (gimple_assign_rhs1 (user) == negate)
4175 {
4176 swap_ssa_operands (user,
4177 gimple_assign_rhs1_ptr (user),
4178 gimple_assign_rhs2_ptr (user));
4179 }
4180
4181 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4182 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4183 if (gimple_assign_rhs2 (user) == negate)
4184 {
4185 tree rhs1 = gimple_assign_rhs1 (user);
4186 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4187 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4188 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4189 update_stmt (user);
4190 }
4191 }
4192 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4193 {
4194 if (gimple_assign_rhs1 (user) == negate)
4195 {
4196 /* We have
4197 x = -a
4198 y = x - b
4199 which we transform into
4200 x = a + b
4201 y = -x .
4202 This pushes down the negate which we possibly can merge
4203 into some other operation, hence insert it into the
4204 plus_negates vector. */
4205 gimple feed = SSA_NAME_DEF_STMT (negate);
4206 tree a = gimple_assign_rhs1 (feed);
4207 tree b = gimple_assign_rhs2 (user);
4208 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4209 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4210 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4211 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4212 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4213 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4214 user = gsi_stmt (gsi2);
4215 update_stmt (user);
4216 reassoc_remove_stmt (&gsi);
4217 release_defs (feed);
4218 plus_negates.safe_push (gimple_assign_lhs (user));
4219 }
4220 else
4221 {
4222 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4223 rid of one operation. */
4224 gimple feed = SSA_NAME_DEF_STMT (negate);
4225 tree a = gimple_assign_rhs1 (feed);
4226 tree rhs1 = gimple_assign_rhs1 (user);
4227 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4228 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4229 update_stmt (gsi_stmt (gsi));
4230 }
4231 }
4232 }
4233 }
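
/* Illustrative example of the PLUS_EXPR case above (names hypothetical):
     x = -a;
     y = b + x;
   becomes
     y = b - a;
   after forcing the negate to the second operand if necessary; the
   definition x = -a becomes dead once it has no other uses and is
   cleaned up later.  */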
4234
4235 /* Returns true if OP is of a type for which we can do reassociation.
4236 That is, for integral types whose overflow wraps, for non-saturating
4237 fixed-point types, and for floating-point types under -fassociative-math. */
4238
4239 static bool
4240 can_reassociate_p (tree op)
4241 {
4242 tree type = TREE_TYPE (op);
4243 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4244 || NON_SAT_FIXED_POINT_TYPE_P (type)
4245 || (flag_associative_math && FLOAT_TYPE_P (type)))
4246 return true;
4247 return false;
4248 }
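
/* For example: unsigned integer operands qualify because unsigned
   overflow wraps; plain signed int qualifies only under -fwrapv (so
   that TYPE_OVERFLOW_WRAPS holds); float and double qualify only when
   -fassociative-math is enabled.  */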
4249
4250 /* Break up subtract operations in block BB.
4251
4252 We do this top down because we don't know whether the subtract is
4253 part of a possible chain of reassociation except at the top.
4254
4255 IE given
4256 d = f + g
4257 c = a + e
4258 b = c - d
4259 q = b - r
4260 k = t - q
4261
4262 we want to break up k = t - q, but we won't until we've transformed q
4263 = b - r, which won't be broken up until we transform b = c - d.
4264
4265 En passant, clear the GIMPLE visited flag on every statement
4266 and set UIDs within each basic block. */
4267
4268 static void
4269 break_up_subtract_bb (basic_block bb)
4270 {
4271 gimple_stmt_iterator gsi;
4272 basic_block son;
4273 unsigned int uid = 1;
4274
4275 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4276 {
4277 gimple stmt = gsi_stmt (gsi);
4278 gimple_set_visited (stmt, false);
4279 gimple_set_uid (stmt, uid++);
4280
4281 if (!is_gimple_assign (stmt)
4282 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4283 continue;
4284
4285 /* Look for simple gimple subtract operations. */
4286 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4287 {
4288 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4289 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4290 continue;
4291
4292 /* Check for a subtract used only in an addition. If this
4293 is the case, transform it into add of a negate for better
4294 reassociation. IE transform C = A-B into C = A + -B if C
4295 is only used in an addition. */
4296 if (should_break_up_subtract (stmt))
4297 break_up_subtract (stmt, &gsi);
4298 }
4299 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4300 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4301 plus_negates.safe_push (gimple_assign_lhs (stmt));
4302 }
4303 for (son = first_dom_son (CDI_DOMINATORS, bb);
4304 son;
4305 son = next_dom_son (CDI_DOMINATORS, son))
4306 break_up_subtract_bb (son);
4307 }
4308
4309 /* Used for repeated factor analysis. */
4310 struct repeat_factor_d
4311 {
4312 /* An SSA name that occurs in a multiply chain. */
4313 tree factor;
4314
4315 /* Cached rank of the factor. */
4316 unsigned rank;
4317
4318 /* Number of occurrences of the factor in the chain. */
4319 HOST_WIDE_INT count;
4320
4321 /* An SSA name representing the product of this factor and
4322 all factors appearing later in the repeated factor vector. */
4323 tree repr;
4324 };
4325
4326 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4327 typedef const struct repeat_factor_d *const_repeat_factor_t;
4328
4329
4330 static vec<repeat_factor> repeat_factor_vec;
4331
4332 /* Used for sorting the repeat factor vector. Sort primarily by
4333 ascending occurrence count, secondarily by descending rank. */
4334
4335 static int
4336 compare_repeat_factors (const void *x1, const void *x2)
4337 {
4338 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4339 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4340
4341 if (rf1->count != rf2->count)
4342 return rf1->count - rf2->count;
4343
4344 return rf2->rank - rf1->rank;
4345 }
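
/* Illustrative ordering (hypothetical counts and ranks): the factors
   {x: count 3, rank 9}, {y: count 1, rank 4}, {z: count 3, rank 2}
   sort as y, x, z -- counts ascend, and among equal counts the
   higher-ranked factor comes first.  */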
4346
4347 /* Look for repeated operands in OPS in the multiply tree rooted at
4348 STMT. Replace them with an optimal sequence of multiplies and powi
4349 builtin calls, and remove the used operands from OPS. Return an
4350 SSA name representing the value of the replacement sequence. */
4351
4352 static tree
4353 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4354 {
4355 unsigned i, j, vec_len;
4356 int ii;
4357 operand_entry_t oe;
4358 repeat_factor_t rf1, rf2;
4359 repeat_factor rfnew;
4360 tree result = NULL_TREE;
4361 tree target_ssa, iter_result;
4362 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4363 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4364 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4365 gimple mul_stmt, pow_stmt;
4366
4367 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4368 target. */
4369 if (!powi_fndecl)
4370 return NULL_TREE;
4371
4372 /* Allocate the repeated factor vector. */
4373 repeat_factor_vec.create (10);
4374
4375 /* Scan the OPS vector for all SSA names in the product and build
4376 up a vector of occurrence counts for each factor. */
4377 FOR_EACH_VEC_ELT (*ops, i, oe)
4378 {
4379 if (TREE_CODE (oe->op) == SSA_NAME)
4380 {
4381 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4382 {
4383 if (rf1->factor == oe->op)
4384 {
4385 rf1->count += oe->count;
4386 break;
4387 }
4388 }
4389
4390 if (j >= repeat_factor_vec.length ())
4391 {
4392 rfnew.factor = oe->op;
4393 rfnew.rank = oe->rank;
4394 rfnew.count = oe->count;
4395 rfnew.repr = NULL_TREE;
4396 repeat_factor_vec.safe_push (rfnew);
4397 }
4398 }
4399 }
4400
4401 /* Sort the repeated factor vector by (a) increasing occurrence count,
4402 and (b) decreasing rank. */
4403 repeat_factor_vec.qsort (compare_repeat_factors);
4404
4405 /* It is generally best to combine as many base factors as possible
4406 into a product before applying __builtin_powi to the result.
4407 However, the sort order chosen for the repeated factor vector
4408 allows us to cache partial results for the product of the base
4409 factors for subsequent use. When we already have a cached partial
4410 result from a previous iteration, it is best to make use of it
4411 before looking for another __builtin_powi opportunity.
4412
4413 As an example, consider x * x * y * y * y * z * z * z * z.
4414 We want to first compose the product x * y * z, raise it to the
4415 second power, then multiply this by y * z, and finally multiply
4416 by z. This can be done in 5 multiplies provided we cache y * z
4417 for use in both expressions:
4418
4419 t1 = y * z
4420 t2 = t1 * x
4421 t3 = t2 * t2
4422 t4 = t1 * t3
4423 result = t4 * z
4424
4425 If we instead ignored the cached y * z and first multiplied by
4426 the __builtin_powi opportunity z * z, we would get the inferior:
4427
4428 t1 = y * z
4429 t2 = t1 * x
4430 t3 = t2 * t2
4431 t4 = z * z
4432 t5 = t3 * t4
4433 result = t5 * y */
4434
4435 vec_len = repeat_factor_vec.length ();
4436
4437 /* Repeatedly look for opportunities to create a builtin_powi call. */
4438 while (true)
4439 {
4440 HOST_WIDE_INT power;
4441
4442 /* First look for the largest cached product of factors from
4443 preceding iterations. If found, create a builtin_powi for
4444 it if the minimum occurrence count for its factors is at
4445 least 2, or just use this cached product as our next
4446 multiplicand if the minimum occurrence count is 1. */
4447 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4448 {
4449 if (rf1->repr && rf1->count > 0)
4450 break;
4451 }
4452
4453 if (j < vec_len)
4454 {
4455 power = rf1->count;
4456
4457 if (power == 1)
4458 {
4459 iter_result = rf1->repr;
4460
4461 if (dump_file && (dump_flags & TDF_DETAILS))
4462 {
4463 unsigned elt;
4464 repeat_factor_t rf;
4465 fputs ("Multiplying by cached product ", dump_file);
4466 for (elt = j; elt < vec_len; elt++)
4467 {
4468 rf = &repeat_factor_vec[elt];
4469 print_generic_expr (dump_file, rf->factor, 0);
4470 if (elt < vec_len - 1)
4471 fputs (" * ", dump_file);
4472 }
4473 fputs ("\n", dump_file);
4474 }
4475 }
4476 else
4477 {
4478 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4479 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4480 build_int_cst (integer_type_node,
4481 power));
4482 gimple_call_set_lhs (pow_stmt, iter_result);
4483 gimple_set_location (pow_stmt, gimple_location (stmt));
4484 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4485
4486 if (dump_file && (dump_flags & TDF_DETAILS))
4487 {
4488 unsigned elt;
4489 repeat_factor_t rf;
4490 fputs ("Building __builtin_pow call for cached product (",
4491 dump_file);
4492 for (elt = j; elt < vec_len; elt++)
4493 {
4494 rf = &repeat_factor_vec[elt];
4495 print_generic_expr (dump_file, rf->factor, 0);
4496 if (elt < vec_len - 1)
4497 fputs (" * ", dump_file);
4498 }
4499 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4500 power);
4501 }
4502 }
4503 }
4504 else
4505 {
4506 /* Otherwise, find the first factor in the repeated factor
4507 vector whose occurrence count is at least 2. If no such
4508 factor exists, there are no builtin_powi opportunities
4509 remaining. */
4510 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4511 {
4512 if (rf1->count >= 2)
4513 break;
4514 }
4515
4516 if (j >= vec_len)
4517 break;
4518
4519 power = rf1->count;
4520
4521 if (dump_file && (dump_flags & TDF_DETAILS))
4522 {
4523 unsigned elt;
4524 repeat_factor_t rf;
4525 fputs ("Building __builtin_pow call for (", dump_file);
4526 for (elt = j; elt < vec_len; elt++)
4527 {
4528 rf = &repeat_factor_vec[elt];
4529 print_generic_expr (dump_file, rf->factor, 0);
4530 if (elt < vec_len - 1)
4531 fputs (" * ", dump_file);
4532 }
4533 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4534 }
4535
4536 reassociate_stats.pows_created++;
4537
4538 /* Visit each element of the vector in reverse order (so that
4539 high-occurrence elements are visited first, and within the
4540 same occurrence count, lower-ranked elements are visited
4541 first). Form a linear product of all elements in this order
4542 whose occurrence count is at least that of element J.
4543 Record the SSA name representing the product of each element
4544 with all subsequent elements in the vector. */
4545 if (j == vec_len - 1)
4546 rf1->repr = rf1->factor;
4547 else
4548 {
4549 for (ii = vec_len - 2; ii >= (int)j; ii--)
4550 {
4551 tree op1, op2;
4552
4553 rf1 = &repeat_factor_vec[ii];
4554 rf2 = &repeat_factor_vec[ii + 1];
4555
4556 /* Init the last factor's representative to be itself. */
4557 if (!rf2->repr)
4558 rf2->repr = rf2->factor;
4559
4560 op1 = rf1->factor;
4561 op2 = rf2->repr;
4562
4563 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4564 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4565 op1, op2);
4566 gimple_set_location (mul_stmt, gimple_location (stmt));
4567 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4568 rf1->repr = target_ssa;
4569
4570 /* Don't reprocess the multiply we just introduced. */
4571 gimple_set_visited (mul_stmt, true);
4572 }
4573 }
4574
4575 /* Form a call to __builtin_powi for the maximum product
4576 just formed, raised to the power obtained earlier. */
4577 rf1 = &repeat_factor_vec[j];
4578 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4579 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4580 build_int_cst (integer_type_node,
4581 power));
4582 gimple_call_set_lhs (pow_stmt, iter_result);
4583 gimple_set_location (pow_stmt, gimple_location (stmt));
4584 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4585 }
4586
4587 /* If we previously formed at least one other builtin_powi call,
4588 form the product of this one and those others. */
4589 if (result)
4590 {
4591 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4592 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4593 result, iter_result);
4594 gimple_set_location (mul_stmt, gimple_location (stmt));
4595 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4596 gimple_set_visited (mul_stmt, true);
4597 result = new_result;
4598 }
4599 else
4600 result = iter_result;
4601
4602 /* Decrement the occurrence count of each element in the product
4603 by the count found above, and remove this many copies of each
4604 factor from OPS. */
4605 for (i = j; i < vec_len; i++)
4606 {
4607 unsigned k = power;
4608 unsigned n;
4609
4610 rf1 = &repeat_factor_vec[i];
4611 rf1->count -= power;
4612
4613 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4614 {
4615 if (oe->op == rf1->factor)
4616 {
4617 if (oe->count <= k)
4618 {
4619 ops->ordered_remove (n);
4620 k -= oe->count;
4621
4622 if (k == 0)
4623 break;
4624 }
4625 else
4626 {
4627 oe->count -= k;
4628 break;
4629 }
4630 }
4631 }
4632 }
4633 }
4634
4635 /* At this point all elements in the repeated factor vector have a
4636 remaining occurrence count of 0 or 1, and those with a count of 1
4637 don't have cached representatives. Re-sort the ops vector and
4638 clean up. */
4639 ops->qsort (sort_by_operand_rank);
4640 repeat_factor_vec.release ();
4641
4642 /* Return the final product computed herein. Note that there may
4643 still be some elements with single occurrence count left in OPS;
4644 those will be handled by the normal reassociation logic. */
4645 return result;
4646 }
4647
4648 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4649
4650 static void
4651 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4652 {
4653 tree rhs1;
4654
4655 if (dump_file && (dump_flags & TDF_DETAILS))
4656 {
4657 fprintf (dump_file, "Transforming ");
4658 print_gimple_stmt (dump_file, stmt, 0, 0);
4659 }
4660
4661 rhs1 = gimple_assign_rhs1 (stmt);
4662 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4663 update_stmt (stmt);
4664 remove_visited_stmt_chain (rhs1);
4665
4666 if (dump_file && (dump_flags & TDF_DETAILS))
4667 {
4668 fprintf (dump_file, " into ");
4669 print_gimple_stmt (dump_file, stmt, 0, 0);
4670 }
4671 }
4672
4673 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4674
4675 static void
4676 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4677 tree rhs1, tree rhs2)
4678 {
4679 if (dump_file && (dump_flags & TDF_DETAILS))
4680 {
4681 fprintf (dump_file, "Transforming ");
4682 print_gimple_stmt (dump_file, stmt, 0, 0);
4683 }
4684
4685 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4686 update_stmt (gsi_stmt (*gsi));
4687 remove_visited_stmt_chain (rhs1);
4688
4689 if (dump_file && (dump_flags & TDF_DETAILS))
4690 {
4691 fprintf (dump_file, " into ");
4692 print_gimple_stmt (dump_file, stmt, 0, 0);
4693 }
4694 }
4695
4696 /* Reassociate expressions in basic block BB and its post-dominator
4697 tree children. */
4698
4699 static void
4700 reassociate_bb (basic_block bb)
4701 {
4702 gimple_stmt_iterator gsi;
4703 basic_block son;
4704 gimple stmt = last_stmt (bb);
4705
4706 if (stmt && !gimple_visited_p (stmt))
4707 maybe_optimize_range_tests (stmt);
4708
4709 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4710 {
4711 stmt = gsi_stmt (gsi);
4712
4713 if (is_gimple_assign (stmt)
4714 && !stmt_could_throw_p (stmt))
4715 {
4716 tree lhs, rhs1, rhs2;
4717 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4718
4719 /* If this is not a gimple binary expression, there is
4720 nothing for us to do with it. */
4721 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4722 continue;
4723
4724 /* If this was part of an already processed statement,
4725 we don't need to touch it again. */
4726 if (gimple_visited_p (stmt))
4727 {
4728 /* This statement might have become dead because of previous
4729 reassociations. */
4730 if (has_zero_uses (gimple_get_lhs (stmt)))
4731 {
4732 reassoc_remove_stmt (&gsi);
4733 release_defs (stmt);
4734 /* We might have removed the last stmt above, which
4735 leaves the iterator at the end of the sequence.
4736 Reset it to the last stmt of the block in that case.
4737 That iterator might still be at the end of the
4738 sequence if we removed the block's only statement,
4739 in which case we need to bail out. */
4740 if (gsi_end_p (gsi))
4741 {
4742 gsi = gsi_last_bb (bb);
4743 if (gsi_end_p (gsi))
4744 break;
4745 }
4746 }
4747 continue;
4748 }
4749
4750 lhs = gimple_assign_lhs (stmt);
4751 rhs1 = gimple_assign_rhs1 (stmt);
4752 rhs2 = gimple_assign_rhs2 (stmt);
4753
4754 /* For operations other than bitwise and min/max we cannot
4755 reassociate all types.  Verify that here. */
4756 if (rhs_code != BIT_IOR_EXPR
4757 && rhs_code != BIT_AND_EXPR
4758 && rhs_code != BIT_XOR_EXPR
4759 && rhs_code != MIN_EXPR
4760 && rhs_code != MAX_EXPR
4761 && (!can_reassociate_p (lhs)
4762 || !can_reassociate_p (rhs1)
4763 || !can_reassociate_p (rhs2)))
4764 continue;
4765
4766 if (associative_tree_code (rhs_code))
4767 {
4768 auto_vec<operand_entry_t> ops;
4769 tree powi_result = NULL_TREE;
4770
4771 /* There may be no immediate uses left by the time we
4772 get here because we may have eliminated them all. */
4773 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4774 continue;
4775
4776 gimple_set_visited (stmt, true);
4777 linearize_expr_tree (&ops, stmt, true, true);
4778 ops.qsort (sort_by_operand_rank);
4779 optimize_ops_list (rhs_code, &ops);
4780 if (undistribute_ops_list (rhs_code, &ops,
4781 loop_containing_stmt (stmt)))
4782 {
4783 ops.qsort (sort_by_operand_rank);
4784 optimize_ops_list (rhs_code, &ops);
4785 }
4786
4787 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4788 optimize_range_tests (rhs_code, &ops);
4789
4790 if (first_pass_instance
4791 && rhs_code == MULT_EXPR
4792 && flag_unsafe_math_optimizations)
4793 powi_result = attempt_builtin_powi (stmt, &ops);
4794
4795 /* If the operand vector is now empty, all operands were
4796 consumed by the __builtin_powi optimization. */
4797 if (ops.length () == 0)
4798 transform_stmt_to_copy (&gsi, stmt, powi_result);
4799 else if (ops.length () == 1)
4800 {
4801 tree last_op = ops.last ()->op;
4802
4803 if (powi_result)
4804 transform_stmt_to_multiply (&gsi, stmt, last_op,
4805 powi_result);
4806 else
4807 transform_stmt_to_copy (&gsi, stmt, last_op);
4808 }
4809 else
4810 {
4811 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4812 int ops_num = ops.length ();
4813 int width = get_reassociation_width (ops_num, rhs_code, mode);
4814 tree new_lhs = lhs;
4815
4816 if (dump_file && (dump_flags & TDF_DETAILS))
4817 fprintf (dump_file,
4818 "Width = %d was chosen for reassociation\n", width);
4819
4820 if (width > 1
4821 && ops.length () > 3)
4822 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4823 width, ops);
4824 else
4825 {
4826 /* When there are three operands left, we want
4827 to make sure the ones that get the double
4828 binary op are chosen wisely. */
4829 int len = ops.length ();
4830 if (len >= 3)
4831 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4832
4833 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4834 powi_result != NULL);
4835 }
4836
4837 /* If we combined some repeated factors into a
4838 __builtin_powi call, multiply that result by the
4839 reassociated operands. */
4840 if (powi_result)
4841 {
4842 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4843 tree type = TREE_TYPE (lhs);
4844 tree target_ssa = make_temp_ssa_name (type, NULL,
4845 "reassocpow");
4846 gimple_set_lhs (lhs_stmt, target_ssa);
4847 update_stmt (lhs_stmt);
4848 if (lhs != new_lhs)
4849 target_ssa = new_lhs;
4850 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4851 powi_result, target_ssa);
4852 gimple_set_location (mul_stmt, gimple_location (stmt));
4853 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4854 }
4855 }
4856 }
4857 }
4858 }
4859 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4860 son;
4861 son = next_dom_son (CDI_POST_DOMINATORS, son))
4862 reassociate_bb (son);
4863 }
4864
4865 /* Add jumps around shifts for range tests turned into bit tests.
4866 For each SSA_NAME VAR we have code like:
4867 VAR = ...; // final stmt of range comparison
4868 // bit test here...;
4869 OTHERVAR = ...; // final stmt of the bit test sequence
4870 RES = VAR | OTHERVAR;
4871 Turn the above into:
4872 <l1>: VAR = ...;
4873 if (VAR != 0)
4874 goto <l3>;
4875 else
4876 goto <l2>;
4877 <l2>:
4878 // bit test here...;
4879 OTHERVAR = ...;
4880 <l3>:
4881 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4882
4883 static void
4884 branch_fixup (void)
4885 {
4886 tree var;
4887 unsigned int i;
4888
4889 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4890 {
4891 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4892 gimple use_stmt;
4893 use_operand_p use;
4894 bool ok = single_imm_use (var, &use, &use_stmt);
4895 gcc_assert (ok
4896 && is_gimple_assign (use_stmt)
4897 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4898 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4899
4900 basic_block cond_bb = gimple_bb (def_stmt);
4901 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4902 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4903
4904 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4905 gimple g = gimple_build_cond (NE_EXPR, var,
4906 build_zero_cst (TREE_TYPE (var)),
4907 NULL_TREE, NULL_TREE);
4908 location_t loc = gimple_location (use_stmt);
4909 gimple_set_location (g, loc);
4910 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4911
4912 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4913 etrue->probability = REG_BR_PROB_BASE / 2;
4914 etrue->count = cond_bb->count / 2;
4915 edge efalse = find_edge (cond_bb, then_bb);
4916 efalse->flags = EDGE_FALSE_VALUE;
4917 efalse->probability -= etrue->probability;
4918 efalse->count -= etrue->count;
4919 then_bb->count -= etrue->count;
4920
4921 tree othervar = NULL_TREE;
4922 if (gimple_assign_rhs1 (use_stmt) == var)
4923 othervar = gimple_assign_rhs2 (use_stmt);
4924 else if (gimple_assign_rhs2 (use_stmt) == var)
4925 othervar = gimple_assign_rhs1 (use_stmt);
4926 else
4927 gcc_unreachable ();
4928 tree lhs = gimple_assign_lhs (use_stmt);
4929 gphi *phi = create_phi_node (lhs, merge_bb);
4930 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4931 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4932 gsi = gsi_for_stmt (use_stmt);
4933 gsi_remove (&gsi, true);
4934
4935 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4936 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4937 }
4938 reassoc_branch_fixups.release ();
4939 }
4940
4941 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4942 void debug_ops_vector (vec<operand_entry_t> ops);
4943
4944 /* Dump the operand entry vector OPS to FILE. */
4945
4946 void
4947 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4948 {
4949 operand_entry_t oe;
4950 unsigned int i;
4951
4952 FOR_EACH_VEC_ELT (ops, i, oe)
4953 {
4954 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4955 print_generic_expr (file, oe->op, 0);
4956 }
4957 }
4958
4959 /* Dump the operand entry vector OPS to STDERR. */
4960
4961 DEBUG_FUNCTION void
4962 debug_ops_vector (vec<operand_entry_t> ops)
4963 {
4964 dump_ops_vector (stderr, ops);
4965 }
4966
4967 static void
4968 do_reassoc (void)
4969 {
4970 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4971 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4972 }
4973
4974 /* Initialize the reassociation pass. */
4975
4976 static void
4977 init_reassoc (void)
4978 {
4979 int i;
4980 long rank = 2;
4981 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4982
4983 /* Find the loops, so that we can prevent moving calculations in
4984 them. */
4985 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4986
4987 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4988
4989 next_operand_entry_id = 0;
4990
4991 /* Reverse post order (RPO) gives us an ordering in which
4992 deeper loops come later. */
4993 pre_and_rev_post_order_compute (NULL, bbs, false);
4994 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4995 operand_rank = new hash_map<tree, long>;
4996
4997 /* Give each default definition a distinct rank. This includes
4998 parameters and the static chain. Walk backwards over all
4999 SSA names so that we get proper rank ordering according
5000 to tree_swap_operands_p. */
5001 for (i = num_ssa_names - 1; i > 0; --i)
5002 {
5003 tree name = ssa_name (i);
5004 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5005 insert_operand_rank (name, ++rank);
5006 }
5007
5008 /* Set up rank for each BB. */
5009 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5010 bb_rank[bbs[i]] = ++rank << 16;
5011
5012 free (bbs);
5013 calculate_dominance_info (CDI_POST_DOMINATORS);
5014 plus_negates = vNULL;
5015 }
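
/* A rough sketch of the rank numbering set up above (hypothetical
   values): with two default definitions p_1 and q_2, they might receive
   ranks 3 and 4, and the basic blocks in reverse post order then get
   base ranks 5 << 16, 6 << 16, ...  The << 16 presumably leaves room in
   the low bits for the ranks of names computed within each block.  */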
5016
5017 /* Cleanup after the reassociation pass, and print stats if
5018 requested. */
5019
5020 static void
5021 fini_reassoc (void)
5022 {
5023 statistics_counter_event (cfun, "Linearized",
5024 reassociate_stats.linearized);
5025 statistics_counter_event (cfun, "Constants eliminated",
5026 reassociate_stats.constants_eliminated);
5027 statistics_counter_event (cfun, "Ops eliminated",
5028 reassociate_stats.ops_eliminated);
5029 statistics_counter_event (cfun, "Statements rewritten",
5030 reassociate_stats.rewritten);
5031 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5032 reassociate_stats.pows_encountered);
5033 statistics_counter_event (cfun, "Built-in powi calls created",
5034 reassociate_stats.pows_created);
5035
5036 delete operand_rank;
5037 operand_entry_pool.release ();
5038 free (bb_rank);
5039 plus_negates.release ();
5040 free_dominance_info (CDI_POST_DOMINATORS);
5041 loop_optimizer_finalize ();
5042 }
5043
5044 /* Gate and execute functions for Reassociation. */
5045
5046 static unsigned int
5047 execute_reassoc (void)
5048 {
5049 init_reassoc ();
5050
5051 do_reassoc ();
5052 repropagate_negates ();
5053 branch_fixup ();
5054
5055 fini_reassoc ();
5056 return 0;
5057 }
5058
5059 namespace {
5060
5061 const pass_data pass_data_reassoc =
5062 {
5063 GIMPLE_PASS, /* type */
5064 "reassoc", /* name */
5065 OPTGROUP_NONE, /* optinfo_flags */
5066 TV_TREE_REASSOC, /* tv_id */
5067 ( PROP_cfg | PROP_ssa ), /* properties_required */
5068 0, /* properties_provided */
5069 0, /* properties_destroyed */
5070 0, /* todo_flags_start */
5071 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5072 };
5073
5074 class pass_reassoc : public gimple_opt_pass
5075 {
5076 public:
5077 pass_reassoc (gcc::context *ctxt)
5078 : gimple_opt_pass (pass_data_reassoc, ctxt)
5079 {}
5080
5081 /* opt_pass methods: */
5082 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5083 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5084 virtual unsigned int execute (function *) { return execute_reassoc (); }
5085
5086 }; // class pass_reassoc
5087
5088 } // anon namespace
5089
5090 gimple_opt_pass *
5091 make_pass_reassoc (gcc::context *ctxt)
5092 {
5093 return new pass_reassoc (ctxt);
5094 }