tree-ssa-reassoc.c (get_rank): Simplify.
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfganal.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-inline.h"
49 #include "hash-map.h"
50 #include "tree-ssa-alias.h"
51 #include "internal-fn.h"
52 #include "gimple-fold.h"
53 #include "tree-eh.h"
54 #include "gimple-expr.h"
55 #include "is-a.h"
56 #include "gimple.h"
57 #include "gimple-iterator.h"
58 #include "gimplify-me.h"
59 #include "gimple-ssa.h"
60 #include "tree-cfg.h"
61 #include "tree-phinodes.h"
62 #include "ssa-iterators.h"
63 #include "stringpool.h"
64 #include "tree-ssanames.h"
65 #include "tree-ssa-loop-niter.h"
66 #include "tree-ssa-loop.h"
67 #include "hashtab.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "tree-dfa.h"
82 #include "tree-ssa.h"
83 #include "tree-iterator.h"
84 #include "tree-pass.h"
85 #include "alloc-pool.h"
86 #include "langhooks.h"
87 #include "cfgloop.h"
88 #include "target.h"
89 #include "params.h"
90 #include "diagnostic-core.h"
91 #include "builtins.h"
92 #include "gimplify.h"
93 #include "insn-codes.h"
94 #include "optabs.h"
95
96 /* This is a simple global reassociation pass. It is, in part, based
97 on the LLVM pass of the same name (They do some things more/less
98 than we do, in different orders, etc).
99
100 It consists of five steps:
101
102 1. Breaking up subtract operations into addition + negate, where
103 it would promote the reassociation of adds.
104
105 2. Left linearization of the expression trees, so that (A+B)+(C+D)
106 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
107 During linearization, we place the operands of the binary
108 expressions into a vector of operand_entry_t
109
110 3. Optimization of the operand lists, eliminating things like a +
111 -a, a & a, etc.
112
113 3a. Combine repeated factors with the same occurrence counts
114 into a __builtin_powi call that will later be optimized into
115 an optimal number of multiplies.
116
117 4. Rewrite the expression trees we linearized and optimized so
118 they are in proper rank order.
119
120 5. Repropagate negates, as nothing else will clean it up ATM.
121
122 A bit of theory on #4, since nobody seems to write anything down
123 about why it makes sense to do it the way they do it:
124
125 We could do this much nicer theoretically, but don't (for reasons
126 explained after how to do it theoretically nice :P).
127
128 In order to promote the most redundancy elimination, you want
129 binary expressions whose operands are the same rank (or
130 preferably, the same value) exposed to the redundancy eliminator,
131 for possible elimination.
132
133 So the way to do this if we really cared, is to build the new op
134 tree from the leaves to the roots, merging as you go, and putting the
135 new op on the end of the worklist, until you are left with one
136 thing on the worklist.
137
138 IE if you have to rewrite the following set of operands (listed with
139 rank in parentheses), with opcode PLUS_EXPR:
140
141 a (1), b (1), c (1), d (2), e (2)
142
143
144 We start with our merge worklist empty, and the ops list with all of
145 those on it.
146
147 You want to first merge all leaves of the same rank, as much as
148 possible.
149
150 So first build a binary op of
151
152 mergetmp = a + b, and put "mergetmp" on the merge worklist.
153
154 Because there is no three operand form of PLUS_EXPR, c is not going to
155 be exposed to redundancy elimination as a rank 1 operand.
156
157 So you might as well throw it on the merge worklist (you could also
158 consider it to now be a rank two operand, and merge it with d and e,
159 but in this case, you then have evicted e from a binary op. So at
160 least in this situation, you can't win.)
161
162 Then build a binary op of d + e
163 mergetmp2 = d + e
164
165 and put mergetmp2 on the merge worklist.
166
167 so merge worklist = {mergetmp, c, mergetmp2}
168
169 Continue building binary ops of these operations until you have only
170 one operation left on the worklist.
171
172 So we have
173
174 build binary op
175 mergetmp3 = mergetmp + c
176
177 worklist = {mergetmp2, mergetmp3}
178
179 mergetmp4 = mergetmp2 + mergetmp3
180
181 worklist = {mergetmp4}
182
183 because we have one operation left, we can now just set the original
184 statement equal to the result of that operation.
185
186 This will at least expose a + b and d + e to redundancy elimination
187 as binary operations.
188
189 For extra points, you can reuse the old statements to build the
190 mergetmps, since you shouldn't run out.
191
192 So why don't we do this?
193
194 Because it's expensive, and will rarely help. Most trees we are
195 reassociating have 3 or fewer ops. If they have 2 ops, they will
196 already be written into a nice single binary op. If you have 3 ops, a
197 single simple check suffices to tell you whether the first two are of the
198 same rank. If so, you know to order it
199
200 mergetmp = op1 + op2
201 newstmt = mergetmp + op3
202
203 instead of
204 mergetmp = op2 + op3
205 newstmt = mergetmp + op1
206
207 If all three are of the same rank, you can't expose them all in a
208 single binary operator anyway, so the above is *still* the best you
209 can do.
210
211 Thus, this is what we do. When we have three ops left, we check to see
212 what order to put them in, and call it a day. As a nod to vector sum
213 reduction, we check if any of the ops are really a phi node that is a
214 destructive update for the associating op, and keep the destructive
215 update together for vector sum reduction recognition. */
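/* A rough illustration of steps 1 and 5 (the temporaries t1/t2 are
   made up, this is not code from the pass): at the source level,
   step 1 turns a chain of subtractions such as

     d = a - b - c;

   into the equivalent addition chain

     t1 = -b;
     t2 = -c;
     d = a + t1 + t2;

   which the later steps are free to reorder; step 5 turns any
   remaining negates back into subtractions where that is profitable.  */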
216
217
218 /* Statistics */
219 static struct
220 {
221 int linearized;
222 int constants_eliminated;
223 int ops_eliminated;
224 int rewritten;
225 int pows_encountered;
226 int pows_created;
227 } reassociate_stats;
228
229 /* Operator, rank pair. */
230 typedef struct operand_entry
231 {
232 unsigned int rank;
233 int id;
234 tree op;
235 unsigned int count;
236 } *operand_entry_t;
237
238 static alloc_pool operand_entry_pool;
239
240 /* This is used to assign a unique ID to each struct operand_entry
241 so that qsort results are identical on different hosts. */
242 static int next_operand_entry_id;
243
244 /* Starting rank number for a given basic block, so that we can rank
245 operations using unmovable instructions in that BB based on the bb
246 depth. */
247 static long *bb_rank;
248
249 /* Operand->rank hashtable. */
250 static hash_map<tree, long> *operand_rank;
251
252 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
253 all basic blocks, the CFG should be adjusted - the basic block is
254 split right after that SSA_NAME's definition statement and before
255 its only use, which must be a bit ior. */
256 static vec<tree> reassoc_branch_fixups;
257
258 /* Forward decls. */
259 static long get_rank (tree);
260 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
261
262 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
263 possibly added by gsi_remove. */
264
265 bool
266 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
267 {
268 gimple stmt = gsi_stmt (*gsi);
269
270 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
271 return gsi_remove (gsi, true);
272
273 gimple_stmt_iterator prev = *gsi;
274 gsi_prev (&prev);
275 unsigned uid = gimple_uid (stmt);
276 basic_block bb = gimple_bb (stmt);
277 bool ret = gsi_remove (gsi, true);
278 if (!gsi_end_p (prev))
279 gsi_next (&prev);
280 else
281 prev = gsi_start_bb (bb);
282 gimple end_stmt = gsi_stmt (*gsi);
283 while ((stmt = gsi_stmt (prev)) != end_stmt)
284 {
285 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
286 gimple_set_uid (stmt, uid);
287 gsi_next (&prev);
288 }
289 return ret;
290 }
291
292 /* Bias amount for loop-carried phis. We want this to be larger than
293 the depth of any reassociation tree we can see, but not larger than
294 the rank difference between two blocks. */
295 #define PHI_LOOP_BIAS (1 << 15)
296
297 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
298 an innermost loop, and the phi has only a single use which is inside
299 the loop, then the rank is the block rank of the loop latch plus an
300 extra bias for the loop-carried dependence. This causes expressions
301 calculated into an accumulator variable to be independent for each
302 iteration of the loop. If STMT is some other phi, the rank is the
303 block rank of its containing block. */
304 static long
305 phi_rank (gimple stmt)
306 {
307 basic_block bb = gimple_bb (stmt);
308 struct loop *father = bb->loop_father;
309 tree res;
310 unsigned i;
311 use_operand_p use;
312 gimple use_stmt;
313
314 /* We only care about real loops (those with a latch). */
315 if (!father->latch)
316 return bb_rank[bb->index];
317
318 /* Interesting phis must be in headers of innermost loops. */
319 if (bb != father->header
320 || father->inner)
321 return bb_rank[bb->index];
322
323 /* Ignore virtual SSA_NAMEs. */
324 res = gimple_phi_result (stmt);
325 if (virtual_operand_p (res))
326 return bb_rank[bb->index];
327
328 /* The phi definition must have a single use, and that use must be
329 within the loop. Otherwise this isn't an accumulator pattern. */
330 if (!single_imm_use (res, &use, &use_stmt)
331 || gimple_bb (use_stmt)->loop_father != father)
332 return bb_rank[bb->index];
333
334 /* Look for phi arguments from within the loop. If found, bias this phi. */
335 for (i = 0; i < gimple_phi_num_args (stmt); i++)
336 {
337 tree arg = gimple_phi_arg_def (stmt, i);
338 if (TREE_CODE (arg) == SSA_NAME
339 && !SSA_NAME_IS_DEFAULT_DEF (arg))
340 {
341 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
342 if (gimple_bb (def_stmt)->loop_father == father)
343 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
344 }
345 }
346
347 /* Must be an uninteresting phi. */
348 return bb_rank[bb->index];
349 }
350
351 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
352 loop-carried dependence of an innermost loop, return TRUE; else
353 return FALSE. */
354 static bool
355 loop_carried_phi (tree exp)
356 {
357 gimple phi_stmt;
358 long block_rank;
359
360 if (TREE_CODE (exp) != SSA_NAME
361 || SSA_NAME_IS_DEFAULT_DEF (exp))
362 return false;
363
364 phi_stmt = SSA_NAME_DEF_STMT (exp);
365
366 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
367 return false;
368
369 /* Non-loop-carried phis have block rank. Loop-carried phis have
370 an additional bias added in. If this phi doesn't have block rank,
371 it's biased and should not be propagated. */
372 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
373
374 if (phi_rank (phi_stmt) != block_rank)
375 return true;
376
377 return false;
378 }
379
380 /* Return the maximum of RANK and the rank that should be propagated
381 from expression OP. For most operands, this is just the rank of OP.
382 For loop-carried phis, the value is zero to avoid undoing the bias
383 in favor of the phi. */
384 static long
385 propagate_rank (long rank, tree op)
386 {
387 long op_rank;
388
389 if (loop_carried_phi (op))
390 return rank;
391
392 op_rank = get_rank (op);
393
394 return MAX (rank, op_rank);
395 }
396
397 /* Look up the operand rank structure for expression E. */
398
399 static inline long
400 find_operand_rank (tree e)
401 {
402 long *slot = operand_rank->get (e);
403 return slot ? *slot : -1;
404 }
405
406 /* Insert {E,RANK} into the operand rank hashtable. */
407
408 static inline void
409 insert_operand_rank (tree e, long rank)
410 {
411 gcc_assert (rank > 0);
412 gcc_assert (!operand_rank->put (e, rank));
413 }
414
415 /* Given an expression E, return the rank of the expression. */
416
417 static long
418 get_rank (tree e)
419 {
420 /* SSA_NAMEs have the rank of the expression they are the result
421 of.
422 For globals and uninitialized values, the rank is 0.
423 For function arguments, use the pre-setup rank.
424 For PHI nodes, stores, asm statements, etc, we use the rank of
425 the BB.
426 For simple operations, the rank is the maximum rank of any of
427 its operands, or the bb_rank, whichever is less.
428 I make no claims that this is optimal; however, it gives good
429 results. */
430
431 /* We make an exception to the normal ranking system to break
432 dependences of accumulator variables in loops. Suppose we
433 have a simple one-block loop containing:
434
435 x_1 = phi(x_0, x_2)
436 b = a + x_1
437 c = b + d
438 x_2 = c + e
439
440 As shown, each iteration of the calculation into x is fully
441 dependent upon the iteration before it. We would prefer to
442 see this in the form:
443
444 x_1 = phi(x_0, x_2)
445 b = a + d
446 c = b + e
447 x_2 = c + x_1
448
449 If the loop is unrolled, the calculations of b and c from
450 different iterations can be interleaved.
451
452 To obtain this result during reassociation, we bias the rank
453 of the phi definition x_1 upward, when it is recognized as an
454 accumulator pattern. The artificial rank causes it to be
455 added last, providing the desired independence. */
456
457 if (TREE_CODE (e) == SSA_NAME)
458 {
459 ssa_op_iter iter;
460 gimple stmt;
461 long rank;
462 tree op;
463
464 if (SSA_NAME_IS_DEFAULT_DEF (e))
465 return find_operand_rank (e);
466
467 stmt = SSA_NAME_DEF_STMT (e);
468 if (gimple_code (stmt) == GIMPLE_PHI)
469 return phi_rank (stmt);
470
471 if (!is_gimple_assign (stmt))
472 return bb_rank[gimple_bb (stmt)->index];
473
474 /* If we already have a rank for this expression, use that. */
475 rank = find_operand_rank (e);
476 if (rank != -1)
477 return rank;
478
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op);
488
489 if (dump_file && (dump_flags & TDF_DETAILS))
490 {
491 fprintf (dump_file, "Rank for ");
492 print_generic_expr (dump_file, e, 0);
493 fprintf (dump_file, " is %ld\n", (rank + 1));
494 }
495
496 /* Note the rank in the hashtable so we don't recompute it. */
497 insert_operand_rank (e, (rank + 1));
498 return (rank + 1);
499 }
500
501 /* Constants, globals, etc., are rank 0 */
502 return 0;
503 }
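/* A rough illustration of the ranking scheme above (the names and the
   starting rank R are made up): assume parameters x and y were given
   rank R by the pre-setup code and constants have rank 0.  Then

     t1 = x + y;    rank (t1) = MAX (rank (x), rank (y)) + 1 = R + 1
     t2 = t1 * 4;   rank (t2) = MAX (rank (t1), 0) + 1 = R + 2

   so each statement in a dependence chain ranks strictly higher than
   its operands, which the rank-based sorting below relies on.  */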
504
505
506 /* We want integer ones to end up last no matter what, since they are
507 the ones we can do the most with. */
508 #define INTEGER_CONST_TYPE 1 << 3
509 #define FLOAT_CONST_TYPE 1 << 2
510 #define OTHER_CONST_TYPE 1 << 1
511
512 /* Classify an invariant tree into integer, float, or other, so that
513 we can sort them to be near other constants of the same type. */
514 static inline int
515 constant_type (tree t)
516 {
517 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
518 return INTEGER_CONST_TYPE;
519 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
520 return FLOAT_CONST_TYPE;
521 else
522 return OTHER_CONST_TYPE;
523 }
524
525 /* qsort comparison function to sort operand entries PA and PB by rank
526 so that the sorted array is ordered by rank in decreasing order. */
527 static int
528 sort_by_operand_rank (const void *pa, const void *pb)
529 {
530 const operand_entry_t oea = *(const operand_entry_t *)pa;
531 const operand_entry_t oeb = *(const operand_entry_t *)pb;
532
533 /* It's nicer for optimize_expression if constants that are likely
534 to fold when added/multiplied/whatever are put next to each
535 other. Since all constants have rank 0, order them by type. */
536 if (oeb->rank == 0 && oea->rank == 0)
537 {
538 if (constant_type (oeb->op) != constant_type (oea->op))
539 return constant_type (oeb->op) - constant_type (oea->op);
540 else
541 /* To make sorting result stable, we use unique IDs to determine
542 order. */
543 return oeb->id - oea->id;
544 }
545
546 /* Lastly, make sure the versions that are the same go next to each
547 other. */
548 if ((oeb->rank - oea->rank == 0)
549 && TREE_CODE (oea->op) == SSA_NAME
550 && TREE_CODE (oeb->op) == SSA_NAME)
551 {
552 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
553 versions of removed SSA_NAMEs, so if possible, prefer to sort
554 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
555 See PR60418. */
556 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
557 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
558 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
559 {
560 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
561 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
562 basic_block bba = gimple_bb (stmta);
563 basic_block bbb = gimple_bb (stmtb);
564 if (bbb != bba)
565 {
566 if (bb_rank[bbb->index] != bb_rank[bba->index])
567 return bb_rank[bbb->index] - bb_rank[bba->index];
568 }
569 else
570 {
571 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
572 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
573 if (da != db)
574 return da ? 1 : -1;
575 }
576 }
577
578 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
579 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
580 else
581 return oeb->id - oea->id;
582 }
583
584 if (oeb->rank != oea->rank)
585 return oeb->rank - oea->rank;
586 else
587 return oeb->id - oea->id;
588 }
589
590 /* Add an operand entry to *OPS for the tree operand OP. */
591
592 static void
593 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
594 {
595 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
596
597 oe->op = op;
598 oe->rank = get_rank (op);
599 oe->id = next_operand_entry_id++;
600 oe->count = 1;
601 ops->safe_push (oe);
602 }
603
604 /* Add an operand entry to *OPS for the tree operand OP with repeat
605 count REPEAT. */
606
607 static void
608 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
609 HOST_WIDE_INT repeat)
610 {
611 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
612
613 oe->op = op;
614 oe->rank = get_rank (op);
615 oe->id = next_operand_entry_id++;
616 oe->count = repeat;
617 ops->safe_push (oe);
618
619 reassociate_stats.pows_encountered++;
620 }
621
622 /* Return true if STMT is a reassociable operation containing a binary
623 operation with tree code CODE, and is inside LOOP. */
624
625 static bool
626 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
627 {
628 basic_block bb = gimple_bb (stmt);
629
630 if (gimple_bb (stmt) == NULL)
631 return false;
632
633 if (!flow_bb_inside_loop_p (loop, bb))
634 return false;
635
636 if (is_gimple_assign (stmt)
637 && gimple_assign_rhs_code (stmt) == code
638 && has_single_use (gimple_assign_lhs (stmt)))
639 return true;
640
641 return false;
642 }
643
644
645 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
646 operand of that operation. Otherwise, return NULL. */
647
648 static tree
649 get_unary_op (tree name, enum tree_code opcode)
650 {
651 gimple stmt = SSA_NAME_DEF_STMT (name);
652
653 if (!is_gimple_assign (stmt))
654 return NULL_TREE;
655
656 if (gimple_assign_rhs_code (stmt) == opcode)
657 return gimple_assign_rhs1 (stmt);
658 return NULL_TREE;
659 }
660
661 /* If CURR and LAST are a pair of ops that OPCODE allows us to
662 eliminate through equivalences, do so, remove them from OPS, and
663 return true. Otherwise, return false. */
664
665 static bool
666 eliminate_duplicate_pair (enum tree_code opcode,
667 vec<operand_entry_t> *ops,
668 bool *all_done,
669 unsigned int i,
670 operand_entry_t curr,
671 operand_entry_t last)
672 {
673
674 /* If we have two of the same op, and the opcode is &, |, min, or max,
675 we can eliminate one of them.
676 If we have two of the same op, and the opcode is ^, we can
677 eliminate both of them. */
678
679 if (last && last->op == curr->op)
680 {
681 switch (opcode)
682 {
683 case MAX_EXPR:
684 case MIN_EXPR:
685 case BIT_IOR_EXPR:
686 case BIT_AND_EXPR:
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 {
689 fprintf (dump_file, "Equivalence: ");
690 print_generic_expr (dump_file, curr->op, 0);
691 fprintf (dump_file, " [&|minmax] ");
692 print_generic_expr (dump_file, last->op, 0);
693 fprintf (dump_file, " -> ");
694 print_generic_stmt (dump_file, last->op, 0);
695 }
696
697 ops->ordered_remove (i);
698 reassociate_stats.ops_eliminated ++;
699
700 return true;
701
702 case BIT_XOR_EXPR:
703 if (dump_file && (dump_flags & TDF_DETAILS))
704 {
705 fprintf (dump_file, "Equivalence: ");
706 print_generic_expr (dump_file, curr->op, 0);
707 fprintf (dump_file, " ^ ");
708 print_generic_expr (dump_file, last->op, 0);
709 fprintf (dump_file, " -> nothing\n");
710 }
711
712 reassociate_stats.ops_eliminated += 2;
713
714 if (ops->length () == 2)
715 {
716 ops->create (0);
717 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
718 *all_done = true;
719 }
720 else
721 {
722 ops->ordered_remove (i-1);
723 ops->ordered_remove (i-1);
724 }
725
726 return true;
727
728 default:
729 break;
730 }
731 }
732 return false;
733 }
734
735 static vec<tree> plus_negates;
736
737 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
738 expression, look in OPS for a corresponding positive operation to cancel
739 it out. If we find one, remove the other from OPS, replace
740 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
741 return false. */
742
743 static bool
744 eliminate_plus_minus_pair (enum tree_code opcode,
745 vec<operand_entry_t> *ops,
746 unsigned int currindex,
747 operand_entry_t curr)
748 {
749 tree negateop;
750 tree notop;
751 unsigned int i;
752 operand_entry_t oe;
753
754 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
755 return false;
756
757 negateop = get_unary_op (curr->op, NEGATE_EXPR);
758 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
759 if (negateop == NULL_TREE && notop == NULL_TREE)
760 return false;
761
762 /* Any non-negated version will have a rank that is one less than
763 the current rank. So once we hit those ranks, if we don't find
764 one, we can stop. */
765
766 for (i = currindex + 1;
767 ops->iterate (i, &oe)
768 && oe->rank >= curr->rank - 1 ;
769 i++)
770 {
771 if (oe->op == negateop)
772 {
773
774 if (dump_file && (dump_flags & TDF_DETAILS))
775 {
776 fprintf (dump_file, "Equivalence: ");
777 print_generic_expr (dump_file, negateop, 0);
778 fprintf (dump_file, " + -");
779 print_generic_expr (dump_file, oe->op, 0);
780 fprintf (dump_file, " -> 0\n");
781 }
782
783 ops->ordered_remove (i);
784 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
785 ops->ordered_remove (currindex);
786 reassociate_stats.ops_eliminated ++;
787
788 return true;
789 }
790 else if (oe->op == notop)
791 {
792 tree op_type = TREE_TYPE (oe->op);
793
794 if (dump_file && (dump_flags & TDF_DETAILS))
795 {
796 fprintf (dump_file, "Equivalence: ");
797 print_generic_expr (dump_file, notop, 0);
798 fprintf (dump_file, " + ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 fprintf (dump_file, " -> -1\n");
801 }
802
803 ops->ordered_remove (i);
804 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
805 ops->ordered_remove (currindex);
806 reassociate_stats.ops_eliminated ++;
807
808 return true;
809 }
810 }
811
812 /* CURR->OP is a negate expr in a plus expr: save it for later
813 inspection in repropagate_negates(). */
814 if (negateop != NULL_TREE)
815 plus_negates.safe_push (curr->op);
816
817 return false;
818 }
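/* A small example of the cancellation above (operand names made up):
   for a PLUS_EXPR operand list { a, t, b } where t = -b, the t/b pair
   is removed and the constant 0 is added, leaving { a, 0 }; if instead
   t = ~b, then t + b is always -1, so the pair is replaced by -1.  */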
819
820 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
821 bitwise not expression, look in OPS for a corresponding operand to
822 cancel it out. If we find one, collapse OPS to a single constant
823 (0 for BIT_AND_EXPR, -1 for BIT_IOR_EXPR) and return true. Otherwise,
824 return false. */
825
826 static bool
827 eliminate_not_pairs (enum tree_code opcode,
828 vec<operand_entry_t> *ops,
829 unsigned int currindex,
830 operand_entry_t curr)
831 {
832 tree notop;
833 unsigned int i;
834 operand_entry_t oe;
835
836 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
837 || TREE_CODE (curr->op) != SSA_NAME)
838 return false;
839
840 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
841 if (notop == NULL_TREE)
842 return false;
843
844 /* Any non-not version will have a rank that is one less than
845 the current rank. So once we hit those ranks, if we don't find
846 one, we can stop. */
847
848 for (i = currindex + 1;
849 ops->iterate (i, &oe)
850 && oe->rank >= curr->rank - 1;
851 i++)
852 {
853 if (oe->op == notop)
854 {
855 if (dump_file && (dump_flags & TDF_DETAILS))
856 {
857 fprintf (dump_file, "Equivalence: ");
858 print_generic_expr (dump_file, notop, 0);
859 if (opcode == BIT_AND_EXPR)
860 fprintf (dump_file, " & ~");
861 else if (opcode == BIT_IOR_EXPR)
862 fprintf (dump_file, " | ~");
863 print_generic_expr (dump_file, oe->op, 0);
864 if (opcode == BIT_AND_EXPR)
865 fprintf (dump_file, " -> 0\n");
866 else if (opcode == BIT_IOR_EXPR)
867 fprintf (dump_file, " -> -1\n");
868 }
869
870 if (opcode == BIT_AND_EXPR)
871 oe->op = build_zero_cst (TREE_TYPE (oe->op));
872 else if (opcode == BIT_IOR_EXPR)
873 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
874
875 reassociate_stats.ops_eliminated += ops->length () - 1;
876 ops->truncate (0);
877 ops->quick_push (oe);
878 return true;
879 }
880 }
881
882 return false;
883 }
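/* For example, a BIT_AND_EXPR operand list containing both b and ~b
   collapses to the single constant 0 (x & b & ~b is 0 for any x),
   while a BIT_IOR_EXPR list containing b and ~b collapses to -1,
   i.e. all ones.  */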
884
885 /* Use constant value that may be present in OPS to try to eliminate
886 operands. Note that this function is only really used when we've
887 eliminated ops for other reasons, or merged constants. Across
888 single statements, fold already does all of this, plus more. There
889 is little point in duplicating logic, so I've only included the
890 identities that I could ever construct testcases to trigger. */
891
892 static void
893 eliminate_using_constants (enum tree_code opcode,
894 vec<operand_entry_t> *ops)
895 {
896 operand_entry_t oelast = ops->last ();
897 tree type = TREE_TYPE (oelast->op);
898
899 if (oelast->rank == 0
900 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
901 {
902 switch (opcode)
903 {
904 case BIT_AND_EXPR:
905 if (integer_zerop (oelast->op))
906 {
907 if (ops->length () != 1)
908 {
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Found & 0, removing all other ops\n");
911
912 reassociate_stats.ops_eliminated += ops->length () - 1;
913
914 ops->truncate (0);
915 ops->quick_push (oelast);
916 return;
917 }
918 }
919 else if (integer_all_onesp (oelast->op))
920 {
921 if (ops->length () != 1)
922 {
923 if (dump_file && (dump_flags & TDF_DETAILS))
924 fprintf (dump_file, "Found & -1, removing\n");
925 ops->pop ();
926 reassociate_stats.ops_eliminated++;
927 }
928 }
929 break;
930 case BIT_IOR_EXPR:
931 if (integer_all_onesp (oelast->op))
932 {
933 if (ops->length () != 1)
934 {
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found | -1, removing all other ops\n");
937
938 reassociate_stats.ops_eliminated += ops->length () - 1;
939
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
943 }
944 }
945 else if (integer_zerop (oelast->op))
946 {
947 if (ops->length () != 1)
948 {
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found | 0, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
953 }
954 }
955 break;
956 case MULT_EXPR:
957 if (integer_zerop (oelast->op)
958 || (FLOAT_TYPE_P (type)
959 && !HONOR_NANS (type)
960 && !HONOR_SIGNED_ZEROS (type)
961 && real_zerop (oelast->op)))
962 {
963 if (ops->length () != 1)
964 {
965 if (dump_file && (dump_flags & TDF_DETAILS))
966 fprintf (dump_file, "Found * 0, removing all other ops\n");
967
968 reassociate_stats.ops_eliminated += ops->length () - 1;
969 ops->truncate (1);
970 ops->quick_push (oelast);
971 return;
972 }
973 }
974 else if (integer_onep (oelast->op)
975 || (FLOAT_TYPE_P (type)
976 && !HONOR_SNANS (type)
977 && real_onep (oelast->op)))
978 {
979 if (ops->length () != 1)
980 {
981 if (dump_file && (dump_flags & TDF_DETAILS))
982 fprintf (dump_file, "Found * 1, removing\n");
983 ops->pop ();
984 reassociate_stats.ops_eliminated++;
985 return;
986 }
987 }
988 break;
989 case BIT_XOR_EXPR:
990 case PLUS_EXPR:
991 case MINUS_EXPR:
992 if (integer_zerop (oelast->op)
993 || (FLOAT_TYPE_P (type)
994 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
995 && fold_real_zero_addition_p (type, oelast->op,
996 opcode == MINUS_EXPR)))
997 {
998 if (ops->length () != 1)
999 {
1000 if (dump_file && (dump_flags & TDF_DETAILS))
1001 fprintf (dump_file, "Found [|^+] 0, removing\n");
1002 ops->pop ();
1003 reassociate_stats.ops_eliminated++;
1004 return;
1005 }
1006 }
1007 break;
1008 default:
1009 break;
1010 }
1011 }
1012 }
1013
1014
1015 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1016 bool, bool);
1017
1018 /* Structure for tracking and counting operands. */
1019 typedef struct oecount_s {
1020 int cnt;
1021 int id;
1022 enum tree_code oecode;
1023 tree op;
1024 } oecount;
1025
1026
1027 /* The heap for the oecount hashtable and the sorted list of operands. */
1028 static vec<oecount> cvec;
1029
1030
1031 /* Oecount hashtable helpers. */
1032
1033 struct oecount_hasher
1034 {
1035 typedef int value_type;
1036 typedef int compare_type;
1037 static inline hashval_t hash (const value_type &);
1038 static inline bool equal (const value_type &, const compare_type &);
1039 static bool is_deleted (int &v) { return v == 1; }
1040 static void mark_deleted (int &e) { e = 1; }
1041 static bool is_empty (int &v) { return v == 0; }
1042 static void mark_empty (int &e) { e = 0; }
1043 static void remove (int &) {}
1044 };
1045
1046 /* Hash function for oecount. */
1047
1048 inline hashval_t
1049 oecount_hasher::hash (const value_type &p)
1050 {
1051 const oecount *c = &cvec[p - 42];
1052 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1053 }
1054
1055 /* Comparison function for oecount. */
1056
1057 inline bool
1058 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1059 {
1060 const oecount *c1 = &cvec[p1 - 42];
1061 const oecount *c2 = &cvec[p2 - 42];
1062 return (c1->oecode == c2->oecode
1063 && c1->op == c2->op);
1064 }
1065
1066 /* Comparison function for qsort sorting oecount elements by count. */
1067
1068 static int
1069 oecount_cmp (const void *p1, const void *p2)
1070 {
1071 const oecount *c1 = (const oecount *)p1;
1072 const oecount *c2 = (const oecount *)p2;
1073 if (c1->cnt != c2->cnt)
1074 return c1->cnt - c2->cnt;
1075 else
1076 /* If counts are identical, use unique IDs to stabilize qsort. */
1077 return c1->id - c2->id;
1078 }
1079
1080 /* Return TRUE iff STMT represents a builtin call that raises OP
1081 to some exponent. */
1082
1083 static bool
1084 stmt_is_power_of_op (gimple stmt, tree op)
1085 {
1086 tree fndecl;
1087
1088 if (!is_gimple_call (stmt))
1089 return false;
1090
1091 fndecl = gimple_call_fndecl (stmt);
1092
1093 if (!fndecl
1094 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1095 return false;
1096
1097 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1098 {
1099 CASE_FLT_FN (BUILT_IN_POW):
1100 CASE_FLT_FN (BUILT_IN_POWI):
1101 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1102
1103 default:
1104 return false;
1105 }
1106 }
1107
1108 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1109 in place and return the result. Assumes that stmt_is_power_of_op
1110 was previously called for STMT and returned TRUE. */
1111
1112 static HOST_WIDE_INT
1113 decrement_power (gimple stmt)
1114 {
1115 REAL_VALUE_TYPE c, cint;
1116 HOST_WIDE_INT power;
1117 tree arg1;
1118
1119 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1120 {
1121 CASE_FLT_FN (BUILT_IN_POW):
1122 arg1 = gimple_call_arg (stmt, 1);
1123 c = TREE_REAL_CST (arg1);
1124 power = real_to_integer (&c) - 1;
1125 real_from_integer (&cint, VOIDmode, power, SIGNED);
1126 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1127 return power;
1128
1129 CASE_FLT_FN (BUILT_IN_POWI):
1130 arg1 = gimple_call_arg (stmt, 1);
1131 power = TREE_INT_CST_LOW (arg1) - 1;
1132 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1133 return power;
1134
1135 default:
1136 gcc_unreachable ();
1137 }
1138 }
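/* For instance, if STMT is __builtin_powi (x, 5), the call is rewritten
   in place as __builtin_powi (x, 4) and 4 is returned; a return value
   of 1 tells the caller that the remaining call just computes its
   first argument and can be replaced by it.  */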
1139
1140 /* Find the single immediate use of STMT's LHS, and replace it
1141 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1142 replace *DEF with OP as well. */
1143
1144 static void
1145 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1146 {
1147 tree lhs;
1148 gimple use_stmt;
1149 use_operand_p use;
1150 gimple_stmt_iterator gsi;
1151
1152 if (is_gimple_call (stmt))
1153 lhs = gimple_call_lhs (stmt);
1154 else
1155 lhs = gimple_assign_lhs (stmt);
1156
1157 gcc_assert (has_single_use (lhs));
1158 single_imm_use (lhs, &use, &use_stmt);
1159 if (lhs == *def)
1160 *def = op;
1161 SET_USE (use, op);
1162 if (TREE_CODE (op) != SSA_NAME)
1163 update_stmt (use_stmt);
1164 gsi = gsi_for_stmt (stmt);
1165 unlink_stmt_vdef (stmt);
1166 reassoc_remove_stmt (&gsi);
1167 release_defs (stmt);
1168 }
1169
1170 /* Walks the linear chain with result *DEF searching for an operation
1171 with operand OP and code OPCODE, removing that operation from the chain.
1172 *DEF is updated if there is only one operand but no operation left. */
1173
1174 static void
1175 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1176 {
1177 gimple stmt = SSA_NAME_DEF_STMT (*def);
1178
1179 do
1180 {
1181 tree name;
1182
1183 if (opcode == MULT_EXPR
1184 && stmt_is_power_of_op (stmt, op))
1185 {
1186 if (decrement_power (stmt) == 1)
1187 propagate_op_to_single_use (op, stmt, def);
1188 return;
1189 }
1190
1191 name = gimple_assign_rhs1 (stmt);
1192
1193 /* If this is the operation we look for and one of the operands
1194 is ours, simply propagate the other operand into the stmt's
1195 single use. */
1196 if (gimple_assign_rhs_code (stmt) == opcode
1197 && (name == op
1198 || gimple_assign_rhs2 (stmt) == op))
1199 {
1200 if (name == op)
1201 name = gimple_assign_rhs2 (stmt);
1202 propagate_op_to_single_use (name, stmt, def);
1203 return;
1204 }
1205
1206 /* We might have a multiply of two __builtin_pow* calls, and
1207 the operand might be hiding in the rightmost one. */
1208 if (opcode == MULT_EXPR
1209 && gimple_assign_rhs_code (stmt) == opcode
1210 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1211 && has_single_use (gimple_assign_rhs2 (stmt)))
1212 {
1213 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1214 if (stmt_is_power_of_op (stmt2, op))
1215 {
1216 if (decrement_power (stmt2) == 1)
1217 propagate_op_to_single_use (op, stmt2, def);
1218 return;
1219 }
1220 }
1221
1222 /* Continue walking the chain. */
1223 gcc_assert (name != op
1224 && TREE_CODE (name) == SSA_NAME);
1225 stmt = SSA_NAME_DEF_STMT (name);
1226 }
1227 while (1);
1228 }
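/* A sketch of one walk (names made up): with the MULT_EXPR chain

     t1 = a * x;
     t2 = t1 * b;   <-- *DEF

   a call with OPCODE == MULT_EXPR and OP == x finds t1 = a * x,
   propagates the other operand a into t1's single use and removes
   that statement, so the chain becomes t2 = a * b.  */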
1229
1230 /* Returns true if statement S1 dominates statement S2. Like
1231 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1232
1233 static bool
1234 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1235 {
1236 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1237
1238 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1239 SSA_NAME. Assume it lives at the beginning of the function and
1240 thus dominates everything. */
1241 if (!bb1 || s1 == s2)
1242 return true;
1243
1244 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1245 if (!bb2)
1246 return false;
1247
1248 if (bb1 == bb2)
1249 {
1250 /* PHIs in the same basic block are assumed to be
1251 executed all in parallel; if only one stmt is a PHI,
1252 it dominates the other stmt in the same basic block. */
1253 if (gimple_code (s1) == GIMPLE_PHI)
1254 return true;
1255
1256 if (gimple_code (s2) == GIMPLE_PHI)
1257 return false;
1258
1259 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1260
1261 if (gimple_uid (s1) < gimple_uid (s2))
1262 return true;
1263
1264 if (gimple_uid (s1) > gimple_uid (s2))
1265 return false;
1266
1267 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1268 unsigned int uid = gimple_uid (s1);
1269 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1270 {
1271 gimple s = gsi_stmt (gsi);
1272 if (gimple_uid (s) != uid)
1273 break;
1274 if (s == s2)
1275 return true;
1276 }
1277
1278 return false;
1279 }
1280
1281 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1282 }
1283
1284 /* Insert STMT after INSERT_POINT. */
1285
1286 static void
1287 insert_stmt_after (gimple stmt, gimple insert_point)
1288 {
1289 gimple_stmt_iterator gsi;
1290 basic_block bb;
1291
1292 if (gimple_code (insert_point) == GIMPLE_PHI)
1293 bb = gimple_bb (insert_point);
1294 else if (!stmt_ends_bb_p (insert_point))
1295 {
1296 gsi = gsi_for_stmt (insert_point);
1297 gimple_set_uid (stmt, gimple_uid (insert_point));
1298 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1299 return;
1300 }
1301 else
1302 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1303 thus if it must end a basic block, it should be a call that can
1304 throw, or some assignment that can throw. If it throws, the LHS
1305 of it will not be initialized though, so only valid places using
1306 the SSA_NAME should be dominated by the fallthru edge. */
1307 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1308 gsi = gsi_after_labels (bb);
1309 if (gsi_end_p (gsi))
1310 {
1311 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1312 gimple_set_uid (stmt,
1313 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1314 }
1315 else
1316 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1317 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1318 }
1319
1320 /* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
1321 of type TYPE for the result. Places the statement after the definition
1322 of either OP1 or OP2. Returns the new statement. */
1323
1324 static gimple
1325 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1326 {
1327 gimple op1def = NULL, op2def = NULL;
1328 gimple_stmt_iterator gsi;
1329 tree op;
1330 gassign *sum;
1331
1332 /* Create the addition statement. */
1333 op = make_ssa_name (type);
1334 sum = gimple_build_assign (op, opcode, op1, op2);
1335
1336 /* Find an insertion place and insert. */
1337 if (TREE_CODE (op1) == SSA_NAME)
1338 op1def = SSA_NAME_DEF_STMT (op1);
1339 if (TREE_CODE (op2) == SSA_NAME)
1340 op2def = SSA_NAME_DEF_STMT (op2);
1341 if ((!op1def || gimple_nop_p (op1def))
1342 && (!op2def || gimple_nop_p (op2def)))
1343 {
1344 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1345 if (gsi_end_p (gsi))
1346 {
1347 gimple_stmt_iterator gsi2
1348 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1349 gimple_set_uid (sum,
1350 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1351 }
1352 else
1353 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1354 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1355 }
1356 else
1357 {
1358 gimple insert_point;
1359 if ((!op1def || gimple_nop_p (op1def))
1360 || (op2def && !gimple_nop_p (op2def)
1361 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1362 insert_point = op2def;
1363 else
1364 insert_point = op1def;
1365 insert_stmt_after (sum, insert_point);
1366 }
1367 update_stmt (sum);
1368
1369 return sum;
1370 }
1371
1372 /* Perform un-distribution of divisions and multiplications.
1373 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1374 to (A + B) / X for real X.
1375
1376 The algorithm is organized as follows.
1377
1378 - First we walk the addition chain *OPS looking for summands that
1379 are defined by a multiplication or a real division. This results
1380 in the candidates bitmap with relevant indices into *OPS.
1381
1382 - Second we build the chains of multiplications or divisions for
1383 these candidates, counting the number of occurrences of (operand, code)
1384 pairs in all of the candidates chains.
1385
1386 - Third we sort the (operand, code) pairs by number of occurrence and
1387 process them starting with the pair with the most uses.
1388
1389 * For each such pair we walk the candidates again to build a
1390 second candidate bitmap noting all multiplication/division chains
1391 that have at least one occurrence of (operand, code).
1392
1393 * We build an alternate addition chain only covering these
1394 candidates with one (operand, code) operation removed from their
1395 multiplication/division chain.
1396
1397 * The first candidate gets replaced by the alternate addition chain
1398 multiplied/divided by the operand.
1399
1400 * All candidate chains get disabled for further processing and
1401 processing of (operand, code) pairs continues.
1402
1403 The alternate addition chains built are re-processed by the main
1404 reassociation algorithm which allows optimizing a * x * y + b * y * x
1405 to (a + b) * x * y in one invocation of the reassociation pass. */
1406
1407 static bool
1408 undistribute_ops_list (enum tree_code opcode,
1409 vec<operand_entry_t> *ops, struct loop *loop)
1410 {
1411 unsigned int length = ops->length ();
1412 operand_entry_t oe1;
1413 unsigned i, j;
1414 sbitmap candidates, candidates2;
1415 unsigned nr_candidates, nr_candidates2;
1416 sbitmap_iterator sbi0;
1417 vec<operand_entry_t> *subops;
1418 bool changed = false;
1419 int next_oecount_id = 0;
1420
1421 if (length <= 1
1422 || opcode != PLUS_EXPR)
1423 return false;
1424
1425 /* Build a list of candidates to process. */
1426 candidates = sbitmap_alloc (length);
1427 bitmap_clear (candidates);
1428 nr_candidates = 0;
1429 FOR_EACH_VEC_ELT (*ops, i, oe1)
1430 {
1431 enum tree_code dcode;
1432 gimple oe1def;
1433
1434 if (TREE_CODE (oe1->op) != SSA_NAME)
1435 continue;
1436 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1437 if (!is_gimple_assign (oe1def))
1438 continue;
1439 dcode = gimple_assign_rhs_code (oe1def);
1440 if ((dcode != MULT_EXPR
1441 && dcode != RDIV_EXPR)
1442 || !is_reassociable_op (oe1def, dcode, loop))
1443 continue;
1444
1445 bitmap_set_bit (candidates, i);
1446 nr_candidates++;
1447 }
1448
1449 if (nr_candidates < 2)
1450 {
1451 sbitmap_free (candidates);
1452 return false;
1453 }
1454
1455 if (dump_file && (dump_flags & TDF_DETAILS))
1456 {
1457 fprintf (dump_file, "searching for un-distribute opportunities ");
1458 print_generic_expr (dump_file,
1459 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1460 fprintf (dump_file, " %d\n", nr_candidates);
1461 }
1462
1463 /* Build linearized sub-operand lists and the counting table. */
1464 cvec.create (0);
1465
1466 hash_table<oecount_hasher> ctable (15);
1467
1468 /* ??? Macro arguments cannot have multi-argument template types in
1469 them. This typedef is needed to work around that limitation. */
1470 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1471 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1472 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1473 {
1474 gimple oedef;
1475 enum tree_code oecode;
1476 unsigned j;
1477
1478 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1479 oecode = gimple_assign_rhs_code (oedef);
1480 linearize_expr_tree (&subops[i], oedef,
1481 associative_tree_code (oecode), false);
1482
1483 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1484 {
1485 oecount c;
1486 int *slot;
1487 int idx;
1488 c.oecode = oecode;
1489 c.cnt = 1;
1490 c.id = next_oecount_id++;
1491 c.op = oe1->op;
1492 cvec.safe_push (c);
1493 idx = cvec.length () + 41;
1494 slot = ctable.find_slot (idx, INSERT);
1495 if (!*slot)
1496 {
1497 *slot = idx;
1498 }
1499 else
1500 {
1501 cvec.pop ();
1502 cvec[*slot - 42].cnt++;
1503 }
1504 }
1505 }
1506
1507 /* Sort the counting table. */
1508 cvec.qsort (oecount_cmp);
1509
1510 if (dump_file && (dump_flags & TDF_DETAILS))
1511 {
1512 oecount *c;
1513 fprintf (dump_file, "Candidates:\n");
1514 FOR_EACH_VEC_ELT (cvec, j, c)
1515 {
1516 fprintf (dump_file, " %u %s: ", c->cnt,
1517 c->oecode == MULT_EXPR
1518 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1519 print_generic_expr (dump_file, c->op, 0);
1520 fprintf (dump_file, "\n");
1521 }
1522 }
1523
1524 /* Process the (operand, code) pairs in order of most occurrence. */
1525 candidates2 = sbitmap_alloc (length);
1526 while (!cvec.is_empty ())
1527 {
1528 oecount *c = &cvec.last ();
1529 if (c->cnt < 2)
1530 break;
1531
1532 /* Now collect the operands in the outer chain that contain
1533 the common operand in their inner chain. */
1534 bitmap_clear (candidates2);
1535 nr_candidates2 = 0;
1536 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1537 {
1538 gimple oedef;
1539 enum tree_code oecode;
1540 unsigned j;
1541 tree op = (*ops)[i]->op;
1542
1543 /* If we already undistributed in this chain, this may be
1544 a constant. */
1545 if (TREE_CODE (op) != SSA_NAME)
1546 continue;
1547
1548 oedef = SSA_NAME_DEF_STMT (op);
1549 oecode = gimple_assign_rhs_code (oedef);
1550 if (oecode != c->oecode)
1551 continue;
1552
1553 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1554 {
1555 if (oe1->op == c->op)
1556 {
1557 bitmap_set_bit (candidates2, i);
1558 ++nr_candidates2;
1559 break;
1560 }
1561 }
1562 }
1563
1564 if (nr_candidates2 >= 2)
1565 {
1566 operand_entry_t oe1, oe2;
1567 gimple prod;
1568 int first = bitmap_first_set_bit (candidates2);
1569
1570 /* Build the new addition chain. */
1571 oe1 = (*ops)[first];
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1573 {
1574 fprintf (dump_file, "Building (");
1575 print_generic_expr (dump_file, oe1->op, 0);
1576 }
1577 zero_one_operation (&oe1->op, c->oecode, c->op);
1578 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1579 {
1580 gimple sum;
1581 oe2 = (*ops)[i];
1582 if (dump_file && (dump_flags & TDF_DETAILS))
1583 {
1584 fprintf (dump_file, " + ");
1585 print_generic_expr (dump_file, oe2->op, 0);
1586 }
1587 zero_one_operation (&oe2->op, c->oecode, c->op);
1588 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1589 oe1->op, oe2->op, opcode);
1590 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1591 oe2->rank = 0;
1592 oe1->op = gimple_get_lhs (sum);
1593 }
1594
1595 /* Apply the multiplication/division. */
1596 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1597 oe1->op, c->op, c->oecode);
1598 if (dump_file && (dump_flags & TDF_DETAILS))
1599 {
1600 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1601 print_generic_expr (dump_file, c->op, 0);
1602 fprintf (dump_file, "\n");
1603 }
1604
1605 /* Record it in the addition chain and disable further
1606 undistribution with this op. */
1607 oe1->op = gimple_assign_lhs (prod);
1608 oe1->rank = get_rank (oe1->op);
1609 subops[first].release ();
1610
1611 changed = true;
1612 }
1613
1614 cvec.pop ();
1615 }
1616
1617 for (i = 0; i < ops->length (); ++i)
1618 subops[i].release ();
1619 free (subops);
1620 cvec.release ();
1621 sbitmap_free (candidates);
1622 sbitmap_free (candidates2);
1623
1624 return changed;
1625 }
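/* A small end-to-end sketch of the undistribution above (names made
   up): for the PLUS_EXPR operand list { a*x, b*x, c } the pair
   (x, MULT_EXPR) occurs twice, so x is stripped from both candidate
   chains via zero_one_operation, an inner sum a + b is built, the
   product (a + b) * x replaces the first candidate and the second
   becomes the constant 0, leaving { (a + b) * x, 0, c } for the
   normal reassociation machinery to finish.  */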
1626
1627 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1628 expression, examine the other OPS to see if any of them are comparisons
1629 of the same values, which we may be able to combine or eliminate.
1630 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1631
1632 static bool
1633 eliminate_redundant_comparison (enum tree_code opcode,
1634 vec<operand_entry_t> *ops,
1635 unsigned int currindex,
1636 operand_entry_t curr)
1637 {
1638 tree op1, op2;
1639 enum tree_code lcode, rcode;
1640 gimple def1, def2;
1641 int i;
1642 operand_entry_t oe;
1643
1644 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1645 return false;
1646
1647 /* Check that CURR is a comparison. */
1648 if (TREE_CODE (curr->op) != SSA_NAME)
1649 return false;
1650 def1 = SSA_NAME_DEF_STMT (curr->op);
1651 if (!is_gimple_assign (def1))
1652 return false;
1653 lcode = gimple_assign_rhs_code (def1);
1654 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1655 return false;
1656 op1 = gimple_assign_rhs1 (def1);
1657 op2 = gimple_assign_rhs2 (def1);
1658
1659 /* Now look for a similar comparison in the remaining OPS. */
1660 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1661 {
1662 tree t;
1663
1664 if (TREE_CODE (oe->op) != SSA_NAME)
1665 continue;
1666 def2 = SSA_NAME_DEF_STMT (oe->op);
1667 if (!is_gimple_assign (def2))
1668 continue;
1669 rcode = gimple_assign_rhs_code (def2);
1670 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1671 continue;
1672
1673 /* If we got here, we have a match. See if we can combine the
1674 two comparisons. */
1675 if (opcode == BIT_IOR_EXPR)
1676 t = maybe_fold_or_comparisons (lcode, op1, op2,
1677 rcode, gimple_assign_rhs1 (def2),
1678 gimple_assign_rhs2 (def2));
1679 else
1680 t = maybe_fold_and_comparisons (lcode, op1, op2,
1681 rcode, gimple_assign_rhs1 (def2),
1682 gimple_assign_rhs2 (def2));
1683 if (!t)
1684 continue;
1685
1686 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1687 always give us a boolean_type_node value back. If the original
1688 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1689 we need to convert. */
1690 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1691 t = fold_convert (TREE_TYPE (curr->op), t);
1692
1693 if (TREE_CODE (t) != INTEGER_CST
1694 && !operand_equal_p (t, curr->op, 0))
1695 {
1696 enum tree_code subcode;
1697 tree newop1, newop2;
1698 if (!COMPARISON_CLASS_P (t))
1699 continue;
1700 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1701 STRIP_USELESS_TYPE_CONVERSION (newop1);
1702 STRIP_USELESS_TYPE_CONVERSION (newop2);
1703 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1704 continue;
1705 }
1706
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1708 {
1709 fprintf (dump_file, "Equivalence: ");
1710 print_generic_expr (dump_file, curr->op, 0);
1711 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1712 print_generic_expr (dump_file, oe->op, 0);
1713 fprintf (dump_file, " -> ");
1714 print_generic_expr (dump_file, t, 0);
1715 fprintf (dump_file, "\n");
1716 }
1717
1718 /* Now we can delete oe, as it has been subsumed by the new combined
1719 expression t. */
1720 ops->ordered_remove (i);
1721 reassociate_stats.ops_eliminated ++;
1722
1723 /* If t is the same as curr->op, we're done. Otherwise we must
1724 replace curr->op with t. Special case is if we got a constant
1725 back, in which case we add it to the end instead of in place of
1726 the current entry. */
1727 if (TREE_CODE (t) == INTEGER_CST)
1728 {
1729 ops->ordered_remove (currindex);
1730 add_to_ops_vec (ops, t);
1731 }
1732 else if (!operand_equal_p (t, curr->op, 0))
1733 {
1734 gimple sum;
1735 enum tree_code subcode;
1736 tree newop1;
1737 tree newop2;
1738 gcc_assert (COMPARISON_CLASS_P (t));
1739 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1740 STRIP_USELESS_TYPE_CONVERSION (newop1);
1741 STRIP_USELESS_TYPE_CONVERSION (newop2);
1742 gcc_checking_assert (is_gimple_val (newop1)
1743 && is_gimple_val (newop2));
1744 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1745 curr->op = gimple_get_lhs (sum);
1746 }
1747 return true;
1748 }
1749
1750 return false;
1751 }
1752
1753 /* Perform various identities and other optimizations on the list of
1754 operand entries, stored in OPS. The tree code for the binary
1755 operation between all the operands is OPCODE. */
1756
1757 static void
1758 optimize_ops_list (enum tree_code opcode,
1759 vec<operand_entry_t> *ops)
1760 {
1761 unsigned int length = ops->length ();
1762 unsigned int i;
1763 operand_entry_t oe;
1764 operand_entry_t oelast = NULL;
1765 bool iterate = false;
1766
1767 if (length == 1)
1768 return;
1769
1770 oelast = ops->last ();
1771
1772 /* If the last two are constants, pop the constants off, merge them
1773 and try the next two. */
1774 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1775 {
1776 operand_entry_t oelm1 = (*ops)[length - 2];
1777
1778 if (oelm1->rank == 0
1779 && is_gimple_min_invariant (oelm1->op)
1780 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1781 TREE_TYPE (oelast->op)))
1782 {
1783 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1784 oelm1->op, oelast->op);
1785
1786 if (folded && is_gimple_min_invariant (folded))
1787 {
1788 if (dump_file && (dump_flags & TDF_DETAILS))
1789 fprintf (dump_file, "Merging constants\n");
1790
1791 ops->pop ();
1792 ops->pop ();
1793
1794 add_to_ops_vec (ops, folded);
1795 reassociate_stats.constants_eliminated++;
1796
1797 optimize_ops_list (opcode, ops);
1798 return;
1799 }
1800 }
1801 }
1802
1803 eliminate_using_constants (opcode, ops);
1804 oelast = NULL;
1805
1806 for (i = 0; ops->iterate (i, &oe);)
1807 {
1808 bool done = false;
1809
1810 if (eliminate_not_pairs (opcode, ops, i, oe))
1811 return;
1812 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1813 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1814 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1815 {
1816 if (done)
1817 return;
1818 iterate = true;
1819 oelast = NULL;
1820 continue;
1821 }
1822 oelast = oe;
1823 i++;
1824 }
1825
1826 length = ops->length ();
1827 oelast = ops->last ();
1828
1829 if (iterate)
1830 optimize_ops_list (opcode, ops);
1831 }
1832
1833 /* The following functions are subroutines to optimize_range_tests and allow
1834 it to try to change a logical combination of comparisons into a range
1835 test.
1836
1837 For example, both
1838 X == 2 || X == 5 || X == 3 || X == 4
1839 and
1840 X >= 2 && X <= 5
1841 are converted to
1842 (unsigned) (X - 2) <= 3
1843
1844 For more information see comments above fold_range_test in fold-const.c;
1845 this implementation is for GIMPLE. */
1846
1847 struct range_entry
1848 {
1849 tree exp;
1850 tree low;
1851 tree high;
1852 bool in_p;
1853 bool strict_overflow_p;
1854 unsigned int idx, next;
1855 };
1856
1857 /* This is similar to make_range in fold-const.c, but on top of
1858 GIMPLE instead of trees. If EXP is non-NULL, it should be
1859 an SSA_NAME and STMT argument is ignored, otherwise STMT
1860 argument should be a GIMPLE_COND. */
1861
1862 static void
1863 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1864 {
1865 int in_p;
1866 tree low, high;
1867 bool is_bool, strict_overflow_p;
1868
1869 r->exp = NULL_TREE;
1870 r->in_p = false;
1871 r->strict_overflow_p = false;
1872 r->low = NULL_TREE;
1873 r->high = NULL_TREE;
1874 if (exp != NULL_TREE
1875 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1876 return;
1877
1878 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1879 and see if we can refine the range. Some of the cases below may not
1880 happen, but it doesn't seem worth worrying about this. We "continue"
1881 the outer loop when we've changed something; otherwise we "break"
1882 the switch, which will "break" the while. */
1883 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1884 high = low;
1885 in_p = 0;
1886 strict_overflow_p = false;
1887 is_bool = false;
1888 if (exp == NULL_TREE)
1889 is_bool = true;
1890 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1891 {
1892 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1893 is_bool = true;
1894 else
1895 return;
1896 }
1897 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1898 is_bool = true;
1899
1900 while (1)
1901 {
1902 enum tree_code code;
1903 tree arg0, arg1, exp_type;
1904 tree nexp;
1905 location_t loc;
1906
1907 if (exp != NULL_TREE)
1908 {
1909 if (TREE_CODE (exp) != SSA_NAME
1910 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1911 break;
1912
1913 stmt = SSA_NAME_DEF_STMT (exp);
1914 if (!is_gimple_assign (stmt))
1915 break;
1916
1917 code = gimple_assign_rhs_code (stmt);
1918 arg0 = gimple_assign_rhs1 (stmt);
1919 arg1 = gimple_assign_rhs2 (stmt);
1920 exp_type = TREE_TYPE (exp);
1921 }
1922 else
1923 {
1924 code = gimple_cond_code (stmt);
1925 arg0 = gimple_cond_lhs (stmt);
1926 arg1 = gimple_cond_rhs (stmt);
1927 exp_type = boolean_type_node;
1928 }
1929
1930 if (TREE_CODE (arg0) != SSA_NAME)
1931 break;
1932 loc = gimple_location (stmt);
1933 switch (code)
1934 {
1935 case BIT_NOT_EXPR:
1936 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1937 /* Ensure the range is either +[-,0], +[0,0],
1938 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1939 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1940 or similar expression of unconditional true or
1941 false, it should not be negated. */
1942 && ((high && integer_zerop (high))
1943 || (low && integer_onep (low))))
1944 {
1945 in_p = !in_p;
1946 exp = arg0;
1947 continue;
1948 }
1949 break;
1950 case SSA_NAME:
1951 exp = arg0;
1952 continue;
1953 CASE_CONVERT:
1954 if (is_bool)
1955 goto do_default;
1956 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1957 {
1958 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1959 is_bool = true;
1960 else
1961 return;
1962 }
1963 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1964 is_bool = true;
1965 goto do_default;
1966 case EQ_EXPR:
1967 case NE_EXPR:
1968 case LT_EXPR:
1969 case LE_EXPR:
1970 case GE_EXPR:
1971 case GT_EXPR:
1972 is_bool = true;
1973 /* FALLTHRU */
1974 default:
1975 if (!is_bool)
1976 return;
1977 do_default:
1978 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1979 &low, &high, &in_p,
1980 &strict_overflow_p);
1981 if (nexp != NULL_TREE)
1982 {
1983 exp = nexp;
1984 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1985 continue;
1986 }
1987 break;
1988 }
1989 break;
1990 }
1991 if (is_bool)
1992 {
1993 r->exp = exp;
1994 r->in_p = in_p;
1995 r->low = low;
1996 r->high = high;
1997 r->strict_overflow_p = strict_overflow_p;
1998 }
1999 }
2000
2001 /* Comparison function for qsort. Sort entries
2002 without SSA_NAME exp first, then with SSA_NAMEs sorted
2003 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2004 by increasing ->low and if ->low is the same, by increasing
2005 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2006 means maximum. */
2007
2008 static int
2009 range_entry_cmp (const void *a, const void *b)
2010 {
2011 const struct range_entry *p = (const struct range_entry *) a;
2012 const struct range_entry *q = (const struct range_entry *) b;
2013
2014 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2015 {
2016 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2017 {
2018 /* Group range_entries for the same SSA_NAME together. */
2019 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2020 return -1;
2021 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2022 return 1;
2023 /* If ->low is different, NULL low goes first, then by
2024 ascending low. */
2025 if (p->low != NULL_TREE)
2026 {
2027 if (q->low != NULL_TREE)
2028 {
2029 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2030 p->low, q->low);
2031 if (tem && integer_onep (tem))
2032 return -1;
2033 tem = fold_binary (GT_EXPR, boolean_type_node,
2034 p->low, q->low);
2035 if (tem && integer_onep (tem))
2036 return 1;
2037 }
2038 else
2039 return 1;
2040 }
2041 else if (q->low != NULL_TREE)
2042 return -1;
2043 /* If ->high is different, NULL high goes last, before that by
2044 ascending high. */
2045 if (p->high != NULL_TREE)
2046 {
2047 if (q->high != NULL_TREE)
2048 {
2049 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2050 p->high, q->high);
2051 if (tem && integer_onep (tem))
2052 return -1;
2053 tem = fold_binary (GT_EXPR, boolean_type_node,
2054 p->high, q->high);
2055 if (tem && integer_onep (tem))
2056 return 1;
2057 }
2058 else
2059 return -1;
2060 }
2061 else if (q->high != NULL_TREE)
2062 return 1;
2063 /* If both ranges are the same, sort below by ascending idx. */
2064 }
2065 else
2066 return 1;
2067 }
2068 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2069 return -1;
2070
2071 if (p->idx < q->idx)
2072 return -1;
2073 else
2074 {
2075 gcc_checking_assert (p->idx > q->idx);
2076 return 1;
2077 }
2078 }
2079
2080 /* Helper routine of optimize_range_tests.
2081 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2082 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2083 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2084 is NULL, OTHERRANGEP should not be NULL either, and then OTHERRANGEP
2085 points to an array of COUNT pointers to other ranges. Return
2086 true if the range merge has been successful.
2087 If OPCODE is ERROR_MARK, this is called from within
2088 maybe_optimize_range_tests and is performing inter-bb range optimization.
2089 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2090 oe->rank. */
2091
2092 static bool
2093 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2094 struct range_entry **otherrangep,
2095 unsigned int count, enum tree_code opcode,
2096 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2097 bool in_p, tree low, tree high, bool strict_overflow_p)
2098 {
2099 operand_entry_t oe = (*ops)[range->idx];
2100 tree op = oe->op;
2101 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2102 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2103 location_t loc = gimple_location (stmt);
2104 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2105 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2106 in_p, low, high);
2107 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2108 gimple_stmt_iterator gsi;
2109 unsigned int i;
2110
2111 if (tem == NULL_TREE)
2112 return false;
2113
2114 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2115 warning_at (loc, OPT_Wstrict_overflow,
2116 "assuming signed overflow does not occur "
2117 "when simplifying range test");
2118
2119 if (dump_file && (dump_flags & TDF_DETAILS))
2120 {
2121 struct range_entry *r;
2122 fprintf (dump_file, "Optimizing range tests ");
2123 print_generic_expr (dump_file, range->exp, 0);
2124 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2125 print_generic_expr (dump_file, range->low, 0);
2126 fprintf (dump_file, ", ");
2127 print_generic_expr (dump_file, range->high, 0);
2128 fprintf (dump_file, "]");
2129 for (i = 0; i < count; i++)
2130 {
2131 if (otherrange)
2132 r = otherrange + i;
2133 else
2134 r = otherrangep[i];
2135 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2136 print_generic_expr (dump_file, r->low, 0);
2137 fprintf (dump_file, ", ");
2138 print_generic_expr (dump_file, r->high, 0);
2139 fprintf (dump_file, "]");
2140 }
2141 fprintf (dump_file, "\n into ");
2142 print_generic_expr (dump_file, tem, 0);
2143 fprintf (dump_file, "\n");
2144 }
2145
2146 if (opcode == BIT_IOR_EXPR
2147 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2148 tem = invert_truthvalue_loc (loc, tem);
2149
2150 tem = fold_convert_loc (loc, optype, tem);
2151 gsi = gsi_for_stmt (stmt);
2152 unsigned int uid = gimple_uid (stmt);
2153 /* In rare cases range->exp can be equal to lhs of stmt.
2154 In that case we have to insert after the stmt rather than before
2155 it. If stmt is a PHI, insert it at the start of the basic block. */
2156 if (op != range->exp)
2157 {
2158 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2159 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2160 GSI_SAME_STMT);
2161 gsi_prev (&gsi);
2162 }
2163 else if (gimple_code (stmt) != GIMPLE_PHI)
2164 {
2165 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2166 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2167 GSI_CONTINUE_LINKING);
2168 }
2169 else
2170 {
2171 gsi = gsi_after_labels (gimple_bb (stmt));
2172 if (!gsi_end_p (gsi))
2173 uid = gimple_uid (gsi_stmt (gsi));
2174 else
2175 {
2176 gsi = gsi_start_bb (gimple_bb (stmt));
2177 uid = 1;
2178 while (!gsi_end_p (gsi))
2179 {
2180 uid = gimple_uid (gsi_stmt (gsi));
2181 gsi_next (&gsi);
2182 }
2183 }
2184 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2185 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2186 GSI_SAME_STMT);
2187 if (gsi_end_p (gsi))
2188 gsi = gsi_last_bb (gimple_bb (stmt));
2189 else
2190 gsi_prev (&gsi);
2191 }
2192 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2193 if (gimple_uid (gsi_stmt (gsi)))
2194 break;
2195 else
2196 gimple_set_uid (gsi_stmt (gsi), uid);
2197
2198 oe->op = tem;
2199 range->exp = exp;
2200 range->low = low;
2201 range->high = high;
2202 range->in_p = in_p;
2203 range->strict_overflow_p = false;
2204
2205 for (i = 0; i < count; i++)
2206 {
2207 if (otherrange)
2208 range = otherrange + i;
2209 else
2210 range = otherrangep[i];
2211 oe = (*ops)[range->idx];
2212 /* Now change all the other range test immediate uses, so that
2213 those tests will be optimized away. */
2214 if (opcode == ERROR_MARK)
2215 {
2216 if (oe->op)
2217 oe->op = build_int_cst (TREE_TYPE (oe->op),
2218 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2219 else
2220 oe->op = (oe->rank == BIT_IOR_EXPR
2221 ? boolean_false_node : boolean_true_node);
2222 }
2223 else
2224 oe->op = error_mark_node;
2225 range->exp = NULL_TREE;
2226 }
2227 return true;
2228 }
2229
2230 /* Optimize X == CST1 || X == CST2
2231 if popcount (CST1 ^ CST2) == 1 into
2232 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2233 Similarly for ranges. E.g.
2234 X != 2 && X != 3 && X != 10 && X != 11
2235 will be transformed by the previous optimization into
2236 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2237 and this loop can transform that into
2238 !(((X & ~8) - 2U) <= 1U). */
2239
2240 static bool
2241 optimize_range_tests_xor (enum tree_code opcode, tree type,
2242 tree lowi, tree lowj, tree highi, tree highj,
2243 vec<operand_entry_t> *ops,
2244 struct range_entry *rangei,
2245 struct range_entry *rangej)
2246 {
2247 tree lowxor, highxor, tem, exp;
2248 /* Check lowi ^ lowj == highi ^ highj and
2249 popcount (lowi ^ lowj) == 1. */
2250 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2251 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2252 return false;
2253 if (!integer_pow2p (lowxor))
2254 return false;
2255 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2256 if (!tree_int_cst_equal (lowxor, highxor))
2257 return false;
2258
2259 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2260 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2261 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2262 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2263 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2264 NULL, rangei->in_p, lowj, highj,
2265 rangei->strict_overflow_p
2266 || rangej->strict_overflow_p))
2267 return true;
2268 return false;
2269 }
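
/* A standalone sketch of the identity exploited above (illustrative only,
   not used by the pass; the constants 4 and 6 are made up).  Because
   4 ^ 6 == 2 has exactly one bit set, masking that bit away merges the two
   equality tests into one.  */

static inline int
illustrative_xor_merge (unsigned int x)
{
  /* x == 4 || x == 6 is equivalent to (x & ~(4 ^ 6)) == (4 & ~(4 ^ 6)).  */
  return (x & ~2U) == 4U;
}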
2270
2271 /* Optimize X == CST1 || X == CST2
2272 if popcount (CST2 - CST1) == 1 into
2273 ((X - CST1) & ~(CST2 - CST1)) == 0.
2274 Similarly for ranges. E.g.
2275 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2276 || X == 75 || X == 45
2277 will be transformed by the previous optimization into
2278 (X - 43U) <= 3U || (X - 75U) <= 3U
2279 and this loop can transform that into
2280 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2281 static bool
2282 optimize_range_tests_diff (enum tree_code opcode, tree type,
2283 tree lowi, tree lowj, tree highi, tree highj,
2284 vec<operand_entry_t> *ops,
2285 struct range_entry *rangei,
2286 struct range_entry *rangej)
2287 {
2288 tree tem1, tem2, mask;
2289 /* Check highi - lowi == highj - lowj. */
2290 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2291 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2292 return false;
2293 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2294 if (!tree_int_cst_equal (tem1, tem2))
2295 return false;
2296 /* Check popcount (lowj - lowi) == 1. */
2297 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2298 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2299 return false;
2300 if (!integer_pow2p (tem1))
2301 return false;
2302
2303 type = unsigned_type_for (type);
2304 tem1 = fold_convert (type, tem1);
2305 tem2 = fold_convert (type, tem2);
2306 lowi = fold_convert (type, lowi);
2307 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2308 tem1 = fold_binary (MINUS_EXPR, type,
2309 fold_convert (type, rangei->exp), lowi);
2310 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2311 lowj = build_int_cst (type, 0);
2312 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2313 NULL, rangei->in_p, lowj, tem2,
2314 rangei->strict_overflow_p
2315 || rangej->strict_overflow_p))
2316 return true;
2317 return false;
2318 }
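
/* A standalone sketch of the identity exploited above (illustrative only,
   not used by the pass; the constants 43 and 75 are made up).  Because
   75 - 43 == 32 is a power of two, both equalities collapse into one test
   against zero.  */

static inline int
illustrative_diff_merge (unsigned int x)
{
  /* x == 43 || x == 75 is equivalent to ((x - 43) & ~(75 - 43)) == 0.  */
  return ((x - 43U) & ~(75U - 43U)) == 0U;
}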
2319
2320 /* Perform the checks common to optimize_range_tests_xor and
2321 optimize_range_tests_diff.
2322 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2323 otherwise call optimize_range_tests_diff. */
2324
2325 static bool
2326 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2327 bool optimize_xor, vec<operand_entry_t> *ops,
2328 struct range_entry *ranges)
2329 {
2330 int i, j;
2331 bool any_changes = false;
2332 for (i = first; i < length; i++)
2333 {
2334 tree lowi, highi, lowj, highj, type, tem;
2335
2336 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2337 continue;
2338 type = TREE_TYPE (ranges[i].exp);
2339 if (!INTEGRAL_TYPE_P (type))
2340 continue;
2341 lowi = ranges[i].low;
2342 if (lowi == NULL_TREE)
2343 lowi = TYPE_MIN_VALUE (type);
2344 highi = ranges[i].high;
2345 if (highi == NULL_TREE)
2346 continue;
2347 for (j = i + 1; j < length && j < i + 64; j++)
2348 {
2349 bool changes;
2350 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2351 continue;
2352 lowj = ranges[j].low;
2353 if (lowj == NULL_TREE)
2354 continue;
2355 highj = ranges[j].high;
2356 if (highj == NULL_TREE)
2357 highj = TYPE_MAX_VALUE (type);
2358 /* Check lowj > highi. */
2359 tem = fold_binary (GT_EXPR, boolean_type_node,
2360 lowj, highi);
2361 if (tem == NULL_TREE || !integer_onep (tem))
2362 continue;
2363 if (optimize_xor)
2364 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2365 highi, highj, ops,
2366 ranges + i, ranges + j);
2367 else
2368 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2369 highi, highj, ops,
2370 ranges + i, ranges + j);
2371 if (changes)
2372 {
2373 any_changes = true;
2374 break;
2375 }
2376 }
2377 }
2378 return any_changes;
2379 }
2380
2381 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2382 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2383 EXP on success, NULL otherwise. */
2384
2385 static tree
2386 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2387 wide_int *mask, tree *totallowp)
2388 {
2389 tree tem = int_const_binop (MINUS_EXPR, high, low);
2390 if (tem == NULL_TREE
2391 || TREE_CODE (tem) != INTEGER_CST
2392 || TREE_OVERFLOW (tem)
2393 || tree_int_cst_sgn (tem) == -1
2394 || compare_tree_int (tem, prec) != -1)
2395 return NULL_TREE;
2396
2397 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2398 *mask = wi::shifted_mask (0, max, false, prec);
2399 if (TREE_CODE (exp) == BIT_AND_EXPR
2400 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2401 {
2402 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2403 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2404 if (wi::popcount (msk) == 1
2405 && wi::ltu_p (msk, prec - max))
2406 {
2407 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2408 max += msk.to_uhwi ();
2409 exp = TREE_OPERAND (exp, 0);
2410 if (integer_zerop (low)
2411 && TREE_CODE (exp) == PLUS_EXPR
2412 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2413 {
2414 tree ret = TREE_OPERAND (exp, 0);
2415 STRIP_NOPS (ret);
2416 widest_int bias
2417 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2418 TYPE_PRECISION (TREE_TYPE (low))));
2419 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2420 if (totallowp)
2421 {
2422 *totallowp = tbias;
2423 return ret;
2424 }
2425 else if (!tree_int_cst_lt (totallow, tbias))
2426 return NULL_TREE;
2427 bias = wi::to_widest (tbias);
2428 bias -= wi::to_widest (totallow);
2429 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2430 {
2431 *mask = wi::lshift (*mask, bias);
2432 return ret;
2433 }
2434 }
2435 }
2436 }
2437 if (totallowp)
2438 return exp;
2439 if (!tree_int_cst_lt (totallow, low))
2440 return exp;
2441 tem = int_const_binop (MINUS_EXPR, low, totallow);
2442 if (tem == NULL_TREE
2443 || TREE_CODE (tem) != INTEGER_CST
2444 || TREE_OVERFLOW (tem)
2445 || compare_tree_int (tem, prec - max) == 1)
2446 return NULL_TREE;
2447
2448 *mask = wi::lshift (*mask, wi::to_widest (tem));
2449 return exp;
2450 }
2451
2452 /* Attempt to optimize small range tests using a bit test.
2453 E.g.
2454 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2455 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2456 has been optimized by earlier optimizations into:
2457 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2458 As the whole 43 through 82 range spans fewer than 64 values,
2459 for 64-bit word targets optimize that into:
2460 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
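
/* As a smaller standalone sketch of that final shape (illustrative only,
   not used by the pass; the set { 43, 44, 45, 46, 49 } and the mask 0x4f
   are made up): once every value fits into a word-sized window starting at
   the smallest value, the whole disjunction becomes one shift-and-mask
   test, with bits 0-3 and 6 of the mask standing for 43-46 and 49.  */

static inline int
illustrative_bit_test (unsigned int x)
{
  /* x == 43 || x == 44 || x == 45 || x == 46 || x == 49.  The range check
     guards the shift amount, so the shift is always well defined.  */
  return (x - 43U) <= 6U && ((1U << (x - 43U)) & 0x4fU) != 0;
}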
2461
2462 static bool
2463 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2464 vec<operand_entry_t> *ops,
2465 struct range_entry *ranges)
2466 {
2467 int i, j;
2468 bool any_changes = false;
2469 int prec = GET_MODE_BITSIZE (word_mode);
2470 auto_vec<struct range_entry *, 64> candidates;
2471
2472 for (i = first; i < length - 2; i++)
2473 {
2474 tree lowi, highi, lowj, highj, type;
2475
2476 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2477 continue;
2478 type = TREE_TYPE (ranges[i].exp);
2479 if (!INTEGRAL_TYPE_P (type))
2480 continue;
2481 lowi = ranges[i].low;
2482 if (lowi == NULL_TREE)
2483 lowi = TYPE_MIN_VALUE (type);
2484 highi = ranges[i].high;
2485 if (highi == NULL_TREE)
2486 continue;
2487 wide_int mask;
2488 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2489 highi, &mask, &lowi);
2490 if (exp == NULL_TREE)
2491 continue;
2492 bool strict_overflow_p = ranges[i].strict_overflow_p;
2493 candidates.truncate (0);
2494 int end = MIN (i + 64, length);
2495 for (j = i + 1; j < end; j++)
2496 {
2497 tree exp2;
2498 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2499 continue;
2500 if (ranges[j].exp == exp)
2501 ;
2502 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2503 {
2504 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2505 if (exp2 == exp)
2506 ;
2507 else if (TREE_CODE (exp2) == PLUS_EXPR)
2508 {
2509 exp2 = TREE_OPERAND (exp2, 0);
2510 STRIP_NOPS (exp2);
2511 if (exp2 != exp)
2512 continue;
2513 }
2514 else
2515 continue;
2516 }
2517 else
2518 continue;
2519 lowj = ranges[j].low;
2520 if (lowj == NULL_TREE)
2521 continue;
2522 highj = ranges[j].high;
2523 if (highj == NULL_TREE)
2524 highj = TYPE_MAX_VALUE (type);
2525 wide_int mask2;
2526 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2527 highj, &mask2, NULL);
2528 if (exp2 != exp)
2529 continue;
2530 mask |= mask2;
2531 strict_overflow_p |= ranges[j].strict_overflow_p;
2532 candidates.safe_push (&ranges[j]);
2533 }
2534
2535 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2536 if (candidates.length () >= 2)
2537 {
2538 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2539 wi::to_widest (lowi)
2540 + prec - 1 - wi::clz (mask));
2541 operand_entry_t oe = (*ops)[ranges[i].idx];
2542 tree op = oe->op;
2543 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2544 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2545 location_t loc = gimple_location (stmt);
2546 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2547
2548 /* See if it isn't cheaper to pretend the minimum value of the
2549 range is 0, if the maximum value is small enough.
2550 We can then avoid the subtraction of the minimum value, but the
2551 mask constant could perhaps be more expensive. */
2552 if (compare_tree_int (lowi, 0) > 0
2553 && compare_tree_int (high, prec) < 0)
2554 {
2555 int cost_diff;
2556 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2557 rtx reg = gen_raw_REG (word_mode, 10000);
2558 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2559 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2560 GEN_INT (-m)), speed_p);
2561 rtx r = immed_wide_int_const (mask, word_mode);
2562 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2563 speed_p);
2564 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2565 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2566 speed_p);
2567 if (cost_diff > 0)
2568 {
2569 mask = wi::lshift (mask, m);
2570 lowi = build_zero_cst (TREE_TYPE (lowi));
2571 }
2572 }
2573
2574 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2575 false, lowi, high);
2576 if (tem == NULL_TREE || is_gimple_val (tem))
2577 continue;
2578 tree etype = unsigned_type_for (TREE_TYPE (exp));
2579 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2580 fold_convert_loc (loc, etype, exp),
2581 fold_convert_loc (loc, etype, lowi));
2582 exp = fold_convert_loc (loc, integer_type_node, exp);
2583 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2584 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2585 build_int_cst (word_type, 1), exp);
2586 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2587 wide_int_to_tree (word_type, mask));
2588 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2589 build_zero_cst (word_type));
2590 if (is_gimple_val (exp))
2591 continue;
2592
2593 /* The shift might have undefined behavior if TEM is true,
2594 but reassociate_bb isn't prepared to have basic blocks
2595 split when it is running. So, temporarily emit code
2596 with BIT_IOR_EXPR instead of &&, and fix it up in
2597 branch_fixup. */
2598 gimple_seq seq;
2599 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2600 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2601 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2602 gimple_seq seq2;
2603 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2604 gimple_seq_add_seq_without_update (&seq, seq2);
2605 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2606 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2607 gimple g = gimple_build_assign (make_ssa_name (optype),
2608 BIT_IOR_EXPR, tem, exp);
2609 gimple_set_location (g, loc);
2610 gimple_seq_add_stmt_without_update (&seq, g);
2611 exp = gimple_assign_lhs (g);
2612 tree val = build_zero_cst (optype);
2613 if (update_range_test (&ranges[i], NULL, candidates.address (),
2614 candidates.length (), opcode, ops, exp,
2615 seq, false, val, val, strict_overflow_p))
2616 {
2617 any_changes = true;
2618 reassoc_branch_fixups.safe_push (tem);
2619 }
2620 else
2621 gimple_seq_discard (seq);
2622 }
2623 }
2624 return any_changes;
2625 }
2626
2627 /* Optimize range tests, similarly to how fold_range_test optimizes
2628 them on trees. The tree code for the binary
2629 operation between all the operands is OPCODE.
2630 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2631 maybe_optimize_range_tests for inter-bb range optimization.
2632 In that case if oe->op is NULL, oe->id is bb->index whose
2633 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2634 the actual opcode. */
2635
2636 static bool
2637 optimize_range_tests (enum tree_code opcode,
2638 vec<operand_entry_t> *ops)
2639 {
2640 unsigned int length = ops->length (), i, j, first;
2641 operand_entry_t oe;
2642 struct range_entry *ranges;
2643 bool any_changes = false;
2644
2645 if (length == 1)
2646 return false;
2647
2648 ranges = XNEWVEC (struct range_entry, length);
2649 for (i = 0; i < length; i++)
2650 {
2651 oe = (*ops)[i];
2652 ranges[i].idx = i;
2653 init_range_entry (ranges + i, oe->op,
2654 oe->op ? NULL :
2655 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2656 /* For | invert it now, we will invert it again before emitting
2657 the optimized expression. */
2658 if (opcode == BIT_IOR_EXPR
2659 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2660 ranges[i].in_p = !ranges[i].in_p;
2661 }
2662
2663 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2664 for (i = 0; i < length; i++)
2665 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2666 break;
2667
2668 /* Try to merge ranges. */
2669 for (first = i; i < length; i++)
2670 {
2671 tree low = ranges[i].low;
2672 tree high = ranges[i].high;
2673 int in_p = ranges[i].in_p;
2674 bool strict_overflow_p = ranges[i].strict_overflow_p;
2675 int update_fail_count = 0;
2676
2677 for (j = i + 1; j < length; j++)
2678 {
2679 if (ranges[i].exp != ranges[j].exp)
2680 break;
2681 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2682 ranges[j].in_p, ranges[j].low, ranges[j].high))
2683 break;
2684 strict_overflow_p |= ranges[j].strict_overflow_p;
2685 }
2686
2687 if (j == i + 1)
2688 continue;
2689
2690 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2691 opcode, ops, ranges[i].exp, NULL, in_p,
2692 low, high, strict_overflow_p))
2693 {
2694 i = j - 1;
2695 any_changes = true;
2696 }
2697 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2698 while update_range_test would fail. */
2699 else if (update_fail_count == 64)
2700 i = j - 1;
2701 else
2702 ++update_fail_count;
2703 }
2704
2705 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2706 ops, ranges);
2707
2708 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2709 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2710 ops, ranges);
2711 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2712 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2713 ops, ranges);
2714
2715 if (any_changes && opcode != ERROR_MARK)
2716 {
2717 j = 0;
2718 FOR_EACH_VEC_ELT (*ops, i, oe)
2719 {
2720 if (oe->op == error_mark_node)
2721 continue;
2722 else if (i != j)
2723 (*ops)[j] = oe;
2724 j++;
2725 }
2726 ops->truncate (j);
2727 }
2728
2729 XDELETEVEC (ranges);
2730 return any_changes;
2731 }
2732
2733 /* Return true if STMT is a cast like:
2734 <bb N>:
2735 ...
2736 _123 = (int) _234;
2737
2738 <bb M>:
2739 # _345 = PHI <_123(N), 1(...), 1(...)>
2740 where _234 has bool type, _123 has single use and
2741 bb N has a single successor M. This is commonly used in
2742 the last block of a range test. */
2743
2744 static bool
2745 final_range_test_p (gimple stmt)
2746 {
2747 basic_block bb, rhs_bb;
2748 edge e;
2749 tree lhs, rhs;
2750 use_operand_p use_p;
2751 gimple use_stmt;
2752
2753 if (!gimple_assign_cast_p (stmt))
2754 return false;
2755 bb = gimple_bb (stmt);
2756 if (!single_succ_p (bb))
2757 return false;
2758 e = single_succ_edge (bb);
2759 if (e->flags & EDGE_COMPLEX)
2760 return false;
2761
2762 lhs = gimple_assign_lhs (stmt);
2763 rhs = gimple_assign_rhs1 (stmt);
2764 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2765 || TREE_CODE (rhs) != SSA_NAME
2766 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2767 return false;
2768
2769 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2770 if (!single_imm_use (lhs, &use_p, &use_stmt))
2771 return false;
2772
2773 if (gimple_code (use_stmt) != GIMPLE_PHI
2774 || gimple_bb (use_stmt) != e->dest)
2775 return false;
2776
2777 /* And that the rhs is defined in the same loop. */
2778 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2779 if (rhs_bb == NULL
2780 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2781 return false;
2782
2783 return true;
2784 }
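
/* A standalone sketch of source whose GIMPLE typically has that shape
   (illustrative only, not used by the pass): the short-circuited arms feed
   the constant 1 PHI arguments, while the last arm is a boolean comparison
   in its own block, cast to int right before the join.  */

static int
illustrative_final_range_test_input (int x, int y)
{
  return x == 2 || x == 5 || y > 10;
}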
2785
2786 /* Return true if BB is suitable basic block for inter-bb range test
2787 optimization. If BACKWARD is true, BB should be the only predecessor
2788 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2789 or is compared against to find the common basic block to which all
2790 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
2791 be the only predecessor of BB. */
2792
2793 static bool
2794 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2795 bool backward)
2796 {
2797 edge_iterator ei, ei2;
2798 edge e, e2;
2799 gimple stmt;
2800 gphi_iterator gsi;
2801 bool other_edge_seen = false;
2802 bool is_cond;
2803
2804 if (test_bb == bb)
2805 return false;
2806 /* Check last stmt first. */
2807 stmt = last_stmt (bb);
2808 if (stmt == NULL
2809 || (gimple_code (stmt) != GIMPLE_COND
2810 && (backward || !final_range_test_p (stmt)))
2811 || gimple_visited_p (stmt)
2812 || stmt_could_throw_p (stmt)
2813 || *other_bb == bb)
2814 return false;
2815 is_cond = gimple_code (stmt) == GIMPLE_COND;
2816 if (is_cond)
2817 {
2818 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2819 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2820 to *OTHER_BB (if not set yet, try to find it out). */
2821 if (EDGE_COUNT (bb->succs) != 2)
2822 return false;
2823 FOR_EACH_EDGE (e, ei, bb->succs)
2824 {
2825 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2826 return false;
2827 if (e->dest == test_bb)
2828 {
2829 if (backward)
2830 continue;
2831 else
2832 return false;
2833 }
2834 if (e->dest == bb)
2835 return false;
2836 if (*other_bb == NULL)
2837 {
2838 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2839 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2840 return false;
2841 else if (e->dest == e2->dest)
2842 *other_bb = e->dest;
2843 if (*other_bb == NULL)
2844 return false;
2845 }
2846 if (e->dest == *other_bb)
2847 other_edge_seen = true;
2848 else if (backward)
2849 return false;
2850 }
2851 if (*other_bb == NULL || !other_edge_seen)
2852 return false;
2853 }
2854 else if (single_succ (bb) != *other_bb)
2855 return false;
2856
2857 /* Now check all PHIs of *OTHER_BB. */
2858 e = find_edge (bb, *other_bb);
2859 e2 = find_edge (test_bb, *other_bb);
2860 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2861 {
2862 gphi *phi = gsi.phi ();
2863 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2864 corresponding to BB and TEST_BB predecessor must be the same. */
2865 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2866 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2867 {
2868 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2869 one of the PHIs should have the lhs of the last stmt in
2870 that block as PHI arg and that PHI should have 0 or 1
2871 corresponding to it in all other range test basic blocks
2872 considered. */
2873 if (!is_cond)
2874 {
2875 if (gimple_phi_arg_def (phi, e->dest_idx)
2876 == gimple_assign_lhs (stmt)
2877 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2878 || integer_onep (gimple_phi_arg_def (phi,
2879 e2->dest_idx))))
2880 continue;
2881 }
2882 else
2883 {
2884 gimple test_last = last_stmt (test_bb);
2885 if (gimple_code (test_last) != GIMPLE_COND
2886 && gimple_phi_arg_def (phi, e2->dest_idx)
2887 == gimple_assign_lhs (test_last)
2888 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2889 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2890 continue;
2891 }
2892
2893 return false;
2894 }
2895 }
2896 return true;
2897 }
2898
2899 /* Return true if BB doesn't have side-effects that would disallow
2900 range test optimization, all SSA_NAMEs set in the bb are consumed
2901 in the bb and there are no PHIs. */
2902
2903 static bool
2904 no_side_effect_bb (basic_block bb)
2905 {
2906 gimple_stmt_iterator gsi;
2907 gimple last;
2908
2909 if (!gimple_seq_empty_p (phi_nodes (bb)))
2910 return false;
2911 last = last_stmt (bb);
2912 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2913 {
2914 gimple stmt = gsi_stmt (gsi);
2915 tree lhs;
2916 imm_use_iterator imm_iter;
2917 use_operand_p use_p;
2918
2919 if (is_gimple_debug (stmt))
2920 continue;
2921 if (gimple_has_side_effects (stmt))
2922 return false;
2923 if (stmt == last)
2924 return true;
2925 if (!is_gimple_assign (stmt))
2926 return false;
2927 lhs = gimple_assign_lhs (stmt);
2928 if (TREE_CODE (lhs) != SSA_NAME)
2929 return false;
2930 if (gimple_assign_rhs_could_trap_p (stmt))
2931 return false;
2932 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2933 {
2934 gimple use_stmt = USE_STMT (use_p);
2935 if (is_gimple_debug (use_stmt))
2936 continue;
2937 if (gimple_bb (use_stmt) != bb)
2938 return false;
2939 }
2940 }
2941 return false;
2942 }
2943
2944 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2945 return true and fill in *OPS recursively. */
2946
2947 static bool
2948 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2949 struct loop *loop)
2950 {
2951 gimple stmt = SSA_NAME_DEF_STMT (var);
2952 tree rhs[2];
2953 int i;
2954
2955 if (!is_reassociable_op (stmt, code, loop))
2956 return false;
2957
2958 rhs[0] = gimple_assign_rhs1 (stmt);
2959 rhs[1] = gimple_assign_rhs2 (stmt);
2960 gimple_set_visited (stmt, true);
2961 for (i = 0; i < 2; i++)
2962 if (TREE_CODE (rhs[i]) == SSA_NAME
2963 && !get_ops (rhs[i], code, ops, loop)
2964 && has_single_use (rhs[i]))
2965 {
2966 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2967
2968 oe->op = rhs[i];
2969 oe->rank = code;
2970 oe->id = 0;
2971 oe->count = 1;
2972 ops->safe_push (oe);
2973 }
2974 return true;
2975 }
2976
2977 /* Find the ops that were added by get_ops starting from VAR, see if
2978 they were changed during update_range_test and if yes, create new
2979 stmts. */
2980
2981 static tree
2982 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2983 unsigned int *pidx, struct loop *loop)
2984 {
2985 gimple stmt = SSA_NAME_DEF_STMT (var);
2986 tree rhs[4];
2987 int i;
2988
2989 if (!is_reassociable_op (stmt, code, loop))
2990 return NULL;
2991
2992 rhs[0] = gimple_assign_rhs1 (stmt);
2993 rhs[1] = gimple_assign_rhs2 (stmt);
2994 rhs[2] = rhs[0];
2995 rhs[3] = rhs[1];
2996 for (i = 0; i < 2; i++)
2997 if (TREE_CODE (rhs[i]) == SSA_NAME)
2998 {
2999 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3000 if (rhs[2 + i] == NULL_TREE)
3001 {
3002 if (has_single_use (rhs[i]))
3003 rhs[2 + i] = ops[(*pidx)++]->op;
3004 else
3005 rhs[2 + i] = rhs[i];
3006 }
3007 }
3008 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3009 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3010 {
3011 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3012 var = make_ssa_name (TREE_TYPE (var));
3013 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3014 rhs[2], rhs[3]);
3015 gimple_set_uid (g, gimple_uid (stmt));
3016 gimple_set_visited (g, true);
3017 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3018 }
3019 return var;
3020 }
3021
3022 /* Structure to track the initial value passed to get_ops and
3023 the range in the ops vector for each basic block. */
3024
3025 struct inter_bb_range_test_entry
3026 {
3027 tree op;
3028 unsigned int first_idx, last_idx;
3029 };
3030
3031 /* Inter-bb range test optimization. */
3032
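/* For instance, source like the sketch below (illustrative only, not used
   by the pass) is typically gimplified into one GIMPLE_COND per comparison,
   each in its own basic block and all branching to a common destination;
   maybe_optimize_range_tests below can still merge them into the single
   range test (unsigned) (x - 2) <= 3.  */

static int
illustrative_inter_bb_input (int x, int y)
{
  if (x == 2 || x == 3 || x == 4 || x == 5)
    return y;
  return 0;
}
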
3033 static void
3034 maybe_optimize_range_tests (gimple stmt)
3035 {
3036 basic_block first_bb = gimple_bb (stmt);
3037 basic_block last_bb = first_bb;
3038 basic_block other_bb = NULL;
3039 basic_block bb;
3040 edge_iterator ei;
3041 edge e;
3042 auto_vec<operand_entry_t> ops;
3043 auto_vec<inter_bb_range_test_entry> bbinfo;
3044 bool any_changes = false;
3045
3046 /* Consider only basic blocks that end with GIMPLE_COND or
3047 a cast statement satisfying final_range_test_p. All
3048 but the last bb in the first_bb .. last_bb range
3049 should end with GIMPLE_COND. */
3050 if (gimple_code (stmt) == GIMPLE_COND)
3051 {
3052 if (EDGE_COUNT (first_bb->succs) != 2)
3053 return;
3054 }
3055 else if (final_range_test_p (stmt))
3056 other_bb = single_succ (first_bb);
3057 else
3058 return;
3059
3060 if (stmt_could_throw_p (stmt))
3061 return;
3062
3063 /* As relative ordering of post-dominator sons isn't fixed,
3064 maybe_optimize_range_tests can be called first on any
3065 bb in the range we want to optimize. So, start by searching
3066 backwards, as long as first_bb can be set to a predecessor. */
3067 while (single_pred_p (first_bb))
3068 {
3069 basic_block pred_bb = single_pred (first_bb);
3070 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3071 break;
3072 if (!no_side_effect_bb (first_bb))
3073 break;
3074 first_bb = pred_bb;
3075 }
3076 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3077 Before starting forward search in last_bb successors, find
3078 out the other_bb. */
3079 if (first_bb == last_bb)
3080 {
3081 other_bb = NULL;
3082 /* As a non-GIMPLE_COND last stmt always terminates the range,
3083 there is nothing for the forward search to discover, so just give up. */
3084 if (gimple_code (stmt) != GIMPLE_COND)
3085 return;
3086 /* Look at both successors. Either it ends with a GIMPLE_COND
3087 and satisfies suitable_cond_bb, or ends with a cast and
3088 other_bb is that cast's successor. */
3089 FOR_EACH_EDGE (e, ei, first_bb->succs)
3090 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3091 || e->dest == first_bb)
3092 return;
3093 else if (single_pred_p (e->dest))
3094 {
3095 stmt = last_stmt (e->dest);
3096 if (stmt
3097 && gimple_code (stmt) == GIMPLE_COND
3098 && EDGE_COUNT (e->dest->succs) == 2)
3099 {
3100 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3101 break;
3102 else
3103 other_bb = NULL;
3104 }
3105 else if (stmt
3106 && final_range_test_p (stmt)
3107 && find_edge (first_bb, single_succ (e->dest)))
3108 {
3109 other_bb = single_succ (e->dest);
3110 if (other_bb == first_bb)
3111 other_bb = NULL;
3112 }
3113 }
3114 if (other_bb == NULL)
3115 return;
3116 }
3117 /* Now do the forward search, moving last_bb to successor bbs
3118 that aren't other_bb. */
3119 while (EDGE_COUNT (last_bb->succs) == 2)
3120 {
3121 FOR_EACH_EDGE (e, ei, last_bb->succs)
3122 if (e->dest != other_bb)
3123 break;
3124 if (e == NULL)
3125 break;
3126 if (!single_pred_p (e->dest))
3127 break;
3128 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3129 break;
3130 if (!no_side_effect_bb (e->dest))
3131 break;
3132 last_bb = e->dest;
3133 }
3134 if (first_bb == last_bb)
3135 return;
3136 /* Here basic blocks first_bb through last_bb's predecessor
3137 end with GIMPLE_COND, all of them have one of the edges to
3138 other_bb and another to another block in the range,
3139 all blocks except first_bb don't have side-effects and
3140 last_bb ends with either GIMPLE_COND, or cast satisfying
3141 final_range_test_p. */
3142 for (bb = last_bb; ; bb = single_pred (bb))
3143 {
3144 enum tree_code code;
3145 tree lhs, rhs;
3146 inter_bb_range_test_entry bb_ent;
3147
3148 bb_ent.op = NULL_TREE;
3149 bb_ent.first_idx = ops.length ();
3150 bb_ent.last_idx = bb_ent.first_idx;
3151 e = find_edge (bb, other_bb);
3152 stmt = last_stmt (bb);
3153 gimple_set_visited (stmt, true);
3154 if (gimple_code (stmt) != GIMPLE_COND)
3155 {
3156 use_operand_p use_p;
3157 gimple phi;
3158 edge e2;
3159 unsigned int d;
3160
3161 lhs = gimple_assign_lhs (stmt);
3162 rhs = gimple_assign_rhs1 (stmt);
3163 gcc_assert (bb == last_bb);
3164
3165 /* stmt is
3166 _123 = (int) _234;
3167
3168 followed by:
3169 <bb M>:
3170 # _345 = PHI <_123(N), 1(...), 1(...)>
3171
3172 or 0 instead of 1. If it is 0, the _234
3173 range test is anded together with all the
3174 other range tests, if it is 1, it is ored with
3175 them. */
3176 single_imm_use (lhs, &use_p, &phi);
3177 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3178 e2 = find_edge (first_bb, other_bb);
3179 d = e2->dest_idx;
3180 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3181 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3182 code = BIT_AND_EXPR;
3183 else
3184 {
3185 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3186 code = BIT_IOR_EXPR;
3187 }
3188
3189 /* If _234 SSA_NAME_DEF_STMT is
3190 _234 = _567 | _789;
3191 (or &, corresponding to 1/0 in the phi arguments),
3192 push into ops the individual range test arguments
3193 of the bitwise or resp. and, recursively. */
3194 if (!get_ops (rhs, code, &ops,
3195 loop_containing_stmt (stmt))
3196 && has_single_use (rhs))
3197 {
3198 /* Otherwise, push the _234 range test itself. */
3199 operand_entry_t oe
3200 = (operand_entry_t) pool_alloc (operand_entry_pool);
3201
3202 oe->op = rhs;
3203 oe->rank = code;
3204 oe->id = 0;
3205 oe->count = 1;
3206 ops.safe_push (oe);
3207 bb_ent.last_idx++;
3208 }
3209 else
3210 bb_ent.last_idx = ops.length ();
3211 bb_ent.op = rhs;
3212 bbinfo.safe_push (bb_ent);
3213 continue;
3214 }
3215 /* Otherwise stmt is GIMPLE_COND. */
3216 code = gimple_cond_code (stmt);
3217 lhs = gimple_cond_lhs (stmt);
3218 rhs = gimple_cond_rhs (stmt);
3219 if (TREE_CODE (lhs) == SSA_NAME
3220 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3221 && ((code != EQ_EXPR && code != NE_EXPR)
3222 || rhs != boolean_false_node
3223 /* Either push into ops the individual bitwise
3224 or resp. and operands, depending on which
3225 edge is other_bb. */
3226 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3227 ^ (code == EQ_EXPR))
3228 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3229 loop_containing_stmt (stmt))))
3230 {
3231 /* Or push the GIMPLE_COND stmt itself. */
3232 operand_entry_t oe
3233 = (operand_entry_t) pool_alloc (operand_entry_pool);
3234
3235 oe->op = NULL;
3236 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3237 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3238 /* oe->op = NULL signals that there is no SSA_NAME
3239 for the range test, and oe->id instead is the
3240 number of the basic block at whose end the
3241 GIMPLE_COND is. */
3242 oe->id = bb->index;
3243 oe->count = 1;
3244 ops.safe_push (oe);
3245 bb_ent.op = NULL;
3246 bb_ent.last_idx++;
3247 }
3248 else if (ops.length () > bb_ent.first_idx)
3249 {
3250 bb_ent.op = lhs;
3251 bb_ent.last_idx = ops.length ();
3252 }
3253 bbinfo.safe_push (bb_ent);
3254 if (bb == first_bb)
3255 break;
3256 }
3257 if (ops.length () > 1)
3258 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3259 if (any_changes)
3260 {
3261 unsigned int idx;
3262 /* update_ops relies on has_single_use predicates returning the
3263 same values as they did during get_ops earlier. Additionally it
3264 never removes statements, only adds new ones and it should walk
3265 from the single imm use and check the predicate already before
3266 making those changes.
3267 On the other hand, the direct handling of GIMPLE_COND can turn
3268 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3269 it needs to be done in a separate loop afterwards. */
3270 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3271 {
3272 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3273 && bbinfo[idx].op != NULL_TREE)
3274 {
3275 tree new_op;
3276
3277 stmt = last_stmt (bb);
3278 new_op = update_ops (bbinfo[idx].op,
3279 (enum tree_code)
3280 ops[bbinfo[idx].first_idx]->rank,
3281 ops, &bbinfo[idx].first_idx,
3282 loop_containing_stmt (stmt));
3283 if (new_op == NULL_TREE)
3284 {
3285 gcc_assert (bb == last_bb);
3286 new_op = ops[bbinfo[idx].first_idx++]->op;
3287 }
3288 if (bbinfo[idx].op != new_op)
3289 {
3290 imm_use_iterator iter;
3291 use_operand_p use_p;
3292 gimple use_stmt, cast_stmt = NULL;
3293
3294 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3295 if (is_gimple_debug (use_stmt))
3296 continue;
3297 else if (gimple_code (use_stmt) == GIMPLE_COND
3298 || gimple_code (use_stmt) == GIMPLE_PHI)
3299 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3300 SET_USE (use_p, new_op);
3301 else if (gimple_assign_cast_p (use_stmt))
3302 cast_stmt = use_stmt;
3303 else
3304 gcc_unreachable ();
3305 if (cast_stmt)
3306 {
3307 gcc_assert (bb == last_bb);
3308 tree lhs = gimple_assign_lhs (cast_stmt);
3309 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3310 enum tree_code rhs_code
3311 = gimple_assign_rhs_code (cast_stmt);
3312 gassign *g;
3313 if (is_gimple_min_invariant (new_op))
3314 {
3315 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3316 g = gimple_build_assign (new_lhs, new_op);
3317 }
3318 else
3319 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3320 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3321 gimple_set_uid (g, gimple_uid (cast_stmt));
3322 gimple_set_visited (g, true);
3323 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3324 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3325 if (is_gimple_debug (use_stmt))
3326 continue;
3327 else if (gimple_code (use_stmt) == GIMPLE_COND
3328 || gimple_code (use_stmt) == GIMPLE_PHI)
3329 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3330 SET_USE (use_p, new_lhs);
3331 else
3332 gcc_unreachable ();
3333 }
3334 }
3335 }
3336 if (bb == first_bb)
3337 break;
3338 }
3339 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3340 {
3341 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3342 && bbinfo[idx].op == NULL_TREE
3343 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3344 {
3345 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3346 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3347 gimple_cond_make_false (cond_stmt);
3348 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3349 gimple_cond_make_true (cond_stmt);
3350 else
3351 {
3352 gimple_cond_set_code (cond_stmt, NE_EXPR);
3353 gimple_cond_set_lhs (cond_stmt,
3354 ops[bbinfo[idx].first_idx]->op);
3355 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3356 }
3357 update_stmt (cond_stmt);
3358 }
3359 if (bb == first_bb)
3360 break;
3361 }
3362 }
3363 }
3364
3365 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3366 of STMT in its operands. This is also known as a "destructive
3367 update" operation. */
3368
3369 static bool
3370 is_phi_for_stmt (gimple stmt, tree operand)
3371 {
3372 gimple def_stmt;
3373 gphi *def_phi;
3374 tree lhs;
3375 use_operand_p arg_p;
3376 ssa_op_iter i;
3377
3378 if (TREE_CODE (operand) != SSA_NAME)
3379 return false;
3380
3381 lhs = gimple_assign_lhs (stmt);
3382
3383 def_stmt = SSA_NAME_DEF_STMT (operand);
3384 def_phi = dyn_cast <gphi *> (def_stmt);
3385 if (!def_phi)
3386 return false;
3387
3388 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3389 if (lhs == USE_FROM_PTR (arg_p))
3390 return true;
3391 return false;
3392 }
3393
3394 /* Remove def stmt of VAR if VAR has zero uses and recurse
3395 on rhs1 operand if so. */
3396
3397 static void
3398 remove_visited_stmt_chain (tree var)
3399 {
3400 gimple stmt;
3401 gimple_stmt_iterator gsi;
3402
3403 while (1)
3404 {
3405 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3406 return;
3407 stmt = SSA_NAME_DEF_STMT (var);
3408 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3409 {
3410 var = gimple_assign_rhs1 (stmt);
3411 gsi = gsi_for_stmt (stmt);
3412 reassoc_remove_stmt (&gsi);
3413 release_defs (stmt);
3414 }
3415 else
3416 return;
3417 }
3418 }
3419
3420 /* This function checks three consecutive operands in
3421 the passed operand vector OPS starting from OPINDEX and
3422 swaps two operands if it is profitable for the binary operation
3423 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3424
3425 We pair ops with the same rank if possible.
3426
3427 The alternative we try is to see if STMT is a destructive
3428 update style statement, which is like:
3429 b = phi (a, ...)
3430 a = c + b;
3431 In that case, we want to use the destructive update form to
3432 expose the possible vectorizer sum reduction opportunity.
3433 In that case, the third operand will be the phi node. This
3434 check is not performed if STMT is null.
3435
3436 We could, of course, try to be better as noted above, and do a
3437 lot of work to try to find these opportunities in >3 operand
3438 cases, but it is unlikely to be worth it. */
3439
3440 static void
3441 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3442 unsigned int opindex, gimple stmt)
3443 {
3444 operand_entry_t oe1, oe2, oe3;
3445
3446 oe1 = ops[opindex];
3447 oe2 = ops[opindex + 1];
3448 oe3 = ops[opindex + 2];
3449
3450 if ((oe1->rank == oe2->rank
3451 && oe2->rank != oe3->rank)
3452 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3453 && !is_phi_for_stmt (stmt, oe1->op)
3454 && !is_phi_for_stmt (stmt, oe2->op)))
3455 {
3456 struct operand_entry temp = *oe3;
3457 oe3->op = oe1->op;
3458 oe3->rank = oe1->rank;
3459 oe1->op = temp.op;
3460 oe1->rank = temp.rank;
3461 }
3462 else if ((oe1->rank == oe3->rank
3463 && oe2->rank != oe3->rank)
3464 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3465 && !is_phi_for_stmt (stmt, oe1->op)
3466 && !is_phi_for_stmt (stmt, oe3->op)))
3467 {
3468 struct operand_entry temp = *oe2;
3469 oe2->op = oe1->op;
3470 oe2->rank = oe1->rank;
3471 oe1->op = temp.op;
3472 oe1->rank = temp.rank;
3473 }
3474 }
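
/* A standalone sketch of the destructive update pattern the heuristic above
   prefers to keep last (illustrative only, not used by the pass): the
   loop-carried 'sum' is defined by a PHI that uses the statement's own lhs,
   and keeping it as the last operand preserves the reduction shape for the
   vectorizer.  */

static int
illustrative_sum_reduction (const int *a, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    /* In SSA: sum_1 = PHI <0, sum_2>; sum_2 = a[i] + sum_1.  */
    sum = a[i] + sum;
  return sum;
}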
3475
3476 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3477 two definitions, otherwise return STMT. */
3478
3479 static inline gimple
3480 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3481 {
3482 if (TREE_CODE (rhs1) == SSA_NAME
3483 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3484 stmt = SSA_NAME_DEF_STMT (rhs1);
3485 if (TREE_CODE (rhs2) == SSA_NAME
3486 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3487 stmt = SSA_NAME_DEF_STMT (rhs2);
3488 return stmt;
3489 }
3490
3491 /* Recursively rewrite our linearized statements so that the operators
3492 match those in OPS[OPINDEX], putting the computation in rank
3493 order. Return new lhs. */
3494
3495 static tree
3496 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3497 vec<operand_entry_t> ops, bool changed)
3498 {
3499 tree rhs1 = gimple_assign_rhs1 (stmt);
3500 tree rhs2 = gimple_assign_rhs2 (stmt);
3501 tree lhs = gimple_assign_lhs (stmt);
3502 operand_entry_t oe;
3503
3504 /* The final recursion case for this function is that you have
3505 exactly two operations left.
3506 If we had exactly one op in the entire list to start with, we
3507 would have never called this function, and the tail recursion
3508 rewrites them one at a time. */
3509 if (opindex + 2 == ops.length ())
3510 {
3511 operand_entry_t oe1, oe2;
3512
3513 oe1 = ops[opindex];
3514 oe2 = ops[opindex + 1];
3515
3516 if (rhs1 != oe1->op || rhs2 != oe2->op)
3517 {
3518 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3519 unsigned int uid = gimple_uid (stmt);
3520
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3522 {
3523 fprintf (dump_file, "Transforming ");
3524 print_gimple_stmt (dump_file, stmt, 0, 0);
3525 }
3526
3527 /* Even when changed is false, reassociation could have e.g. removed
3528 some redundant operations, so unless we are just swapping the
3529 arguments or unless there is no change at all (then we just
3530 return lhs), force creation of a new SSA_NAME. */
3531 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3532 {
3533 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3534 lhs = make_ssa_name (TREE_TYPE (lhs));
3535 stmt
3536 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3537 oe1->op, oe2->op);
3538 gimple_set_uid (stmt, uid);
3539 gimple_set_visited (stmt, true);
3540 if (insert_point == gsi_stmt (gsi))
3541 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3542 else
3543 insert_stmt_after (stmt, insert_point);
3544 }
3545 else
3546 {
3547 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3548 == stmt);
3549 gimple_assign_set_rhs1 (stmt, oe1->op);
3550 gimple_assign_set_rhs2 (stmt, oe2->op);
3551 update_stmt (stmt);
3552 }
3553
3554 if (rhs1 != oe1->op && rhs1 != oe2->op)
3555 remove_visited_stmt_chain (rhs1);
3556
3557 if (dump_file && (dump_flags & TDF_DETAILS))
3558 {
3559 fprintf (dump_file, " into ");
3560 print_gimple_stmt (dump_file, stmt, 0, 0);
3561 }
3562 }
3563 return lhs;
3564 }
3565
3566 /* If we hit here, we should have 3 or more ops left. */
3567 gcc_assert (opindex + 2 < ops.length ());
3568
3569 /* Rewrite the next operator. */
3570 oe = ops[opindex];
3571
3572 /* Recurse on the LHS of the binary operator, which is guaranteed to
3573 be the non-leaf side. */
3574 tree new_rhs1
3575 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3576 changed || oe->op != rhs2);
3577
3578 if (oe->op != rhs2 || new_rhs1 != rhs1)
3579 {
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 {
3582 fprintf (dump_file, "Transforming ");
3583 print_gimple_stmt (dump_file, stmt, 0, 0);
3584 }
3585
3586 /* If changed is false, this is either opindex == 0
3587 or all outer rhs2's were equal to corresponding oe->op,
3588 and powi_result is NULL.
3589 That means lhs is equivalent before and after reassociation.
3590 Otherwise ensure the old lhs SSA_NAME is not reused and
3591 create a new stmt as well, so that any debug stmts will be
3592 properly adjusted. */
3593 if (changed)
3594 {
3595 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3596 unsigned int uid = gimple_uid (stmt);
3597 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3598
3599 lhs = make_ssa_name (TREE_TYPE (lhs));
3600 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3601 new_rhs1, oe->op);
3602 gimple_set_uid (stmt, uid);
3603 gimple_set_visited (stmt, true);
3604 if (insert_point == gsi_stmt (gsi))
3605 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3606 else
3607 insert_stmt_after (stmt, insert_point);
3608 }
3609 else
3610 {
3611 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3612 == stmt);
3613 gimple_assign_set_rhs1 (stmt, new_rhs1);
3614 gimple_assign_set_rhs2 (stmt, oe->op);
3615 update_stmt (stmt);
3616 }
3617
3618 if (dump_file && (dump_flags & TDF_DETAILS))
3619 {
3620 fprintf (dump_file, " into ");
3621 print_gimple_stmt (dump_file, stmt, 0, 0);
3622 }
3623 }
3624 return lhs;
3625 }
3626
3627 /* Find out how many cycles we need to compute the statement chain.
3628 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3629 maximum number of independent statements we may execute per cycle. */
3630
3631 static int
3632 get_required_cycles (int ops_num, int cpu_width)
3633 {
3634 int res;
3635 int elog;
3636 unsigned int rest;
3637
3638 /* While we have more than 2 * cpu_width operands
3639 we may reduce the number of operands by cpu_width
3640 per cycle. */
3641 res = ops_num / (2 * cpu_width);
3642
3643 /* The remaining operand count may be halved each cycle
3644 until we have only one operand. */
3645 rest = (unsigned)(ops_num - res * cpu_width);
3646 elog = exact_log2 (rest);
3647 if (elog >= 0)
3648 res += elog;
3649 else
3650 res += floor_log2 (rest) + 1;
3651
3652 return res;
3653 }
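
/* A worked sketch of the formula above on made-up numbers (illustrative
   only, not used by the pass): with 10 operands and a width of 2, the first
   10 / (2 * 2) = 2 cycles reduce the count to 10 - 2 * 2 = 6 operands, and
   the remaining 6 are combined pairwise in ceil (log2 (6)) = 3 more cycles,
   5 cycles in total.  */

static inline int
illustrative_required_cycles (void)
{
  return get_required_cycles (10, 2);  /* Evaluates to 5.  */
}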
3654
3655 /* Return an optimal number of registers to use for computation of the
3656 given statements. */
3657
3658 static int
3659 get_reassociation_width (int ops_num, enum tree_code opc,
3660 machine_mode mode)
3661 {
3662 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3663 int width;
3664 int width_min;
3665 int cycles_best;
3666
3667 if (param_width > 0)
3668 width = param_width;
3669 else
3670 width = targetm.sched.reassociation_width (opc, mode);
3671
3672 if (width == 1)
3673 return width;
3674
3675 /* Get the minimal time required for sequence computation. */
3676 cycles_best = get_required_cycles (ops_num, width);
3677
3678 /* Check if we may use a smaller width and still compute the sequence in
3679 the same time. This will allow us to reduce register usage.
3680 get_required_cycles increases monotonically as the width decreases,
3681 so we can perform a binary search for the minimal width that still
3682 results in the optimal cycle count. */
3683 width_min = 1;
3684 while (width > width_min)
3685 {
3686 int width_mid = (width + width_min) / 2;
3687
3688 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3689 width = width_mid;
3690 else if (width_min < width_mid)
3691 width_min = width_mid;
3692 else
3693 break;
3694 }
3695
3696 return width;
3697 }
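
/* A worked sketch of the width search above on made-up numbers
   (illustrative only, not used by the pass): for 4 operands a width of 2
   already reaches the 2-cycle optimum of a width of 4, so the binary search
   settles on the smaller width and saves registers.  */

static inline int
illustrative_width_is_enough (void)
{
  return get_required_cycles (4, 2) == get_required_cycles (4, 4);  /* 1 */
}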
3698
3699 /* Recursively rewrite our linearized statements so that the operators
3700 match those in OPS[OPINDEX], putting the computation in rank
3701 order and trying to allow operations to be executed in
3702 parallel. */
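
/* A standalone sketch of the effect (illustrative only, not used by the
   pass): with a width of 2, the serial chain t1 = a + b; t2 = t1 + c;
   res = t2 + d is rewritten into the shallower form below, where the two
   leading additions no longer depend on each other.  */

static inline int
illustrative_parallel_rewrite (int a, int b, int c, int d)
{
  int t1 = a + b;  /* Independent of t2.  */
  int t2 = c + d;  /* Independent of t1.  */
  return t1 + t2;
}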
3703
3704 static void
3705 rewrite_expr_tree_parallel (gassign *stmt, int width,
3706 vec<operand_entry_t> ops)
3707 {
3708 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3709 int op_num = ops.length ();
3710 int stmt_num = op_num - 1;
3711 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3712 int op_index = op_num - 1;
3713 int stmt_index = 0;
3714 int ready_stmts_end = 0;
3715 int i = 0;
3716 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3717
3718 /* We start expression rewriting from the top statements.
3719 So, in this loop we create a full list of statements
3720 we will work with. */
3721 stmts[stmt_num - 1] = stmt;
3722 for (i = stmt_num - 2; i >= 0; i--)
3723 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3724
3725 for (i = 0; i < stmt_num; i++)
3726 {
3727 tree op1, op2;
3728
3729 /* Determine whether we should use results of
3730 already handled statements or not. */
3731 if (ready_stmts_end == 0
3732 && (i - stmt_index >= width || op_index < 1))
3733 ready_stmts_end = i;
3734
3735 /* Now we choose operands for the next statement. A non-zero
3736 value in ready_stmts_end means that we should use
3737 the result of already generated statements as the new operand. */
3738 if (ready_stmts_end > 0)
3739 {
3740 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3741 if (ready_stmts_end > stmt_index)
3742 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3743 else if (op_index >= 0)
3744 op2 = ops[op_index--]->op;
3745 else
3746 {
3747 gcc_assert (stmt_index < i);
3748 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3749 }
3750
3751 if (stmt_index >= ready_stmts_end)
3752 ready_stmts_end = 0;
3753 }
3754 else
3755 {
3756 if (op_index > 1)
3757 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3758 op2 = ops[op_index--]->op;
3759 op1 = ops[op_index--]->op;
3760 }
3761
3762 /* If we are emitting the last statement, we should put the
3763 remaining operands into it. This will also
3764 terminate the loop. */
3765 if (op_index < 0 && stmt_index == i)
3766 i = stmt_num - 1;
3767
3768 if (dump_file && (dump_flags & TDF_DETAILS))
3769 {
3770 fprintf (dump_file, "Transforming ");
3771 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3772 }
3773
3774 /* We keep the original statement only for the last one. All
3775 others are recreated. */
3776 if (i == stmt_num - 1)
3777 {
3778 gimple_assign_set_rhs1 (stmts[i], op1);
3779 gimple_assign_set_rhs2 (stmts[i], op2);
3780 update_stmt (stmts[i]);
3781 }
3782 else
3783 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3784
3785 if (dump_file && (dump_flags & TDF_DETAILS))
3786 {
3787 fprintf (dump_file, " into ");
3788 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3789 }
3790 }
3791
3792 remove_visited_stmt_chain (last_rhs1);
3793 }
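
/* Schematically, for four operands and width 2 (the SSA names below are
   purely illustrative), the serial chain
       _1 = a + b;  _2 = _1 + c;  _3 = _2 + d;
   is rebuilt as
       _1 = c + d;  _2 = a + b;  _3 = _1 + _2;
   so the first two additions no longer depend on each other and may be
   executed in the same cycle.  */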
3794
3795 /* Transform STMT, which is really (A + B) + (C + D), into the left
3796 linear form ((A + B) + C) + D.
3797 Recurse on D if necessary. */
3798
3799 static void
3800 linearize_expr (gimple stmt)
3801 {
3802 gimple_stmt_iterator gsi;
3803 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3804 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3805 gimple oldbinrhs = binrhs;
3806 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3807 gimple newbinrhs = NULL;
3808 struct loop *loop = loop_containing_stmt (stmt);
3809 tree lhs = gimple_assign_lhs (stmt);
3810
3811 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3812 && is_reassociable_op (binrhs, rhscode, loop));
3813
3814 gsi = gsi_for_stmt (stmt);
3815
3816 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3817 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3818 gimple_assign_rhs_code (binrhs),
3819 gimple_assign_lhs (binlhs),
3820 gimple_assign_rhs2 (binrhs));
3821 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3822 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3823 gimple_set_uid (binrhs, gimple_uid (stmt));
3824
3825 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3826 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3827
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3829 {
3830 fprintf (dump_file, "Linearized: ");
3831 print_gimple_stmt (dump_file, stmt, 0, 0);
3832 }
3833
3834 reassociate_stats.linearized++;
3835 update_stmt (stmt);
3836
3837 gsi = gsi_for_stmt (oldbinrhs);
3838 reassoc_remove_stmt (&gsi);
3839 release_defs (oldbinrhs);
3840
3841 gimple_set_visited (stmt, true);
3842 gimple_set_visited (binlhs, true);
3843 gimple_set_visited (binrhs, true);
3844
3845 /* Tail recurse on the new rhs if it still needs reassociation. */
3846 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3847 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3848 want to change the algorithm while converting to tuples. */
3849 linearize_expr (stmt);
3850 }
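
/* For example (the SSA names are illustrative), given
       x = a + b;  y = c + d;  s = x + y;
   a new statement is inserted before S and S is rewritten, yielding the
   left-linear form
       x = a + b;  t = x + d;  s = t + c;
   after which the old definition of y is removed.  */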
3851
3852 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3853 it. Otherwise, return NULL. */
3854
3855 static gimple
3856 get_single_immediate_use (tree lhs)
3857 {
3858 use_operand_p immuse;
3859 gimple immusestmt;
3860
3861 if (TREE_CODE (lhs) == SSA_NAME
3862 && single_imm_use (lhs, &immuse, &immusestmt)
3863 && is_gimple_assign (immusestmt))
3864 return immusestmt;
3865
3866 return NULL;
3867 }
3868
3869 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3870 representing the negated value. Insertions of any necessary
3871 instructions go before GSI.
3872 This function is recursive in that, if you hand it "a_5" as the
3873 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3874 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3875
3876 static tree
3877 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3878 {
3879 gimple negatedefstmt = NULL;
3880 tree resultofnegate;
3881 gimple_stmt_iterator gsi;
3882 unsigned int uid;
3883
3884 /* If we are trying to negate a name defined by an add, negate the
3885 add operands instead. */
3886 if (TREE_CODE (tonegate) == SSA_NAME)
3887 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3888 if (TREE_CODE (tonegate) == SSA_NAME
3889 && is_gimple_assign (negatedefstmt)
3890 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3891 && has_single_use (gimple_assign_lhs (negatedefstmt))
3892 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3893 {
3894 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3895 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3896 tree lhs = gimple_assign_lhs (negatedefstmt);
3897 gimple g;
3898
3899 gsi = gsi_for_stmt (negatedefstmt);
3900 rhs1 = negate_value (rhs1, &gsi);
3901
3902 gsi = gsi_for_stmt (negatedefstmt);
3903 rhs2 = negate_value (rhs2, &gsi);
3904
3905 gsi = gsi_for_stmt (negatedefstmt);
3906 lhs = make_ssa_name (TREE_TYPE (lhs));
3907 gimple_set_visited (negatedefstmt, true);
3908 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3909 gimple_set_uid (g, gimple_uid (negatedefstmt));
3910 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3911 return lhs;
3912 }
3913
3914 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3915 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3916 NULL_TREE, true, GSI_SAME_STMT);
3917 gsi = *gsip;
3918 uid = gimple_uid (gsi_stmt (gsi));
3919 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3920 {
3921 gimple stmt = gsi_stmt (gsi);
3922 if (gimple_uid (stmt) != 0)
3923 break;
3924 gimple_set_uid (stmt, uid);
3925 }
3926 return resultofnegate;
3927 }
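
/* For instance (names illustrative), negating a_5, where
   a_5 = b_3 + b_4 and a_5 has a single use, negates each operand and
   builds a replacement addition before a_5's definition:
       _n1 = -b_3;  _n2 = -b_4;  a_6 = _n1 + _n2;
   returning a_6 to stand for -a_5.  */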
3928
3929 /* Return true if we should break up the subtract in STMT into an add
3930 with negate. This is true when the subtract's operands are really
3931 adds, or the subtract itself is used in an add expression. In
3932 either case, breaking up the subtract into an add with negate
3933 exposes the adds to reassociation. */
3934
3935 static bool
3936 should_break_up_subtract (gimple stmt)
3937 {
3938 tree lhs = gimple_assign_lhs (stmt);
3939 tree binlhs = gimple_assign_rhs1 (stmt);
3940 tree binrhs = gimple_assign_rhs2 (stmt);
3941 gimple immusestmt;
3942 struct loop *loop = loop_containing_stmt (stmt);
3943
3944 if (TREE_CODE (binlhs) == SSA_NAME
3945 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3946 return true;
3947
3948 if (TREE_CODE (binrhs) == SSA_NAME
3949 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3950 return true;
3951
3952 if (TREE_CODE (lhs) == SSA_NAME
3953 && (immusestmt = get_single_immediate_use (lhs))
3954 && is_gimple_assign (immusestmt)
3955 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3956 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3957 return true;
3958 return false;
3959 }
3960
3961 /* Transform STMT from A - B into A + -B. */
3962
3963 static void
3964 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3965 {
3966 tree rhs1 = gimple_assign_rhs1 (stmt);
3967 tree rhs2 = gimple_assign_rhs2 (stmt);
3968
3969 if (dump_file && (dump_flags & TDF_DETAILS))
3970 {
3971 fprintf (dump_file, "Breaking up subtract ");
3972 print_gimple_stmt (dump_file, stmt, 0, 0);
3973 }
3974
3975 rhs2 = negate_value (rhs2, gsip);
3976 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3977 update_stmt (stmt);
3978 }
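
/* For example (names illustrative), w = a - b becomes
       _n = -b;  w = a + _n;
   unless b is itself defined by a single-use addition, in which case
   negate_value pushes the negation into that addition instead.  */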
3979
3980 /* Determine whether STMT is a builtin call that raises an SSA name
3981 to an integer power and has only one use. If so, and this is early
3982 reassociation and unsafe math optimizations are permitted, place
3983 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3984 If any of these conditions does not hold, return FALSE. */
3985
3986 static bool
3987 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3988 {
3989 tree fndecl, arg1;
3990 REAL_VALUE_TYPE c, cint;
3991
3992 if (!first_pass_instance
3993 || !flag_unsafe_math_optimizations
3994 || !is_gimple_call (stmt)
3995 || !has_single_use (gimple_call_lhs (stmt)))
3996 return false;
3997
3998 fndecl = gimple_call_fndecl (stmt);
3999
4000 if (!fndecl
4001 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
4002 return false;
4003
4004 switch (DECL_FUNCTION_CODE (fndecl))
4005 {
4006 CASE_FLT_FN (BUILT_IN_POW):
4007 if (flag_errno_math)
4008 return false;
4009
4010 *base = gimple_call_arg (stmt, 0);
4011 arg1 = gimple_call_arg (stmt, 1);
4012
4013 if (TREE_CODE (arg1) != REAL_CST)
4014 return false;
4015
4016 c = TREE_REAL_CST (arg1);
4017
4018 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4019 return false;
4020
4021 *exponent = real_to_integer (&c);
4022 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4023 if (!real_identical (&c, &cint))
4024 return false;
4025
4026 break;
4027
4028 CASE_FLT_FN (BUILT_IN_POWI):
4029 *base = gimple_call_arg (stmt, 0);
4030 arg1 = gimple_call_arg (stmt, 1);
4031
4032 if (!tree_fits_shwi_p (arg1))
4033 return false;
4034
4035 *exponent = tree_to_shwi (arg1);
4036 break;
4037
4038 default:
4039 return false;
4040 }
4041
4042 /* Expanding negative exponents is generally unproductive, so we don't
4043 complicate matters with those. Exponents of zero and one should
4044 have been handled by expression folding. */
4045 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4046 return false;
4047
4048 return true;
4049 }
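
/* As an illustration, assuming the unsafe-math, single-use and errno
   checks above are satisfied: t = __builtin_pow (x_1, 3.0) yields
   *BASE = x_1 and *EXPONENT = 3; __builtin_pow (x_1, 2.5) is rejected
   because 2.5 is not exactly an integer; and __builtin_powi (x_1, 1) is
   rejected because exponents below 2 are not worth expanding.  */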
4050
4051 /* Recursively linearize a binary expression that is the RHS of STMT.
4052 Place the operands of the expression tree in the vector named OPS. */
4053
4054 static void
4055 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4056 bool is_associative, bool set_visited)
4057 {
4058 tree binlhs = gimple_assign_rhs1 (stmt);
4059 tree binrhs = gimple_assign_rhs2 (stmt);
4060 gimple binlhsdef = NULL, binrhsdef = NULL;
4061 bool binlhsisreassoc = false;
4062 bool binrhsisreassoc = false;
4063 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4064 struct loop *loop = loop_containing_stmt (stmt);
4065 tree base = NULL_TREE;
4066 HOST_WIDE_INT exponent = 0;
4067
4068 if (set_visited)
4069 gimple_set_visited (stmt, true);
4070
4071 if (TREE_CODE (binlhs) == SSA_NAME)
4072 {
4073 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4074 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4075 && !stmt_could_throw_p (binlhsdef));
4076 }
4077
4078 if (TREE_CODE (binrhs) == SSA_NAME)
4079 {
4080 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4081 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4082 && !stmt_could_throw_p (binrhsdef));
4083 }
4084
4085 /* If the LHS is not reassociable, but the RHS is, we need to swap
4086 them. If neither is reassociable, there is nothing we can do, so
4087 just put them in the ops vector. If the LHS is reassociable,
4088 linearize it. If both are reassociable, then linearize the RHS
4089 and the LHS. */
4090
4091 if (!binlhsisreassoc)
4092 {
4093 /* If this is not an associative operation, like division, give up. */
4094 if (!is_associative)
4095 {
4096 add_to_ops_vec (ops, binrhs);
4097 return;
4098 }
4099
4100 if (!binrhsisreassoc)
4101 {
4102 if (rhscode == MULT_EXPR
4103 && TREE_CODE (binrhs) == SSA_NAME
4104 && acceptable_pow_call (binrhsdef, &base, &exponent))
4105 {
4106 add_repeat_to_ops_vec (ops, base, exponent);
4107 gimple_set_visited (binrhsdef, true);
4108 }
4109 else
4110 add_to_ops_vec (ops, binrhs);
4111
4112 if (rhscode == MULT_EXPR
4113 && TREE_CODE (binlhs) == SSA_NAME
4114 && acceptable_pow_call (binlhsdef, &base, &exponent))
4115 {
4116 add_repeat_to_ops_vec (ops, base, exponent);
4117 gimple_set_visited (binlhsdef, true);
4118 }
4119 else
4120 add_to_ops_vec (ops, binlhs);
4121
4122 return;
4123 }
4124
4125 if (dump_file && (dump_flags & TDF_DETAILS))
4126 {
4127 fprintf (dump_file, "swapping operands of ");
4128 print_gimple_stmt (dump_file, stmt, 0, 0);
4129 }
4130
4131 swap_ssa_operands (stmt,
4132 gimple_assign_rhs1_ptr (stmt),
4133 gimple_assign_rhs2_ptr (stmt));
4134 update_stmt (stmt);
4135
4136 if (dump_file && (dump_flags & TDF_DETAILS))
4137 {
4138 fprintf (dump_file, " is now ");
4139 print_gimple_stmt (dump_file, stmt, 0, 0);
4140 }
4141
4142 /* We want to make it so the lhs is always the reassociative op,
4143 so swap. */
4144 std::swap (binlhs, binrhs);
4145 }
4146 else if (binrhsisreassoc)
4147 {
4148 linearize_expr (stmt);
4149 binlhs = gimple_assign_rhs1 (stmt);
4150 binrhs = gimple_assign_rhs2 (stmt);
4151 }
4152
4153 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4154 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4155 rhscode, loop));
4156 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4157 is_associative, set_visited);
4158
4159 if (rhscode == MULT_EXPR
4160 && TREE_CODE (binrhs) == SSA_NAME
4161 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4162 {
4163 add_repeat_to_ops_vec (ops, base, exponent);
4164 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4165 }
4166 else
4167 add_to_ops_vec (ops, binrhs);
4168 }
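
/* As an example (names illustrative), linearizing the chain rooted at
   s = x_3 + d, where x_3 = x_2 + c and x_2 = a + b are single-use
   reassociable statements, adds the four leaf operands a, b, c and d
   to OPS; their order here does not matter, since the caller sorts OPS
   by rank afterwards.  */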
4169
4170 /* Repropagate the negates back into subtracts, since no other pass
4171 currently does it. */
4172
4173 static void
4174 repropagate_negates (void)
4175 {
4176 unsigned int i = 0;
4177 tree negate;
4178
4179 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4180 {
4181 gimple user = get_single_immediate_use (negate);
4182
4183 if (!user || !is_gimple_assign (user))
4184 continue;
4185
4186 /* The negate operand can be either operand of a PLUS_EXPR
4187 (it can be the LHS if the RHS is a constant for example).
4188
4189 Force the negate operand to the RHS of the PLUS_EXPR, then
4190 transform the PLUS_EXPR into a MINUS_EXPR. */
4191 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4192 {
4193 /* If the negated operand appears on the LHS of the
4194 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4195 to force the negated operand to the RHS of the PLUS_EXPR. */
4196 if (gimple_assign_rhs1 (user) == negate)
4197 {
4198 swap_ssa_operands (user,
4199 gimple_assign_rhs1_ptr (user),
4200 gimple_assign_rhs2_ptr (user));
4201 }
4202
4203 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4204 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4205 if (gimple_assign_rhs2 (user) == negate)
4206 {
4207 tree rhs1 = gimple_assign_rhs1 (user);
4208 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4209 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4210 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4211 update_stmt (user);
4212 }
4213 }
4214 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4215 {
4216 if (gimple_assign_rhs1 (user) == negate)
4217 {
4218 /* We have
4219 x = -a
4220 y = x - b
4221 which we transform into
4222 x = a + b
4223 y = -x .
4224 This pushes down the negate which we possibly can merge
4225 into some other operation, hence insert it into the
4226 plus_negates vector. */
4227 gimple feed = SSA_NAME_DEF_STMT (negate);
4228 tree a = gimple_assign_rhs1 (feed);
4229 tree b = gimple_assign_rhs2 (user);
4230 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4231 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4232 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4233 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4234 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4235 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4236 user = gsi_stmt (gsi2);
4237 update_stmt (user);
4238 reassoc_remove_stmt (&gsi);
4239 release_defs (feed);
4240 plus_negates.safe_push (gimple_assign_lhs (user));
4241 }
4242 else
4243 {
4244 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4245 rid of one operation. */
4246 gimple feed = SSA_NAME_DEF_STMT (negate);
4247 tree a = gimple_assign_rhs1 (feed);
4248 tree rhs1 = gimple_assign_rhs1 (user);
4249 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4250 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4251 update_stmt (gsi_stmt (gsi));
4252 }
4253 }
4254 }
4255 }
4256
4257 /* Returns true if OP is of a type for which we can do reassociation.
4258 That is for integral types with wrapping overflow, non-saturating fixed-point
4259 types, and floating-point types when -fassociative-math is enabled. */
4260
4261 static bool
4262 can_reassociate_p (tree op)
4263 {
4264 tree type = TREE_TYPE (op);
4265 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4266 || NON_SAT_FIXED_POINT_TYPE_P (type)
4267 || (flag_associative_math && FLOAT_TYPE_P (type)))
4268 return true;
4269 return false;
4270 }
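
/* For instance, an unsigned int operand qualifies (unsigned overflow
   wraps), a plain signed int operand does not unless -fwrapv is in
   effect, and a double operand qualifies only when -fassociative-math
   is enabled.  */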
4271
4272 /* Break up subtract operations in block BB.
4273
4274 We do this top down because we don't know whether the subtract is
4275 part of a possible chain of reassociation except at the top.
4276
4277 IE given
4278 d = f + g
4279 c = a + e
4280 b = c - d
4281 q = b - r
4282 k = t - q
4283
4284 we want to break up k = t - q, but we won't until we've transformed q
4285 = b - r, which won't be broken up until we transform b = c - d.
4286
4287 En passant, clear the GIMPLE visited flag on every statement
4288 and set UIDs within each basic block. */
4289
4290 static void
4291 break_up_subtract_bb (basic_block bb)
4292 {
4293 gimple_stmt_iterator gsi;
4294 basic_block son;
4295 unsigned int uid = 1;
4296
4297 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4298 {
4299 gimple stmt = gsi_stmt (gsi);
4300 gimple_set_visited (stmt, false);
4301 gimple_set_uid (stmt, uid++);
4302
4303 if (!is_gimple_assign (stmt)
4304 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4305 continue;
4306
4307 /* Look for simple gimple subtract operations. */
4308 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4309 {
4310 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4311 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4312 continue;
4313
4314 /* Check for a subtract used only in an addition. If this
4315 is the case, transform it into an add of a negate for better
4316 reassociation. IE transform C = A - B into C = A + -B if C
4317 is only used in an addition. */
4318 if (should_break_up_subtract (stmt))
4319 break_up_subtract (stmt, &gsi);
4320 }
4321 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4322 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4323 plus_negates.safe_push (gimple_assign_lhs (stmt));
4324 }
4325 for (son = first_dom_son (CDI_DOMINATORS, bb);
4326 son;
4327 son = next_dom_son (CDI_DOMINATORS, son))
4328 break_up_subtract_bb (son);
4329 }
4330
4331 /* Used for repeated factor analysis. */
4332 struct repeat_factor_d
4333 {
4334 /* An SSA name that occurs in a multiply chain. */
4335 tree factor;
4336
4337 /* Cached rank of the factor. */
4338 unsigned rank;
4339
4340 /* Number of occurrences of the factor in the chain. */
4341 HOST_WIDE_INT count;
4342
4343 /* An SSA name representing the product of this factor and
4344 all factors appearing later in the repeated factor vector. */
4345 tree repr;
4346 };
4347
4348 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4349 typedef const struct repeat_factor_d *const_repeat_factor_t;
4350
4351
4352 static vec<repeat_factor> repeat_factor_vec;
4353
4354 /* Used for sorting the repeat factor vector. Sort primarily by
4355 ascending occurrence count, secondarily by descending rank. */
4356
4357 static int
4358 compare_repeat_factors (const void *x1, const void *x2)
4359 {
4360 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4361 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4362
4363 if (rf1->count != rf2->count)
4364 return rf1->count - rf2->count;
4365
4366 return rf2->rank - rf1->rank;
4367 }
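
/* For example, a factor occurring twice sorts before one occurring
   three times; between two factors that each occur twice, the one with
   the higher rank sorts first.  */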
4368
4369 /* Look for repeated operands in OPS in the multiply tree rooted at
4370 STMT. Replace them with an optimal sequence of multiplies and powi
4371 builtin calls, and remove the used operands from OPS. Return an
4372 SSA name representing the value of the replacement sequence. */
4373
4374 static tree
4375 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4376 {
4377 unsigned i, j, vec_len;
4378 int ii;
4379 operand_entry_t oe;
4380 repeat_factor_t rf1, rf2;
4381 repeat_factor rfnew;
4382 tree result = NULL_TREE;
4383 tree target_ssa, iter_result;
4384 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4385 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4386 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4387 gimple mul_stmt, pow_stmt;
4388
4389 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4390 target. */
4391 if (!powi_fndecl)
4392 return NULL_TREE;
4393
4394 /* Allocate the repeated factor vector. */
4395 repeat_factor_vec.create (10);
4396
4397 /* Scan the OPS vector for all SSA names in the product and build
4398 up a vector of occurrence counts for each factor. */
4399 FOR_EACH_VEC_ELT (*ops, i, oe)
4400 {
4401 if (TREE_CODE (oe->op) == SSA_NAME)
4402 {
4403 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4404 {
4405 if (rf1->factor == oe->op)
4406 {
4407 rf1->count += oe->count;
4408 break;
4409 }
4410 }
4411
4412 if (j >= repeat_factor_vec.length ())
4413 {
4414 rfnew.factor = oe->op;
4415 rfnew.rank = oe->rank;
4416 rfnew.count = oe->count;
4417 rfnew.repr = NULL_TREE;
4418 repeat_factor_vec.safe_push (rfnew);
4419 }
4420 }
4421 }
4422
4423 /* Sort the repeated factor vector by (a) increasing occurrence count,
4424 and (b) decreasing rank. */
4425 repeat_factor_vec.qsort (compare_repeat_factors);
4426
4427 /* It is generally best to combine as many base factors as possible
4428 into a product before applying __builtin_powi to the result.
4429 However, the sort order chosen for the repeated factor vector
4430 allows us to cache partial results for the product of the base
4431 factors for subsequent use. When we already have a cached partial
4432 result from a previous iteration, it is best to make use of it
4433 before looking for another __builtin_pow opportunity.
4434
4435 As an example, consider x * x * y * y * y * z * z * z * z.
4436 We want to first compose the product x * y * z, raise it to the
4437 second power, then multiply this by y * z, and finally multiply
4438 by z. This can be done in 5 multiplies provided we cache y * z
4439 for use in both expressions:
4440
4441 t1 = y * z
4442 t2 = t1 * x
4443 t3 = t2 * t2
4444 t4 = t1 * t3
4445 result = t4 * z
4446
4447 If we instead ignored the cached y * z and first multiplied by
4448 the __builtin_pow opportunity z * z, we would get the inferior:
4449
4450 t1 = y * z
4451 t2 = t1 * x
4452 t3 = t2 * t2
4453 t4 = z * z
4454 t5 = t3 * t4
4455 result = t5 * y */
4456
4457 vec_len = repeat_factor_vec.length ();
4458
4459 /* Repeatedly look for opportunities to create a builtin_powi call. */
4460 while (true)
4461 {
4462 HOST_WIDE_INT power;
4463
4464 /* First look for the largest cached product of factors from
4465 preceding iterations. If found, create a builtin_powi for
4466 it if the minimum occurrence count for its factors is at
4467 least 2, or just use this cached product as our next
4468 multiplicand if the minimum occurrence count is 1. */
4469 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4470 {
4471 if (rf1->repr && rf1->count > 0)
4472 break;
4473 }
4474
4475 if (j < vec_len)
4476 {
4477 power = rf1->count;
4478
4479 if (power == 1)
4480 {
4481 iter_result = rf1->repr;
4482
4483 if (dump_file && (dump_flags & TDF_DETAILS))
4484 {
4485 unsigned elt;
4486 repeat_factor_t rf;
4487 fputs ("Multiplying by cached product ", dump_file);
4488 for (elt = j; elt < vec_len; elt++)
4489 {
4490 rf = &repeat_factor_vec[elt];
4491 print_generic_expr (dump_file, rf->factor, 0);
4492 if (elt < vec_len - 1)
4493 fputs (" * ", dump_file);
4494 }
4495 fputs ("\n", dump_file);
4496 }
4497 }
4498 else
4499 {
4500 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4501 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4502 build_int_cst (integer_type_node,
4503 power));
4504 gimple_call_set_lhs (pow_stmt, iter_result);
4505 gimple_set_location (pow_stmt, gimple_location (stmt));
4506 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4507
4508 if (dump_file && (dump_flags & TDF_DETAILS))
4509 {
4510 unsigned elt;
4511 repeat_factor_t rf;
4512 fputs ("Building __builtin_pow call for cached product (",
4513 dump_file);
4514 for (elt = j; elt < vec_len; elt++)
4515 {
4516 rf = &repeat_factor_vec[elt];
4517 print_generic_expr (dump_file, rf->factor, 0);
4518 if (elt < vec_len - 1)
4519 fputs (" * ", dump_file);
4520 }
4521 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4522 power);
4523 }
4524 }
4525 }
4526 else
4527 {
4528 /* Otherwise, find the first factor in the repeated factor
4529 vector whose occurrence count is at least 2. If no such
4530 factor exists, there are no builtin_powi opportunities
4531 remaining. */
4532 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4533 {
4534 if (rf1->count >= 2)
4535 break;
4536 }
4537
4538 if (j >= vec_len)
4539 break;
4540
4541 power = rf1->count;
4542
4543 if (dump_file && (dump_flags & TDF_DETAILS))
4544 {
4545 unsigned elt;
4546 repeat_factor_t rf;
4547 fputs ("Building __builtin_pow call for (", dump_file);
4548 for (elt = j; elt < vec_len; elt++)
4549 {
4550 rf = &repeat_factor_vec[elt];
4551 print_generic_expr (dump_file, rf->factor, 0);
4552 if (elt < vec_len - 1)
4553 fputs (" * ", dump_file);
4554 }
4555 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4556 }
4557
4558 reassociate_stats.pows_created++;
4559
4560 /* Visit each element of the vector in reverse order (so that
4561 high-occurrence elements are visited first, and within the
4562 same occurrence count, lower-ranked elements are visited
4563 first). Form a linear product of all elements in this order
4564 whose occurrence count is at least that of element J.
4565 Record the SSA name representing the product of each element
4566 with all subsequent elements in the vector. */
4567 if (j == vec_len - 1)
4568 rf1->repr = rf1->factor;
4569 else
4570 {
4571 for (ii = vec_len - 2; ii >= (int)j; ii--)
4572 {
4573 tree op1, op2;
4574
4575 rf1 = &repeat_factor_vec[ii];
4576 rf2 = &repeat_factor_vec[ii + 1];
4577
4578 /* Init the last factor's representative to be itself. */
4579 if (!rf2->repr)
4580 rf2->repr = rf2->factor;
4581
4582 op1 = rf1->factor;
4583 op2 = rf2->repr;
4584
4585 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4586 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4587 op1, op2);
4588 gimple_set_location (mul_stmt, gimple_location (stmt));
4589 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4590 rf1->repr = target_ssa;
4591
4592 /* Don't reprocess the multiply we just introduced. */
4593 gimple_set_visited (mul_stmt, true);
4594 }
4595 }
4596
4597 /* Form a call to __builtin_powi for the maximum product
4598 just formed, raised to the power obtained earlier. */
4599 rf1 = &repeat_factor_vec[j];
4600 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4601 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4602 build_int_cst (integer_type_node,
4603 power));
4604 gimple_call_set_lhs (pow_stmt, iter_result);
4605 gimple_set_location (pow_stmt, gimple_location (stmt));
4606 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4607 }
4608
4609 /* If we previously formed at least one other builtin_powi call,
4610 form the product of this one and those others. */
4611 if (result)
4612 {
4613 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4614 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4615 result, iter_result);
4616 gimple_set_location (mul_stmt, gimple_location (stmt));
4617 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4618 gimple_set_visited (mul_stmt, true);
4619 result = new_result;
4620 }
4621 else
4622 result = iter_result;
4623
4624 /* Decrement the occurrence count of each element in the product
4625 by the count found above, and remove this many copies of each
4626 factor from OPS. */
4627 for (i = j; i < vec_len; i++)
4628 {
4629 unsigned k = power;
4630 unsigned n;
4631
4632 rf1 = &repeat_factor_vec[i];
4633 rf1->count -= power;
4634
4635 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4636 {
4637 if (oe->op == rf1->factor)
4638 {
4639 if (oe->count <= k)
4640 {
4641 ops->ordered_remove (n);
4642 k -= oe->count;
4643
4644 if (k == 0)
4645 break;
4646 }
4647 else
4648 {
4649 oe->count -= k;
4650 break;
4651 }
4652 }
4653 }
4654 }
4655 }
4656
4657 /* At this point all elements in the repeated factor vector have a
4658 remaining occurrence count of 0 or 1, and those with a count of 1
4659 don't have cached representatives. Re-sort the ops vector and
4660 clean up. */
4661 ops->qsort (sort_by_operand_rank);
4662 repeat_factor_vec.release ();
4663
4664 /* Return the final product computed herein. Note that there may
4665 still be some elements with single occurrence count left in OPS;
4666 those will be handled by the normal reassociation logic. */
4667 return result;
4668 }
4669
4670 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4671
4672 static void
4673 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4674 {
4675 tree rhs1;
4676
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4678 {
4679 fprintf (dump_file, "Transforming ");
4680 print_gimple_stmt (dump_file, stmt, 0, 0);
4681 }
4682
4683 rhs1 = gimple_assign_rhs1 (stmt);
4684 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4685 update_stmt (stmt);
4686 remove_visited_stmt_chain (rhs1);
4687
4688 if (dump_file && (dump_flags & TDF_DETAILS))
4689 {
4690 fprintf (dump_file, " into ");
4691 print_gimple_stmt (dump_file, stmt, 0, 0);
4692 }
4693 }
4694
4695 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4696
4697 static void
4698 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4699 tree rhs1, tree rhs2)
4700 {
4701 if (dump_file && (dump_flags & TDF_DETAILS))
4702 {
4703 fprintf (dump_file, "Transforming ");
4704 print_gimple_stmt (dump_file, stmt, 0, 0);
4705 }
4706
4707 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4708 update_stmt (gsi_stmt (*gsi));
4709 remove_visited_stmt_chain (rhs1);
4710
4711 if (dump_file && (dump_flags & TDF_DETAILS))
4712 {
4713 fprintf (dump_file, " into ");
4714 print_gimple_stmt (dump_file, stmt, 0, 0);
4715 }
4716 }
4717
4718 /* Reassociate expressions in basic block BB and its children in the
4719 post-dominator tree. */
4720
4721 static void
4722 reassociate_bb (basic_block bb)
4723 {
4724 gimple_stmt_iterator gsi;
4725 basic_block son;
4726 gimple stmt = last_stmt (bb);
4727
4728 if (stmt && !gimple_visited_p (stmt))
4729 maybe_optimize_range_tests (stmt);
4730
4731 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4732 {
4733 stmt = gsi_stmt (gsi);
4734
4735 if (is_gimple_assign (stmt)
4736 && !stmt_could_throw_p (stmt))
4737 {
4738 tree lhs, rhs1, rhs2;
4739 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4740
4741 /* If this is not a gimple binary expression, there is
4742 nothing for us to do with it. */
4743 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4744 continue;
4745
4746 /* If this was part of an already processed statement,
4747 we don't need to touch it again. */
4748 if (gimple_visited_p (stmt))
4749 {
4750 /* This statement might have become dead because of previous
4751 reassociations. */
4752 if (has_zero_uses (gimple_get_lhs (stmt)))
4753 {
4754 reassoc_remove_stmt (&gsi);
4755 release_defs (stmt);
4756 /* We might have removed the last stmt above, which
4757 leaves the iterator at the end of the sequence.
4758 In that case reset it to the last stmt of the
4759 block; if that is still the end of the sequence
4760 (we removed its only statement), there is nothing
4761 left to do, so bail out. */
4762 if (gsi_end_p (gsi))
4763 {
4764 gsi = gsi_last_bb (bb);
4765 if (gsi_end_p (gsi))
4766 break;
4767 }
4768 }
4769 continue;
4770 }
4771
4772 lhs = gimple_assign_lhs (stmt);
4773 rhs1 = gimple_assign_rhs1 (stmt);
4774 rhs2 = gimple_assign_rhs2 (stmt);
4775
4776 /* For non-bit or min/max operations we can't associate
4777 all types. Verify that here. */
4778 if (rhs_code != BIT_IOR_EXPR
4779 && rhs_code != BIT_AND_EXPR
4780 && rhs_code != BIT_XOR_EXPR
4781 && rhs_code != MIN_EXPR
4782 && rhs_code != MAX_EXPR
4783 && (!can_reassociate_p (lhs)
4784 || !can_reassociate_p (rhs1)
4785 || !can_reassociate_p (rhs2)))
4786 continue;
4787
4788 if (associative_tree_code (rhs_code))
4789 {
4790 auto_vec<operand_entry_t> ops;
4791 tree powi_result = NULL_TREE;
4792
4793 /* There may be no immediate uses left by the time we
4794 get here because we may have eliminated them all. */
4795 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4796 continue;
4797
4798 gimple_set_visited (stmt, true);
4799 linearize_expr_tree (&ops, stmt, true, true);
4800 ops.qsort (sort_by_operand_rank);
4801 optimize_ops_list (rhs_code, &ops);
4802 if (undistribute_ops_list (rhs_code, &ops,
4803 loop_containing_stmt (stmt)))
4804 {
4805 ops.qsort (sort_by_operand_rank);
4806 optimize_ops_list (rhs_code, &ops);
4807 }
4808
4809 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4810 optimize_range_tests (rhs_code, &ops);
4811
4812 if (first_pass_instance
4813 && rhs_code == MULT_EXPR
4814 && flag_unsafe_math_optimizations)
4815 powi_result = attempt_builtin_powi (stmt, &ops);
4816
4817 /* If the operand vector is now empty, all operands were
4818 consumed by the __builtin_powi optimization. */
4819 if (ops.length () == 0)
4820 transform_stmt_to_copy (&gsi, stmt, powi_result);
4821 else if (ops.length () == 1)
4822 {
4823 tree last_op = ops.last ()->op;
4824
4825 if (powi_result)
4826 transform_stmt_to_multiply (&gsi, stmt, last_op,
4827 powi_result);
4828 else
4829 transform_stmt_to_copy (&gsi, stmt, last_op);
4830 }
4831 else
4832 {
4833 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4834 int ops_num = ops.length ();
4835 int width = get_reassociation_width (ops_num, rhs_code, mode);
4836 tree new_lhs = lhs;
4837
4838 if (dump_file && (dump_flags & TDF_DETAILS))
4839 fprintf (dump_file,
4840 "Width = %d was chosen for reassociation\n", width);
4841
4842 if (width > 1
4843 && ops.length () > 3)
4844 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4845 width, ops);
4846 else
4847 {
4848 /* When there are three operands left, we want
4849 to make sure the ones that get the double
4850 binary op are chosen wisely. */
4851 int len = ops.length ();
4852 if (len >= 3)
4853 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4854
4855 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4856 powi_result != NULL);
4857 }
4858
4859 /* If we combined some repeated factors into a
4860 __builtin_powi call, multiply that result by the
4861 reassociated operands. */
4862 if (powi_result)
4863 {
4864 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4865 tree type = TREE_TYPE (lhs);
4866 tree target_ssa = make_temp_ssa_name (type, NULL,
4867 "reassocpow");
4868 gimple_set_lhs (lhs_stmt, target_ssa);
4869 update_stmt (lhs_stmt);
4870 if (lhs != new_lhs)
4871 target_ssa = new_lhs;
4872 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4873 powi_result, target_ssa);
4874 gimple_set_location (mul_stmt, gimple_location (stmt));
4875 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4876 }
4877 }
4878 }
4879 }
4880 }
4881 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4882 son;
4883 son = next_dom_son (CDI_POST_DOMINATORS, son))
4884 reassociate_bb (son);
4885 }
4886
4887 /* Add jumps around shifts for range tests turned into bit tests.
4888 For each SSA_NAME VAR we have code like:
4889 VAR = ...; // final stmt of range comparison
4890 // bit test here...;
4891 OTHERVAR = ...; // final stmt of the bit test sequence
4892 RES = VAR | OTHERVAR;
4893 Turn the above into:
4894 VAR = ...;
4895 if (VAR != 0)
4896 goto <l3>;
4897 else
4898 goto <l2>;
4899 <l2>:
4900 // bit test here...;
4901 OTHERVAR = ...;
4902 <l3>:
4903 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4904
4905 static void
4906 branch_fixup (void)
4907 {
4908 tree var;
4909 unsigned int i;
4910
4911 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4912 {
4913 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4914 gimple use_stmt;
4915 use_operand_p use;
4916 bool ok = single_imm_use (var, &use, &use_stmt);
4917 gcc_assert (ok
4918 && is_gimple_assign (use_stmt)
4919 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4920 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4921
4922 basic_block cond_bb = gimple_bb (def_stmt);
4923 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4924 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4925
4926 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4927 gimple g = gimple_build_cond (NE_EXPR, var,
4928 build_zero_cst (TREE_TYPE (var)),
4929 NULL_TREE, NULL_TREE);
4930 location_t loc = gimple_location (use_stmt);
4931 gimple_set_location (g, loc);
4932 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4933
4934 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4935 etrue->probability = REG_BR_PROB_BASE / 2;
4936 etrue->count = cond_bb->count / 2;
4937 edge efalse = find_edge (cond_bb, then_bb);
4938 efalse->flags = EDGE_FALSE_VALUE;
4939 efalse->probability -= etrue->probability;
4940 efalse->count -= etrue->count;
4941 then_bb->count -= etrue->count;
4942
4943 tree othervar = NULL_TREE;
4944 if (gimple_assign_rhs1 (use_stmt) == var)
4945 othervar = gimple_assign_rhs2 (use_stmt);
4946 else if (gimple_assign_rhs2 (use_stmt) == var)
4947 othervar = gimple_assign_rhs1 (use_stmt);
4948 else
4949 gcc_unreachable ();
4950 tree lhs = gimple_assign_lhs (use_stmt);
4951 gphi *phi = create_phi_node (lhs, merge_bb);
4952 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4953 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4954 gsi = gsi_for_stmt (use_stmt);
4955 gsi_remove (&gsi, true);
4956
4957 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4958 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4959 }
4960 reassoc_branch_fixups.release ();
4961 }
4962
4963 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4964 void debug_ops_vector (vec<operand_entry_t> ops);
4965
4966 /* Dump the operand entry vector OPS to FILE. */
4967
4968 void
4969 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4970 {
4971 operand_entry_t oe;
4972 unsigned int i;
4973
4974 FOR_EACH_VEC_ELT (ops, i, oe)
4975 {
4976 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4977 print_generic_expr (file, oe->op, 0);
4978 }
4979 }
4980
4981 /* Dump the operand entry vector OPS to STDERR. */
4982
4983 DEBUG_FUNCTION void
4984 debug_ops_vector (vec<operand_entry_t> ops)
4985 {
4986 dump_ops_vector (stderr, ops);
4987 }
4988
4989 static void
4990 do_reassoc (void)
4991 {
4992 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4993 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4994 }
4995
4996 /* Initialize the reassociation pass. */
4997
4998 static void
4999 init_reassoc (void)
5000 {
5001 int i;
5002 long rank = 2;
5003 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5004
5005 /* Find the loops, so that we can prevent moving calculations in
5006 them. */
5007 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5008
5009 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5010
5011 operand_entry_pool = create_alloc_pool ("operand entry pool",
5012 sizeof (struct operand_entry), 30);
5013 next_operand_entry_id = 0;
5014
5015 /* Reverse RPO (Reverse Post Order) will give us something where
5016 deeper loops come later. */
5017 pre_and_rev_post_order_compute (NULL, bbs, false);
5018 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5019 operand_rank = new hash_map<tree, long>;
5020
5021 /* Give each default definition a distinct rank. This includes
5022 parameters and the static chain. Walk backwards over all
5023 SSA names so that we get proper rank ordering according
5024 to tree_swap_operands_p. */
5025 for (i = num_ssa_names - 1; i > 0; --i)
5026 {
5027 tree name = ssa_name (i);
5028 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5029 insert_operand_rank (name, ++rank);
5030 }
5031
5032 /* Set up rank for each BB */
5033 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5034 bb_rank[bbs[i]] = ++rank << 16;
5035
5036 free (bbs);
5037 calculate_dominance_info (CDI_POST_DOMINATORS);
5038 plus_negates = vNULL;
5039 }
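
/* To illustrate the numbering set up above (counts are hypothetical):
   with two default definitions and three basic blocks, the default
   definitions receive ranks 3 and 4 and the blocks receive base ranks
   5 << 16, 6 << 16 and 7 << 16, with blocks later in reverse post
   order (deeper loops) getting the larger values.  */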
5040
5041 /* Cleanup after the reassociation pass, and print stats if
5042 requested. */
5043
5044 static void
5045 fini_reassoc (void)
5046 {
5047 statistics_counter_event (cfun, "Linearized",
5048 reassociate_stats.linearized);
5049 statistics_counter_event (cfun, "Constants eliminated",
5050 reassociate_stats.constants_eliminated);
5051 statistics_counter_event (cfun, "Ops eliminated",
5052 reassociate_stats.ops_eliminated);
5053 statistics_counter_event (cfun, "Statements rewritten",
5054 reassociate_stats.rewritten);
5055 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5056 reassociate_stats.pows_encountered);
5057 statistics_counter_event (cfun, "Built-in powi calls created",
5058 reassociate_stats.pows_created);
5059
5060 delete operand_rank;
5061 free_alloc_pool (operand_entry_pool);
5062 free (bb_rank);
5063 plus_negates.release ();
5064 free_dominance_info (CDI_POST_DOMINATORS);
5065 loop_optimizer_finalize ();
5066 }
5067
5068 /* Gate and execute functions for Reassociation. */
5069
5070 static unsigned int
5071 execute_reassoc (void)
5072 {
5073 init_reassoc ();
5074
5075 do_reassoc ();
5076 repropagate_negates ();
5077 branch_fixup ();
5078
5079 fini_reassoc ();
5080 return 0;
5081 }
5082
5083 namespace {
5084
5085 const pass_data pass_data_reassoc =
5086 {
5087 GIMPLE_PASS, /* type */
5088 "reassoc", /* name */
5089 OPTGROUP_NONE, /* optinfo_flags */
5090 TV_TREE_REASSOC, /* tv_id */
5091 ( PROP_cfg | PROP_ssa ), /* properties_required */
5092 0, /* properties_provided */
5093 0, /* properties_destroyed */
5094 0, /* todo_flags_start */
5095 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5096 };
5097
5098 class pass_reassoc : public gimple_opt_pass
5099 {
5100 public:
5101 pass_reassoc (gcc::context *ctxt)
5102 : gimple_opt_pass (pass_data_reassoc, ctxt)
5103 {}
5104
5105 /* opt_pass methods: */
5106 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5107 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5108 virtual unsigned int execute (function *) { return execute_reassoc (); }
5109
5110 }; // class pass_reassoc
5111
5112 } // anon namespace
5113
5114 gimple_opt_pass *
5115 make_pass_reassoc (gcc::context *ctxt)
5116 {
5117 return new pass_reassoc (ctxt);
5118 }