gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "hash-map.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
64 #include "gimplify.h"
65 #include "optabs.h"
66
67 /* This is a simple global reassociation pass. It is, in part, based
68 on the LLVM pass of the same name (They do some things more/less
69 than we do, in different orders, etc).
70
71 It consists of five steps:
72
73 1. Breaking up subtract operations into addition + negate, where
74 it would promote the reassociation of adds.
75
76 2. Left linearization of the expression trees, so that (A+B)+(C+D)
77 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
78 During linearization, we place the operands of the binary
79 expressions into a vector of operand_entry_t
80
81 3. Optimization of the operand lists, eliminating things like a +
82 -a, a & a, etc.
83
84 3a. Combine repeated factors with the same occurrence counts
85 into a __builtin_powi call that will later be optimized into
86 an optimal number of multiplies.
87
88 4. Rewrite the expression trees we linearized and optimized so
89 they are in proper rank order.
90
91 5. Repropagate negates, as nothing else will clean it up ATM.
92
93 A bit of theory on #4, since nobody seems to write anything down
94 about why it makes sense to do it the way they do it:
95
96 We could do this much nicer theoretically, but don't (for reasons
97 explained after how to do it theoretically nice :P).
98
99 In order to promote the most redundancy elimination, you want
100 binary expressions whose operands are the same rank (or
101 preferably, the same value) exposed to the redundancy eliminator,
102 for possible elimination.
103
104 So the way to do this if we really cared, is to build the new op
105 tree from the leaves to the roots, merging as you go, and putting the
106 new op on the end of the worklist, until you are left with one
107 thing on the worklist.
108
109 IE if you have to rewrite the following set of operands (listed with
110 rank in parentheses), with opcode PLUS_EXPR:
111
112 a (1), b (1), c (1), d (2), e (2)
113
114
115 We start with our merge worklist empty, and the ops list with all of
116 those on it.
117
118 You want to first merge all leaves of the same rank, as much as
119 possible.
120
121 So first build a binary op of
122
123 mergetmp = a + b, and put "mergetmp" on the merge worklist.
124
125 Because there is no three operand form of PLUS_EXPR, c is not going to
126 be exposed to redundancy elimination as a rank 1 operand.
127
128 So you might as well throw it on the merge worklist (you could also
129 consider it to now be a rank two operand, and merge it with d and e,
130 but in this case, you then have evicted e from a binary op. So at
131 least in this situation, you can't win.)
132
133 Then build a binary op of d + e
134 mergetmp2 = d + e
135
136 and put mergetmp2 on the merge worklist.
137
138 so merge worklist = {mergetmp, c, mergetmp2}
139
140 Continue building binary ops of these operations until you have only
141 one operation left on the worklist.
142
143 So we have
144
145 build binary op
146 mergetmp3 = mergetmp + c
147
148 worklist = {mergetmp2, mergetmp3}
149
150 mergetmp4 = mergetmp2 + mergetmp3
151
152 worklist = {mergetmp4}
153
154 because we have one operation left, we can now just set the original
155 statement equal to the result of that operation.
156
157 This will at least expose a + b and d + e to redundancy elimination
158 as binary operations.
159
160 For extra points, you can reuse the old statements to build the
161 mergetmps, since you shouldn't run out.
162
163 So why don't we do this?
164
165 Because it's expensive, and rarely will help. Most trees we are
166 reassociating have 3 or less ops. If they have 2 ops, they already
167 will be written into a nice single binary op. If you have 3 ops, a
168 single simple check suffices to tell you whether the first two are of the
169 same rank. If so, you know to order it
170
171 mergetmp = op1 + op2
172 newstmt = mergetmp + op3
173
174 instead of
175 mergetmp = op2 + op3
176 newstmt = mergetmp + op1
177
178 If all three are of the same rank, you can't expose them all in a
179 single binary operator anyway, so the above is *still* the best you
180 can do.
181
182 Thus, this is what we do. When we have three ops left, we check to see
183 what order to put them in, and call it a day. As a nod to vector sum
184 reduction, we check if any of the ops are really a phi node that is a
185 destructive update for the associating op, and keep the destructive
186 update together for vector sum reduction recognition. */
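/* A small end-to-end illustration (hypothetical SSA names, not taken
   from a real dump): given

     t1 = a - b;
     t2 = t1 + c;

   step 1 may break the subtract into "n = -b; t1 = a + n;", step 2
   linearizes the chain into the operand list {a, n, c}, step 3 would
   cancel pairs or merge constants if any were present, and step 4
   rewrites the list back into binary statements ordered by rank.  */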
187
188
189 /* Statistics */
190 static struct
191 {
192 int linearized;
193 int constants_eliminated;
194 int ops_eliminated;
195 int rewritten;
196 int pows_encountered;
197 int pows_created;
198 } reassociate_stats;
199
200 /* Operator, rank pair. */
201 typedef struct operand_entry
202 {
203 unsigned int rank;
204 int id;
205 tree op;
206 unsigned int count;
207 } *operand_entry_t;
208
209 static alloc_pool operand_entry_pool;
210
211 /* This is used to assign a unique ID to each struct operand_entry
212 so that qsort results are identical on different hosts. */
213 static int next_operand_entry_id;
214
215 /* Starting rank number for a given basic block, so that we can rank
216 operations using unmovable instructions in that BB based on the bb
217 depth. */
218 static long *bb_rank;
219
220 /* Operand->rank hashtable. */
221 static hash_map<tree, long> *operand_rank;
222
223 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
224 all basic blocks the CFG should be adjusted - basic blocks
225 split right after that SSA_NAME's definition statement and before
226 the only use, which must be a bit ior. */
227 static vec<tree> reassoc_branch_fixups;
228
229 /* Forward decls. */
230 static long get_rank (tree);
231 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
232
233 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
234 possibly added by gsi_remove. */
235
236 bool
237 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
238 {
239 gimple stmt = gsi_stmt (*gsi);
240
241 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
242 return gsi_remove (gsi, true);
243
244 gimple_stmt_iterator prev = *gsi;
245 gsi_prev (&prev);
246 unsigned uid = gimple_uid (stmt);
247 basic_block bb = gimple_bb (stmt);
248 bool ret = gsi_remove (gsi, true);
249 if (!gsi_end_p (prev))
250 gsi_next (&prev);
251 else
252 prev = gsi_start_bb (bb);
253 gimple end_stmt = gsi_stmt (*gsi);
254 while ((stmt = gsi_stmt (prev)) != end_stmt)
255 {
256 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
257 gimple_set_uid (stmt, uid);
258 gsi_next (&prev);
259 }
260 return ret;
261 }
262
263 /* Bias amount for loop-carried phis. We want this to be larger than
264 the depth of any reassociation tree we can see, but not larger than
265 the rank difference between two blocks. */
266 #define PHI_LOOP_BIAS (1 << 15)
267
268 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
269 an innermost loop, and the phi has only a single use which is inside
270 the loop, then the rank is the block rank of the loop latch plus an
271 extra bias for the loop-carried dependence. This causes expressions
272 calculated into an accumulator variable to be independent for each
273 iteration of the loop. If STMT is some other phi, the rank is the
274 block rank of its containing block. */
275 static long
276 phi_rank (gimple stmt)
277 {
278 basic_block bb = gimple_bb (stmt);
279 struct loop *father = bb->loop_father;
280 tree res;
281 unsigned i;
282 use_operand_p use;
283 gimple use_stmt;
284
285 /* We only care about real loops (those with a latch). */
286 if (!father->latch)
287 return bb_rank[bb->index];
288
289 /* Interesting phis must be in headers of innermost loops. */
290 if (bb != father->header
291 || father->inner)
292 return bb_rank[bb->index];
293
294 /* Ignore virtual SSA_NAMEs. */
295 res = gimple_phi_result (stmt);
296 if (virtual_operand_p (res))
297 return bb_rank[bb->index];
298
299 /* The phi definition must have a single use, and that use must be
300 within the loop. Otherwise this isn't an accumulator pattern. */
301 if (!single_imm_use (res, &use, &use_stmt)
302 || gimple_bb (use_stmt)->loop_father != father)
303 return bb_rank[bb->index];
304
305 /* Look for phi arguments from within the loop. If found, bias this phi. */
306 for (i = 0; i < gimple_phi_num_args (stmt); i++)
307 {
308 tree arg = gimple_phi_arg_def (stmt, i);
309 if (TREE_CODE (arg) == SSA_NAME
310 && !SSA_NAME_IS_DEFAULT_DEF (arg))
311 {
312 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
313 if (gimple_bb (def_stmt)->loop_father == father)
314 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
315 }
316 }
317
318 /* Must be an uninteresting phi. */
319 return bb_rank[bb->index];
320 }
321
322 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
323 loop-carried dependence of an innermost loop, return TRUE; else
324 return FALSE. */
325 static bool
326 loop_carried_phi (tree exp)
327 {
328 gimple phi_stmt;
329 long block_rank;
330
331 if (TREE_CODE (exp) != SSA_NAME
332 || SSA_NAME_IS_DEFAULT_DEF (exp))
333 return false;
334
335 phi_stmt = SSA_NAME_DEF_STMT (exp);
336
337 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
338 return false;
339
340 /* Non-loop-carried phis have block rank. Loop-carried phis have
341 an additional bias added in. If this phi doesn't have block rank,
342 it's biased and should not be propagated. */
343 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
344
345 if (phi_rank (phi_stmt) != block_rank)
346 return true;
347
348 return false;
349 }
350
351 /* Return the maximum of RANK and the rank that should be propagated
352 from expression OP. For most operands, this is just the rank of OP.
353 For loop-carried phis, the value is zero to avoid undoing the bias
354 in favor of the phi. */
355 static long
356 propagate_rank (long rank, tree op)
357 {
358 long op_rank;
359
360 if (loop_carried_phi (op))
361 return rank;
362
363 op_rank = get_rank (op);
364
365 return MAX (rank, op_rank);
366 }
367
368 /* Look up the operand rank structure for expression E. */
369
370 static inline long
371 find_operand_rank (tree e)
372 {
373 long *slot = operand_rank->get (e);
374 return slot ? *slot : -1;
375 }
376
377 /* Insert {E,RANK} into the operand rank hashtable. */
378
379 static inline void
380 insert_operand_rank (tree e, long rank)
381 {
382 gcc_assert (rank > 0);
383 gcc_assert (!operand_rank->put (e, rank));
384 }
385
386 /* Given an expression E, return the rank of the expression. */
387
388 static long
389 get_rank (tree e)
390 {
391 /* Constants have rank 0. */
392 if (is_gimple_min_invariant (e))
393 return 0;
394
395 /* SSA_NAME's have the rank of the expression they are the result
396 of.
397 For globals and uninitialized values, the rank is 0.
398 For function arguments, use the pre-setup rank.
399 For PHI nodes, stores, asm statements, etc, we use the rank of
400 the BB.
401 For simple operations, the rank is the maximum rank of any of
402 its operands, or the bb_rank, whichever is less.
403 I make no claims that this is optimal, however, it gives good
404 results. */
405
406 /* We make an exception to the normal ranking system to break
407 dependences of accumulator variables in loops. Suppose we
408 have a simple one-block loop containing:
409
410 x_1 = phi(x_0, x_2)
411 b = a + x_1
412 c = b + d
413 x_2 = c + e
414
415 As shown, each iteration of the calculation into x is fully
416 dependent upon the iteration before it. We would prefer to
417 see this in the form:
418
419 x_1 = phi(x_0, x_2)
420 b = a + d
421 c = b + e
422 x_2 = c + x_1
423
424 If the loop is unrolled, the calculations of b and c from
425 different iterations can be interleaved.
426
427 To obtain this result during reassociation, we bias the rank
428 of the phi definition x_1 upward, when it is recognized as an
429 accumulator pattern. The artificial rank causes it to be
430 added last, providing the desired independence. */
431
432 if (TREE_CODE (e) == SSA_NAME)
433 {
434 gimple stmt;
435 long rank;
436 int i, n;
437 tree op;
438
439 if (SSA_NAME_IS_DEFAULT_DEF (e))
440 return find_operand_rank (e);
441
442 stmt = SSA_NAME_DEF_STMT (e);
443 if (gimple_code (stmt) == GIMPLE_PHI)
444 return phi_rank (stmt);
445
446 if (!is_gimple_assign (stmt)
447 || gimple_vdef (stmt))
448 return bb_rank[gimple_bb (stmt)->index];
449
450 /* If we already have a rank for this expression, use that. */
451 rank = find_operand_rank (e);
452 if (rank != -1)
453 return rank;
454
455 /* Otherwise, find the maximum rank for the operands. As an
456 exception, remove the bias from loop-carried phis when propagating
457 the rank so that dependent operations are not also biased. */
458 rank = 0;
459 if (gimple_assign_single_p (stmt))
460 {
461 tree rhs = gimple_assign_rhs1 (stmt);
462 n = TREE_OPERAND_LENGTH (rhs);
463 if (n == 0)
464 rank = propagate_rank (rank, rhs);
465 else
466 {
467 for (i = 0; i < n; i++)
468 {
469 op = TREE_OPERAND (rhs, i);
470
471 if (op != NULL_TREE)
472 rank = propagate_rank (rank, op);
473 }
474 }
475 }
476 else
477 {
478 n = gimple_num_ops (stmt);
479 for (i = 1; i < n; i++)
480 {
481 op = gimple_op (stmt, i);
482 gcc_assert (op);
483 rank = propagate_rank (rank, op);
484 }
485 }
486
487 if (dump_file && (dump_flags & TDF_DETAILS))
488 {
489 fprintf (dump_file, "Rank for ");
490 print_generic_expr (dump_file, e, 0);
491 fprintf (dump_file, " is %ld\n", (rank + 1));
492 }
493
494 /* Note the rank in the hashtable so we don't recompute it. */
495 insert_operand_rank (e, (rank + 1));
496 return (rank + 1);
497 }
498
499 /* Globals, etc, are rank 0 */
500 return 0;
501 }
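/* As a concrete (purely illustrative) example of the above: with
   constants at rank 0 and function arguments keeping their pre-setup
   rank, a chain like

     a_1 = x_1(D) + y_2(D);
     b_2 = a_1 + z_3(D);

   gives a_1 a rank one greater than the maximum rank of x_1 and y_2,
   and b_2 a rank one greater than a_1's, so operands later in a
   dependence chain end up with larger ranks.  */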
502
503
504 /* We want integer ones to end up last no matter what, since they are
505 the ones we can do the most with. */
506 #define INTEGER_CONST_TYPE 1 << 3
507 #define FLOAT_CONST_TYPE 1 << 2
508 #define OTHER_CONST_TYPE 1 << 1
509
510 /* Classify an invariant tree into integer, float, or other, so that
511 we can sort them to be near other constants of the same type. */
512 static inline int
513 constant_type (tree t)
514 {
515 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
516 return INTEGER_CONST_TYPE;
517 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
518 return FLOAT_CONST_TYPE;
519 else
520 return OTHER_CONST_TYPE;
521 }
522
523 /* qsort comparison function to sort operand entries PA and PB by rank
524 so that the sorted array is ordered by rank in decreasing order. */
525 static int
526 sort_by_operand_rank (const void *pa, const void *pb)
527 {
528 const operand_entry_t oea = *(const operand_entry_t *)pa;
529 const operand_entry_t oeb = *(const operand_entry_t *)pb;
530
531 /* It's nicer for optimize_ops_list if constants that are likely
532 to fold when added/multiplied/whatever are put next to each
533 other. Since all constants have rank 0, order them by type. */
534 if (oeb->rank == 0 && oea->rank == 0)
535 {
536 if (constant_type (oeb->op) != constant_type (oea->op))
537 return constant_type (oeb->op) - constant_type (oea->op);
538 else
539 /* To make sorting result stable, we use unique IDs to determine
540 order. */
541 return oeb->id - oea->id;
542 }
543
544 /* Lastly, make sure the versions that are the same go next to each
545 other. */
546 if ((oeb->rank - oea->rank == 0)
547 && TREE_CODE (oea->op) == SSA_NAME
548 && TREE_CODE (oeb->op) == SSA_NAME)
549 {
550 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse the
551 versions of removed SSA_NAMEs, so if possible, prefer to sort
552 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
553 See PR60418. */
554 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
555 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
556 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
557 {
558 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
559 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
560 basic_block bba = gimple_bb (stmta);
561 basic_block bbb = gimple_bb (stmtb);
562 if (bbb != bba)
563 {
564 if (bb_rank[bbb->index] != bb_rank[bba->index])
565 return bb_rank[bbb->index] - bb_rank[bba->index];
566 }
567 else
568 {
569 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
570 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
571 if (da != db)
572 return da ? 1 : -1;
573 }
574 }
575
576 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
578 else
579 return oeb->id - oea->id;
580 }
581
582 if (oeb->rank != oea->rank)
583 return oeb->rank - oea->rank;
584 else
585 return oeb->id - oea->id;
586 }
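/* For illustration (made-up ranks): the operand list
   {a_1 (rank 2), c_3 (rank 4), 5 (rank 0)} sorts to {c_3, a_1, 5}:
   decreasing rank, with all constants at the end and the unique ids
   breaking any remaining ties so the result is stable across hosts.  */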
587
588 /* Add an operand entry to *OPS for the tree operand OP. */
589
590 static void
591 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
592 {
593 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
594
595 oe->op = op;
596 oe->rank = get_rank (op);
597 oe->id = next_operand_entry_id++;
598 oe->count = 1;
599 ops->safe_push (oe);
600 }
601
602 /* Add an operand entry to *OPS for the tree operand OP with repeat
603 count REPEAT. */
604
605 static void
606 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
607 HOST_WIDE_INT repeat)
608 {
609 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
610
611 oe->op = op;
612 oe->rank = get_rank (op);
613 oe->id = next_operand_entry_id++;
614 oe->count = repeat;
615 ops->safe_push (oe);
616
617 reassociate_stats.pows_encountered++;
618 }
619
620 /* Return true if STMT is a reassociable operation containing a binary
621 operation with tree code CODE, and is inside LOOP. */
622
623 static bool
624 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
625 {
626 basic_block bb = gimple_bb (stmt);
627
628 if (gimple_bb (stmt) == NULL)
629 return false;
630
631 if (!flow_bb_inside_loop_p (loop, bb))
632 return false;
633
634 if (is_gimple_assign (stmt)
635 && gimple_assign_rhs_code (stmt) == code
636 && has_single_use (gimple_assign_lhs (stmt)))
637 return true;
638
639 return false;
640 }
641
642
643 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
644 operand of that operation. Otherwise, return NULL_TREE. */
645
646 static tree
647 get_unary_op (tree name, enum tree_code opcode)
648 {
649 gimple stmt = SSA_NAME_DEF_STMT (name);
650
651 if (!is_gimple_assign (stmt))
652 return NULL_TREE;
653
654 if (gimple_assign_rhs_code (stmt) == opcode)
655 return gimple_assign_rhs1 (stmt);
656 return NULL_TREE;
657 }
658
659 /* If CURR and LAST are a pair of ops that OPCODE allows us to
660 eliminate through equivalences, do so, remove them from OPS, and
661 return true. Otherwise, return false. */
662
663 static bool
664 eliminate_duplicate_pair (enum tree_code opcode,
665 vec<operand_entry_t> *ops,
666 bool *all_done,
667 unsigned int i,
668 operand_entry_t curr,
669 operand_entry_t last)
670 {
671
672 /* If we have two of the same op, and the opcode is &, |, min, or max,
673 we can eliminate one of them.
674 If we have two of the same op, and the opcode is ^, we can
675 eliminate both of them. */
676
677 if (last && last->op == curr->op)
678 {
679 switch (opcode)
680 {
681 case MAX_EXPR:
682 case MIN_EXPR:
683 case BIT_IOR_EXPR:
684 case BIT_AND_EXPR:
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 {
687 fprintf (dump_file, "Equivalence: ");
688 print_generic_expr (dump_file, curr->op, 0);
689 fprintf (dump_file, " [&|minmax] ");
690 print_generic_expr (dump_file, last->op, 0);
691 fprintf (dump_file, " -> ");
692 print_generic_stmt (dump_file, last->op, 0);
693 }
694
695 ops->ordered_remove (i);
696 reassociate_stats.ops_eliminated ++;
697
698 return true;
699
700 case BIT_XOR_EXPR:
701 if (dump_file && (dump_flags & TDF_DETAILS))
702 {
703 fprintf (dump_file, "Equivalence: ");
704 print_generic_expr (dump_file, curr->op, 0);
705 fprintf (dump_file, " ^ ");
706 print_generic_expr (dump_file, last->op, 0);
707 fprintf (dump_file, " -> nothing\n");
708 }
709
710 reassociate_stats.ops_eliminated += 2;
711
712 if (ops->length () == 2)
713 {
714 ops->create (0);
715 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
716 *all_done = true;
717 }
718 else
719 {
720 ops->ordered_remove (i-1);
721 ops->ordered_remove (i-1);
722 }
723
724 return true;
725
726 default:
727 break;
728 }
729 }
730 return false;
731 }
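/* For example (illustrative): with BIT_AND_EXPR the list {a_1, a_1, b_2}
   becomes {a_1, b_2} (one copy of the duplicate is dropped), while with
   BIT_XOR_EXPR both copies cancel and the list becomes {b_2}, or just
   {0} when the two duplicates were the only operands.  */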
732
733 static vec<tree> plus_negates;
734
735 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
736 expression, look in OPS for a corresponding positive operation to cancel
737 it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
739 return false. */
740
741 static bool
742 eliminate_plus_minus_pair (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
746 {
747 tree negateop;
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
751
752 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
754
755 negateop = get_unary_op (curr->op, NEGATE_EXPR);
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (negateop == NULL_TREE && notop == NULL_TREE)
758 return false;
759
760 /* Any non-negated version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
763
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1 ;
767 i++)
768 {
769 if (oe->op == negateop)
770 {
771
772 if (dump_file && (dump_flags & TDF_DETAILS))
773 {
774 fprintf (dump_file, "Equivalence: ");
775 print_generic_expr (dump_file, negateop, 0);
776 fprintf (dump_file, " + -");
777 print_generic_expr (dump_file, oe->op, 0);
778 fprintf (dump_file, " -> 0\n");
779 }
780
781 ops->ordered_remove (i);
782 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
783 ops->ordered_remove (currindex);
784 reassociate_stats.ops_eliminated ++;
785
786 return true;
787 }
788 else if (oe->op == notop)
789 {
790 tree op_type = TREE_TYPE (oe->op);
791
792 if (dump_file && (dump_flags & TDF_DETAILS))
793 {
794 fprintf (dump_file, "Equivalence: ");
795 print_generic_expr (dump_file, notop, 0);
796 fprintf (dump_file, " + ~");
797 print_generic_expr (dump_file, oe->op, 0);
798 fprintf (dump_file, " -> -1\n");
799 }
800
801 ops->ordered_remove (i);
802 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
803 ops->ordered_remove (currindex);
804 reassociate_stats.ops_eliminated ++;
805
806 return true;
807 }
808 }
809
810 /* CURR->OP is a negate expr in a plus expr: save it for later
811 inspection in repropagate_negates(). */
812 if (negateop != NULL_TREE)
813 plus_negates.safe_push (curr->op);
814
815 return false;
816 }
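/* For example (illustrative): with PLUS_EXPR and t_1 = -a_2, the list
   {t_1, a_2, b_3} becomes {b_3, 0}, and with t_1 = ~a_2 it becomes
   {b_3, -1}; the constant is appended so it can later be merged with
   any other constants in the list.  */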
817
818 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
819 bitwise not expression, look in OPS for a corresponding operand to
820 cancel it out. If we find one, remove the other from OPS, replace
821 OPS[CURRINDEX] with 0, and return true. Otherwise, return
822 false. */
823
824 static bool
825 eliminate_not_pairs (enum tree_code opcode,
826 vec<operand_entry_t> *ops,
827 unsigned int currindex,
828 operand_entry_t curr)
829 {
830 tree notop;
831 unsigned int i;
832 operand_entry_t oe;
833
834 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
835 || TREE_CODE (curr->op) != SSA_NAME)
836 return false;
837
838 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
839 if (notop == NULL_TREE)
840 return false;
841
842 /* Any non-not version will have a rank that is one less than
843 the current rank. So once we hit those ranks, if we don't find
844 one, we can stop. */
845
846 for (i = currindex + 1;
847 ops->iterate (i, &oe)
848 && oe->rank >= curr->rank - 1;
849 i++)
850 {
851 if (oe->op == notop)
852 {
853 if (dump_file && (dump_flags & TDF_DETAILS))
854 {
855 fprintf (dump_file, "Equivalence: ");
856 print_generic_expr (dump_file, notop, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " & ~");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " | ~");
861 print_generic_expr (dump_file, oe->op, 0);
862 if (opcode == BIT_AND_EXPR)
863 fprintf (dump_file, " -> 0\n");
864 else if (opcode == BIT_IOR_EXPR)
865 fprintf (dump_file, " -> -1\n");
866 }
867
868 if (opcode == BIT_AND_EXPR)
869 oe->op = build_zero_cst (TREE_TYPE (oe->op));
870 else if (opcode == BIT_IOR_EXPR)
871 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
872
873 reassociate_stats.ops_eliminated += ops->length () - 1;
874 ops->truncate (0);
875 ops->quick_push (oe);
876 return true;
877 }
878 }
879
880 return false;
881 }
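/* For example (illustrative): with BIT_AND_EXPR and t_1 = ~a_2, the
   whole list {t_1, a_2, b_3} collapses to {0}, and with BIT_IOR_EXPR
   it collapses to {-1}, since x & ~x and x | ~x absorb every other
   operand.  */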
882
883 /* Use constant value that may be present in OPS to try to eliminate
884 operands. Note that this function is only really used when we've
885 eliminated ops for other reasons, or merged constants. Across
886 single statements, fold already does all of this, plus more. There
887 is little point in duplicating logic, so I've only included the
888 identities that I could ever construct testcases to trigger. */
889
890 static void
891 eliminate_using_constants (enum tree_code opcode,
892 vec<operand_entry_t> *ops)
893 {
894 operand_entry_t oelast = ops->last ();
895 tree type = TREE_TYPE (oelast->op);
896
897 if (oelast->rank == 0
898 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
899 {
900 switch (opcode)
901 {
902 case BIT_AND_EXPR:
903 if (integer_zerop (oelast->op))
904 {
905 if (ops->length () != 1)
906 {
907 if (dump_file && (dump_flags & TDF_DETAILS))
908 fprintf (dump_file, "Found & 0, removing all other ops\n");
909
910 reassociate_stats.ops_eliminated += ops->length () - 1;
911
912 ops->truncate (0);
913 ops->quick_push (oelast);
914 return;
915 }
916 }
917 else if (integer_all_onesp (oelast->op))
918 {
919 if (ops->length () != 1)
920 {
921 if (dump_file && (dump_flags & TDF_DETAILS))
922 fprintf (dump_file, "Found & -1, removing\n");
923 ops->pop ();
924 reassociate_stats.ops_eliminated++;
925 }
926 }
927 break;
928 case BIT_IOR_EXPR:
929 if (integer_all_onesp (oelast->op))
930 {
931 if (ops->length () != 1)
932 {
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "Found | -1, removing all other ops\n");
935
936 reassociate_stats.ops_eliminated += ops->length () - 1;
937
938 ops->truncate (0);
939 ops->quick_push (oelast);
940 return;
941 }
942 }
943 else if (integer_zerop (oelast->op))
944 {
945 if (ops->length () != 1)
946 {
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found | 0, removing\n");
949 ops->pop ();
950 reassociate_stats.ops_eliminated++;
951 }
952 }
953 break;
954 case MULT_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && !HONOR_NANS (TYPE_MODE (type))
958 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
959 && real_zerop (oelast->op)))
960 {
961 if (ops->length () != 1)
962 {
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found * 0, removing all other ops\n");
965
966 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (1);
968 ops->quick_push (oelast);
969 return;
970 }
971 }
972 else if (integer_onep (oelast->op)
973 || (FLOAT_TYPE_P (type)
974 && !HONOR_SNANS (TYPE_MODE (type))
975 && real_onep (oelast->op)))
976 {
977 if (ops->length () != 1)
978 {
979 if (dump_file && (dump_flags & TDF_DETAILS))
980 fprintf (dump_file, "Found * 1, removing\n");
981 ops->pop ();
982 reassociate_stats.ops_eliminated++;
983 return;
984 }
985 }
986 break;
987 case BIT_XOR_EXPR:
988 case PLUS_EXPR:
989 case MINUS_EXPR:
990 if (integer_zerop (oelast->op)
991 || (FLOAT_TYPE_P (type)
992 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
993 && fold_real_zero_addition_p (type, oelast->op,
994 opcode == MINUS_EXPR)))
995 {
996 if (ops->length () != 1)
997 {
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found [|^+] 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1002 return;
1003 }
1004 }
1005 break;
1006 default:
1007 break;
1008 }
1009 }
1010 }
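/* For example (illustrative): a trailing constant 0 under BIT_AND_EXPR
   collapses {a_1, b_2, 0} to just the 0, while under BIT_IOR_EXPR,
   PLUS_EXPR or BIT_XOR_EXPR the useless 0 is simply popped from the
   list; a trailing -1 is likewise dropped for BIT_AND_EXPR and absorbs
   everything for BIT_IOR_EXPR.  */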
1011
1012
1013 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1014 bool, bool);
1015
1016 /* Structure for tracking and counting operands. */
1017 typedef struct oecount_s {
1018 int cnt;
1019 int id;
1020 enum tree_code oecode;
1021 tree op;
1022 } oecount;
1023
1024
1025 /* The heap for the oecount hashtable and the sorted list of operands. */
1026 static vec<oecount> cvec;
1027
1028
1029 /* Oecount hashtable helpers. */
1030
1031 struct oecount_hasher
1032 {
1033 typedef int value_type;
1034 typedef int compare_type;
1035 typedef int store_values_directly;
1036 static inline hashval_t hash (const value_type &);
1037 static inline bool equal (const value_type &, const compare_type &);
1038 static bool is_deleted (int &v) { return v == 1; }
1039 static void mark_deleted (int &e) { e = 1; }
1040 static bool is_empty (int &v) { return v == 0; }
1041 static void mark_empty (int &e) { e = 0; }
1042 static void remove (int &) {}
1043 };
1044
1045 /* Hash function for oecount. */
1046
1047 inline hashval_t
1048 oecount_hasher::hash (const value_type &p)
1049 {
1050 const oecount *c = &cvec[p - 42];
1051 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1052 }
1053
1054 /* Comparison function for oecount. */
1055
1056 inline bool
1057 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1058 {
1059 const oecount *c1 = &cvec[p1 - 42];
1060 const oecount *c2 = &cvec[p2 - 42];
1061 return (c1->oecode == c2->oecode
1062 && c1->op == c2->op);
1063 }
1064
1065 /* Comparison function for qsort sorting oecount elements by count. */
1066
1067 static int
1068 oecount_cmp (const void *p1, const void *p2)
1069 {
1070 const oecount *c1 = (const oecount *)p1;
1071 const oecount *c2 = (const oecount *)p2;
1072 if (c1->cnt != c2->cnt)
1073 return c1->cnt - c2->cnt;
1074 else
1075 /* If counts are identical, use unique IDs to stabilize qsort. */
1076 return c1->id - c2->id;
1077 }
1078
1079 /* Return TRUE iff STMT represents a builtin call that raises OP
1080 to some exponent. */
1081
1082 static bool
1083 stmt_is_power_of_op (gimple stmt, tree op)
1084 {
1085 tree fndecl;
1086
1087 if (!is_gimple_call (stmt))
1088 return false;
1089
1090 fndecl = gimple_call_fndecl (stmt);
1091
1092 if (!fndecl
1093 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1094 return false;
1095
1096 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1097 {
1098 CASE_FLT_FN (BUILT_IN_POW):
1099 CASE_FLT_FN (BUILT_IN_POWI):
1100 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1101
1102 default:
1103 return false;
1104 }
1105 }
1106
1107 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1108 in place and return the result. Assumes that stmt_is_power_of_op
1109 was previously called for STMT and returned TRUE. */
1110
1111 static HOST_WIDE_INT
1112 decrement_power (gimple stmt)
1113 {
1114 REAL_VALUE_TYPE c, cint;
1115 HOST_WIDE_INT power;
1116 tree arg1;
1117
1118 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1119 {
1120 CASE_FLT_FN (BUILT_IN_POW):
1121 arg1 = gimple_call_arg (stmt, 1);
1122 c = TREE_REAL_CST (arg1);
1123 power = real_to_integer (&c) - 1;
1124 real_from_integer (&cint, VOIDmode, power, SIGNED);
1125 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1126 return power;
1127
1128 CASE_FLT_FN (BUILT_IN_POWI):
1129 arg1 = gimple_call_arg (stmt, 1);
1130 power = TREE_INT_CST_LOW (arg1) - 1;
1131 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1132 return power;
1133
1134 default:
1135 gcc_unreachable ();
1136 }
1137 }
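/* For example (illustrative): for a statement
     t_1 = __builtin_pow (x_2, 3.0);
   this rewrites the call to __builtin_pow (x_2, 2.0) and returns 2;
   a return value of 1 tells the caller that what remains of the call
   is just x_2 itself, so it can be propagated into the single use.  */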
1138
1139 /* Find the single immediate use of STMT's LHS, and replace it
1140 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1141 replace *DEF with OP as well. */
1142
1143 static void
1144 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1145 {
1146 tree lhs;
1147 gimple use_stmt;
1148 use_operand_p use;
1149 gimple_stmt_iterator gsi;
1150
1151 if (is_gimple_call (stmt))
1152 lhs = gimple_call_lhs (stmt);
1153 else
1154 lhs = gimple_assign_lhs (stmt);
1155
1156 gcc_assert (has_single_use (lhs));
1157 single_imm_use (lhs, &use, &use_stmt);
1158 if (lhs == *def)
1159 *def = op;
1160 SET_USE (use, op);
1161 if (TREE_CODE (op) != SSA_NAME)
1162 update_stmt (use_stmt);
1163 gsi = gsi_for_stmt (stmt);
1164 unlink_stmt_vdef (stmt);
1165 reassoc_remove_stmt (&gsi);
1166 release_defs (stmt);
1167 }
1168
1169 /* Walks the linear chain with result *DEF searching for an operation
1170 with operand OP and code OPCODE removing that from the chain. *DEF
1171 is updated if there is only one operand but no operation left. */
1172
1173 static void
1174 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1175 {
1176 gimple stmt = SSA_NAME_DEF_STMT (*def);
1177
1178 do
1179 {
1180 tree name;
1181
1182 if (opcode == MULT_EXPR
1183 && stmt_is_power_of_op (stmt, op))
1184 {
1185 if (decrement_power (stmt) == 1)
1186 propagate_op_to_single_use (op, stmt, def);
1187 return;
1188 }
1189
1190 name = gimple_assign_rhs1 (stmt);
1191
1192 /* If this is the operation we look for and one of the operands
1193 is ours simply propagate the other operand into the stmts
1194 single use. */
1195 if (gimple_assign_rhs_code (stmt) == opcode
1196 && (name == op
1197 || gimple_assign_rhs2 (stmt) == op))
1198 {
1199 if (name == op)
1200 name = gimple_assign_rhs2 (stmt);
1201 propagate_op_to_single_use (name, stmt, def);
1202 return;
1203 }
1204
1205 /* We might have a multiply of two __builtin_pow* calls, and
1206 the operand might be hiding in the rightmost one. */
1207 if (opcode == MULT_EXPR
1208 && gimple_assign_rhs_code (stmt) == opcode
1209 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1210 && has_single_use (gimple_assign_rhs2 (stmt)))
1211 {
1212 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1213 if (stmt_is_power_of_op (stmt2, op))
1214 {
1215 if (decrement_power (stmt2) == 1)
1216 propagate_op_to_single_use (op, stmt2, def);
1217 return;
1218 }
1219 }
1220
1221 /* Continue walking the chain. */
1222 gcc_assert (name != op
1223 && TREE_CODE (name) == SSA_NAME);
1224 stmt = SSA_NAME_DEF_STMT (name);
1225 }
1226 while (1);
1227 }
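/* For example (illustrative): with *DEF == t_3 and the rhs1-linked chain
     t_1 = x_1 * y_1;  t_2 = t_1 * z_1;  t_3 = t_2 * w_1;
   calling this with OPCODE == MULT_EXPR and OP == y_1 removes t_1 and
   forwards x_1 into t_2, so the chain now computes x_1 * z_1 * w_1,
   i.e. exactly one factor of y_1 has been taken out.  */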
1228
1229 /* Returns true if statement S1 dominates statement S2. Like
1230 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1231
1232 static bool
1233 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1234 {
1235 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1236
1237 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1238 SSA_NAME. Assume it lives at the beginning of function and
1239 thus dominates everything. */
1240 if (!bb1 || s1 == s2)
1241 return true;
1242
1243 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1244 if (!bb2)
1245 return false;
1246
1247 if (bb1 == bb2)
1248 {
1249 /* PHIs in the same basic block are assumed to be
1250 executed all in parallel, if only one stmt is a PHI,
1251 it dominates the other stmt in the same basic block. */
1252 if (gimple_code (s1) == GIMPLE_PHI)
1253 return true;
1254
1255 if (gimple_code (s2) == GIMPLE_PHI)
1256 return false;
1257
1258 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1259
1260 if (gimple_uid (s1) < gimple_uid (s2))
1261 return true;
1262
1263 if (gimple_uid (s1) > gimple_uid (s2))
1264 return false;
1265
1266 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1267 unsigned int uid = gimple_uid (s1);
1268 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1269 {
1270 gimple s = gsi_stmt (gsi);
1271 if (gimple_uid (s) != uid)
1272 break;
1273 if (s == s2)
1274 return true;
1275 }
1276
1277 return false;
1278 }
1279
1280 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1281 }
1282
1283 /* Insert STMT after INSERT_POINT. */
1284
1285 static void
1286 insert_stmt_after (gimple stmt, gimple insert_point)
1287 {
1288 gimple_stmt_iterator gsi;
1289 basic_block bb;
1290
1291 if (gimple_code (insert_point) == GIMPLE_PHI)
1292 bb = gimple_bb (insert_point);
1293 else if (!stmt_ends_bb_p (insert_point))
1294 {
1295 gsi = gsi_for_stmt (insert_point);
1296 gimple_set_uid (stmt, gimple_uid (insert_point));
1297 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1298 return;
1299 }
1300 else
1301 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1302 thus if it must end a basic block, it should be a call that can
1303 throw, or some assignment that can throw. If it throws, the LHS
1304 of it will not be initialized though, so only valid places using
1305 the SSA_NAME should be dominated by the fallthru edge. */
1306 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1307 gsi = gsi_after_labels (bb);
1308 if (gsi_end_p (gsi))
1309 {
1310 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1311 gimple_set_uid (stmt,
1312 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1313 }
1314 else
1315 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1317 }
1318
1319 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1320 the result. Places the statement after the definition of either
1321 OP1 or OP2. Returns the new statement. */
1322
1323 static gimple
1324 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1325 {
1326 gimple op1def = NULL, op2def = NULL;
1327 gimple_stmt_iterator gsi;
1328 tree op;
1329 gimple sum;
1330
1331 /* Create the addition statement. */
1332 op = make_ssa_name (type, NULL);
1333 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1334
1335 /* Find an insertion place and insert. */
1336 if (TREE_CODE (op1) == SSA_NAME)
1337 op1def = SSA_NAME_DEF_STMT (op1);
1338 if (TREE_CODE (op2) == SSA_NAME)
1339 op2def = SSA_NAME_DEF_STMT (op2);
1340 if ((!op1def || gimple_nop_p (op1def))
1341 && (!op2def || gimple_nop_p (op2def)))
1342 {
1343 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1344 if (gsi_end_p (gsi))
1345 {
1346 gimple_stmt_iterator gsi2
1347 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1348 gimple_set_uid (sum,
1349 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1350 }
1351 else
1352 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1353 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1354 }
1355 else
1356 {
1357 gimple insert_point;
1358 if ((!op1def || gimple_nop_p (op1def))
1359 || (op2def && !gimple_nop_p (op2def)
1360 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1361 insert_point = op2def;
1362 else
1363 insert_point = op1def;
1364 insert_stmt_after (sum, insert_point);
1365 }
1366 update_stmt (sum);
1367
1368 return sum;
1369 }
1370
1371 /* Perform un-distribution of divisions and multiplications.
1372 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1373 to (A + B) / X for real X.
1374
1375 The algorithm is organized as follows.
1376
1377 - First we walk the addition chain *OPS looking for summands that
1378 are defined by a multiplication or a real division. This results
1379 in the candidates bitmap with relevant indices into *OPS.
1380
1381 - Second we build the chains of multiplications or divisions for
1382 these candidates, counting the number of occurrences of (operand, code)
1383 pairs in all of the candidates chains.
1384
1385 - Third we sort the (operand, code) pairs by number of occurrence and
1386 process them starting with the pair with the most uses.
1387
1388 * For each such pair we walk the candidates again to build a
1389 second candidate bitmap noting all multiplication/division chains
1390 that have at least one occurrence of (operand, code).
1391
1392 * We build an alternate addition chain only covering these
1393 candidates with one (operand, code) operation removed from their
1394 multiplication/division chain.
1395
1396 * The first candidate gets replaced by the alternate addition chain
1397 multiplied/divided by the operand.
1398
1399 * All candidate chains get disabled for further processing and
1400 processing of (operand, code) pairs continues.
1401
1402 The alternate addition chains built are re-processed by the main
1403 reassociation algorithm which allows optimizing a * x * y + b * y * x
1404 to (a + b) * x * y in one invocation of the reassociation pass. */
1405
1406 static bool
1407 undistribute_ops_list (enum tree_code opcode,
1408 vec<operand_entry_t> *ops, struct loop *loop)
1409 {
1410 unsigned int length = ops->length ();
1411 operand_entry_t oe1;
1412 unsigned i, j;
1413 sbitmap candidates, candidates2;
1414 unsigned nr_candidates, nr_candidates2;
1415 sbitmap_iterator sbi0;
1416 vec<operand_entry_t> *subops;
1417 bool changed = false;
1418 int next_oecount_id = 0;
1419
1420 if (length <= 1
1421 || opcode != PLUS_EXPR)
1422 return false;
1423
1424 /* Build a list of candidates to process. */
1425 candidates = sbitmap_alloc (length);
1426 bitmap_clear (candidates);
1427 nr_candidates = 0;
1428 FOR_EACH_VEC_ELT (*ops, i, oe1)
1429 {
1430 enum tree_code dcode;
1431 gimple oe1def;
1432
1433 if (TREE_CODE (oe1->op) != SSA_NAME)
1434 continue;
1435 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1436 if (!is_gimple_assign (oe1def))
1437 continue;
1438 dcode = gimple_assign_rhs_code (oe1def);
1439 if ((dcode != MULT_EXPR
1440 && dcode != RDIV_EXPR)
1441 || !is_reassociable_op (oe1def, dcode, loop))
1442 continue;
1443
1444 bitmap_set_bit (candidates, i);
1445 nr_candidates++;
1446 }
1447
1448 if (nr_candidates < 2)
1449 {
1450 sbitmap_free (candidates);
1451 return false;
1452 }
1453
1454 if (dump_file && (dump_flags & TDF_DETAILS))
1455 {
1456 fprintf (dump_file, "searching for un-distribute opportunities ");
1457 print_generic_expr (dump_file,
1458 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1459 fprintf (dump_file, " %d\n", nr_candidates);
1460 }
1461
1462 /* Build linearized sub-operand lists and the counting table. */
1463 cvec.create (0);
1464
1465 hash_table<oecount_hasher> ctable (15);
1466
1467 /* ??? Macro arguments cannot have multi-argument template types in
1468 them. This typedef is needed to work around that limitation. */
1469 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1470 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1471 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1472 {
1473 gimple oedef;
1474 enum tree_code oecode;
1475 unsigned j;
1476
1477 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1478 oecode = gimple_assign_rhs_code (oedef);
1479 linearize_expr_tree (&subops[i], oedef,
1480 associative_tree_code (oecode), false);
1481
1482 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1483 {
1484 oecount c;
1485 int *slot;
1486 int idx;
1487 c.oecode = oecode;
1488 c.cnt = 1;
1489 c.id = next_oecount_id++;
1490 c.op = oe1->op;
1491 cvec.safe_push (c);
1492 idx = cvec.length () + 41;
1493 slot = ctable.find_slot (idx, INSERT);
1494 if (!*slot)
1495 {
1496 *slot = idx;
1497 }
1498 else
1499 {
1500 cvec.pop ();
1501 cvec[*slot - 42].cnt++;
1502 }
1503 }
1504 }
1505
1506 /* Sort the counting table. */
1507 cvec.qsort (oecount_cmp);
1508
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1510 {
1511 oecount *c;
1512 fprintf (dump_file, "Candidates:\n");
1513 FOR_EACH_VEC_ELT (cvec, j, c)
1514 {
1515 fprintf (dump_file, " %u %s: ", c->cnt,
1516 c->oecode == MULT_EXPR
1517 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1518 print_generic_expr (dump_file, c->op, 0);
1519 fprintf (dump_file, "\n");
1520 }
1521 }
1522
1523 /* Process the (operand, code) pairs in order of most occurrence. */
1524 candidates2 = sbitmap_alloc (length);
1525 while (!cvec.is_empty ())
1526 {
1527 oecount *c = &cvec.last ();
1528 if (c->cnt < 2)
1529 break;
1530
1531 /* Now collect the operands in the outer chain that contain
1532 the common operand in their inner chain. */
1533 bitmap_clear (candidates2);
1534 nr_candidates2 = 0;
1535 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1536 {
1537 gimple oedef;
1538 enum tree_code oecode;
1539 unsigned j;
1540 tree op = (*ops)[i]->op;
1541
1542 /* If we undistributed in this chain already this may be
1543 a constant. */
1544 if (TREE_CODE (op) != SSA_NAME)
1545 continue;
1546
1547 oedef = SSA_NAME_DEF_STMT (op);
1548 oecode = gimple_assign_rhs_code (oedef);
1549 if (oecode != c->oecode)
1550 continue;
1551
1552 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1553 {
1554 if (oe1->op == c->op)
1555 {
1556 bitmap_set_bit (candidates2, i);
1557 ++nr_candidates2;
1558 break;
1559 }
1560 }
1561 }
1562
1563 if (nr_candidates2 >= 2)
1564 {
1565 operand_entry_t oe1, oe2;
1566 gimple prod;
1567 int first = bitmap_first_set_bit (candidates2);
1568
1569 /* Build the new addition chain. */
1570 oe1 = (*ops)[first];
1571 if (dump_file && (dump_flags & TDF_DETAILS))
1572 {
1573 fprintf (dump_file, "Building (");
1574 print_generic_expr (dump_file, oe1->op, 0);
1575 }
1576 zero_one_operation (&oe1->op, c->oecode, c->op);
1577 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1578 {
1579 gimple sum;
1580 oe2 = (*ops)[i];
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1582 {
1583 fprintf (dump_file, " + ");
1584 print_generic_expr (dump_file, oe2->op, 0);
1585 }
1586 zero_one_operation (&oe2->op, c->oecode, c->op);
1587 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1588 oe1->op, oe2->op, opcode);
1589 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1590 oe2->rank = 0;
1591 oe1->op = gimple_get_lhs (sum);
1592 }
1593
1594 /* Apply the multiplication/division. */
1595 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1596 oe1->op, c->op, c->oecode);
1597 if (dump_file && (dump_flags & TDF_DETAILS))
1598 {
1599 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1600 print_generic_expr (dump_file, c->op, 0);
1601 fprintf (dump_file, "\n");
1602 }
1603
1604 /* Record it in the addition chain and disable further
1605 undistribution with this op. */
1606 oe1->op = gimple_assign_lhs (prod);
1607 oe1->rank = get_rank (oe1->op);
1608 subops[first].release ();
1609
1610 changed = true;
1611 }
1612
1613 cvec.pop ();
1614 }
1615
1616 for (i = 0; i < ops->length (); ++i)
1617 subops[i].release ();
1618 free (subops);
1619 cvec.release ();
1620 sbitmap_free (candidates);
1621 sbitmap_free (candidates2);
1622
1623 return changed;
1624 }
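/* A compact example (illustrative): for a PLUS_EXPR operand list
   {t_1, t_2, t_3} with
     t_1 = a_1 * x_1;  t_2 = b_1 * x_1;  t_3 = c_1 * y_1;
   the pair (x_1, MULT_EXPR) occurs twice, so t_1 and t_2 are combined:
   the list becomes {(a_1 + b_1) * x_1, 0, t_3}, and the newly built
   addition a_1 + b_1 is reassociated again by the main algorithm.  */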
1625
1626 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1627 expression, examine the other OPS to see if any of them are comparisons
1628 of the same values, which we may be able to combine or eliminate.
1629 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1630
1631 static bool
1632 eliminate_redundant_comparison (enum tree_code opcode,
1633 vec<operand_entry_t> *ops,
1634 unsigned int currindex,
1635 operand_entry_t curr)
1636 {
1637 tree op1, op2;
1638 enum tree_code lcode, rcode;
1639 gimple def1, def2;
1640 int i;
1641 operand_entry_t oe;
1642
1643 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1644 return false;
1645
1646 /* Check that CURR is a comparison. */
1647 if (TREE_CODE (curr->op) != SSA_NAME)
1648 return false;
1649 def1 = SSA_NAME_DEF_STMT (curr->op);
1650 if (!is_gimple_assign (def1))
1651 return false;
1652 lcode = gimple_assign_rhs_code (def1);
1653 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1654 return false;
1655 op1 = gimple_assign_rhs1 (def1);
1656 op2 = gimple_assign_rhs2 (def1);
1657
1658 /* Now look for a similar comparison in the remaining OPS. */
1659 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1660 {
1661 tree t;
1662
1663 if (TREE_CODE (oe->op) != SSA_NAME)
1664 continue;
1665 def2 = SSA_NAME_DEF_STMT (oe->op);
1666 if (!is_gimple_assign (def2))
1667 continue;
1668 rcode = gimple_assign_rhs_code (def2);
1669 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1670 continue;
1671
1672 /* If we got here, we have a match. See if we can combine the
1673 two comparisons. */
1674 if (opcode == BIT_IOR_EXPR)
1675 t = maybe_fold_or_comparisons (lcode, op1, op2,
1676 rcode, gimple_assign_rhs1 (def2),
1677 gimple_assign_rhs2 (def2));
1678 else
1679 t = maybe_fold_and_comparisons (lcode, op1, op2,
1680 rcode, gimple_assign_rhs1 (def2),
1681 gimple_assign_rhs2 (def2));
1682 if (!t)
1683 continue;
1684
1685 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1686 always give us a boolean_type_node value back. If the original
1687 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1688 we need to convert. */
1689 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1690 t = fold_convert (TREE_TYPE (curr->op), t);
1691
1692 if (TREE_CODE (t) != INTEGER_CST
1693 && !operand_equal_p (t, curr->op, 0))
1694 {
1695 enum tree_code subcode;
1696 tree newop1, newop2;
1697 if (!COMPARISON_CLASS_P (t))
1698 continue;
1699 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1700 STRIP_USELESS_TYPE_CONVERSION (newop1);
1701 STRIP_USELESS_TYPE_CONVERSION (newop2);
1702 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1703 continue;
1704 }
1705
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1707 {
1708 fprintf (dump_file, "Equivalence: ");
1709 print_generic_expr (dump_file, curr->op, 0);
1710 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1711 print_generic_expr (dump_file, oe->op, 0);
1712 fprintf (dump_file, " -> ");
1713 print_generic_expr (dump_file, t, 0);
1714 fprintf (dump_file, "\n");
1715 }
1716
1717 /* Now we can delete oe, as it has been subsumed by the new combined
1718 expression t. */
1719 ops->ordered_remove (i);
1720 reassociate_stats.ops_eliminated ++;
1721
1722 /* If t is the same as curr->op, we're done. Otherwise we must
1723 replace curr->op with t. Special case is if we got a constant
1724 back, in which case we add it to the end instead of in place of
1725 the current entry. */
1726 if (TREE_CODE (t) == INTEGER_CST)
1727 {
1728 ops->ordered_remove (currindex);
1729 add_to_ops_vec (ops, t);
1730 }
1731 else if (!operand_equal_p (t, curr->op, 0))
1732 {
1733 gimple sum;
1734 enum tree_code subcode;
1735 tree newop1;
1736 tree newop2;
1737 gcc_assert (COMPARISON_CLASS_P (t));
1738 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1739 STRIP_USELESS_TYPE_CONVERSION (newop1);
1740 STRIP_USELESS_TYPE_CONVERSION (newop2);
1741 gcc_checking_assert (is_gimple_val (newop1)
1742 && is_gimple_val (newop2));
1743 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1744 curr->op = gimple_get_lhs (sum);
1745 }
1746 return true;
1747 }
1748
1749 return false;
1750 }
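/* Another case worth noting (illustrative): under BIT_AND_EXPR,
   (a_1 < b_2) & (a_1 > b_2) folds to the constant 0; the constant is
   then appended to the operand list rather than substituted in place,
   so it can be merged with other constants later.  */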
1751
1752 /* Perform various identities and other optimizations on the list of
1753 operand entries, stored in OPS. The tree code for the binary
1754 operation between all the operands is OPCODE. */
1755
1756 static void
1757 optimize_ops_list (enum tree_code opcode,
1758 vec<operand_entry_t> *ops)
1759 {
1760 unsigned int length = ops->length ();
1761 unsigned int i;
1762 operand_entry_t oe;
1763 operand_entry_t oelast = NULL;
1764 bool iterate = false;
1765
1766 if (length == 1)
1767 return;
1768
1769 oelast = ops->last ();
1770
1771 /* If the last two are constants, pop the constants off, merge them
1772 and try the next two. */
1773 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1774 {
1775 operand_entry_t oelm1 = (*ops)[length - 2];
1776
1777 if (oelm1->rank == 0
1778 && is_gimple_min_invariant (oelm1->op)
1779 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1780 TREE_TYPE (oelast->op)))
1781 {
1782 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1783 oelm1->op, oelast->op);
1784
1785 if (folded && is_gimple_min_invariant (folded))
1786 {
1787 if (dump_file && (dump_flags & TDF_DETAILS))
1788 fprintf (dump_file, "Merging constants\n");
1789
1790 ops->pop ();
1791 ops->pop ();
1792
1793 add_to_ops_vec (ops, folded);
1794 reassociate_stats.constants_eliminated++;
1795
1796 optimize_ops_list (opcode, ops);
1797 return;
1798 }
1799 }
1800 }
1801
1802 eliminate_using_constants (opcode, ops);
1803 oelast = NULL;
1804
1805 for (i = 0; ops->iterate (i, &oe);)
1806 {
1807 bool done = false;
1808
1809 if (eliminate_not_pairs (opcode, ops, i, oe))
1810 return;
1811 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1812 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1813 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1814 {
1815 if (done)
1816 return;
1817 iterate = true;
1818 oelast = NULL;
1819 continue;
1820 }
1821 oelast = oe;
1822 i++;
1823 }
1824
1825 length = ops->length ();
1826 oelast = ops->last ();
1827
1828 if (iterate)
1829 optimize_ops_list (opcode, ops);
1830 }
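/* For example (illustrative): for PLUS_EXPR the list {a_1, 3, 5} first
   has its two trailing constants folded into a single 8, then the
   constant and pair elimination helpers above get a chance to shrink
   the list further, and the whole routine iterates as long as anything
   changes.  */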
1831
1832 /* The following functions are subroutines to optimize_range_tests and allow
1833 it to try to change a logical combination of comparisons into a range
1834 test.
1835
1836 For example, both
1837 X == 2 || X == 5 || X == 3 || X == 4
1838 and
1839 X >= 2 && X <= 5
1840 are converted to
1841 (unsigned) (X - 2) <= 3
1842
1843 For more information see the comments above fold_range_test in fold-const.c;
1844 this implementation is for GIMPLE. */
1845
1846 struct range_entry
1847 {
1848 tree exp;
1849 tree low;
1850 tree high;
1851 bool in_p;
1852 bool strict_overflow_p;
1853 unsigned int idx, next;
1854 };
1855
1856 /* This is similar to make_range in fold-const.c, but on top of
1857 GIMPLE instead of trees. If EXP is non-NULL, it should be
1858 an SSA_NAME and STMT argument is ignored, otherwise STMT
1859 argument should be a GIMPLE_COND. */
1860
1861 static void
1862 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1863 {
1864 int in_p;
1865 tree low, high;
1866 bool is_bool, strict_overflow_p;
1867
1868 r->exp = NULL_TREE;
1869 r->in_p = false;
1870 r->strict_overflow_p = false;
1871 r->low = NULL_TREE;
1872 r->high = NULL_TREE;
1873 if (exp != NULL_TREE
1874 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1875 return;
1876
1877 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1878 and see if we can refine the range. Some of the cases below may not
1879 happen, but it doesn't seem worth worrying about this. We "continue"
1880 the outer loop when we've changed something; otherwise we "break"
1881 the switch, which will "break" the while. */
1882 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1883 high = low;
1884 in_p = 0;
1885 strict_overflow_p = false;
1886 is_bool = false;
1887 if (exp == NULL_TREE)
1888 is_bool = true;
1889 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1890 {
1891 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1892 is_bool = true;
1893 else
1894 return;
1895 }
1896 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1897 is_bool = true;
1898
1899 while (1)
1900 {
1901 enum tree_code code;
1902 tree arg0, arg1, exp_type;
1903 tree nexp;
1904 location_t loc;
1905
1906 if (exp != NULL_TREE)
1907 {
1908 if (TREE_CODE (exp) != SSA_NAME
1909 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1910 break;
1911
1912 stmt = SSA_NAME_DEF_STMT (exp);
1913 if (!is_gimple_assign (stmt))
1914 break;
1915
1916 code = gimple_assign_rhs_code (stmt);
1917 arg0 = gimple_assign_rhs1 (stmt);
1918 arg1 = gimple_assign_rhs2 (stmt);
1919 exp_type = TREE_TYPE (exp);
1920 }
1921 else
1922 {
1923 code = gimple_cond_code (stmt);
1924 arg0 = gimple_cond_lhs (stmt);
1925 arg1 = gimple_cond_rhs (stmt);
1926 exp_type = boolean_type_node;
1927 }
1928
1929 if (TREE_CODE (arg0) != SSA_NAME)
1930 break;
1931 loc = gimple_location (stmt);
1932 switch (code)
1933 {
1934 case BIT_NOT_EXPR:
1935 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1936 /* Ensure the range is either +[-,0], +[0,0],
1937 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1938 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1939 or similar expression of unconditional true or
1940 false, it should not be negated. */
1941 && ((high && integer_zerop (high))
1942 || (low && integer_onep (low))))
1943 {
1944 in_p = !in_p;
1945 exp = arg0;
1946 continue;
1947 }
1948 break;
1949 case SSA_NAME:
1950 exp = arg0;
1951 continue;
1952 CASE_CONVERT:
1953 if (is_bool)
1954 goto do_default;
1955 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1956 {
1957 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1958 is_bool = true;
1959 else
1960 return;
1961 }
1962 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1963 is_bool = true;
1964 goto do_default;
1965 case EQ_EXPR:
1966 case NE_EXPR:
1967 case LT_EXPR:
1968 case LE_EXPR:
1969 case GE_EXPR:
1970 case GT_EXPR:
1971 is_bool = true;
1972 /* FALLTHRU */
1973 default:
1974 if (!is_bool)
1975 return;
1976 do_default:
1977 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1978 &low, &high, &in_p,
1979 &strict_overflow_p);
1980 if (nexp != NULL_TREE)
1981 {
1982 exp = nexp;
1983 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1984 continue;
1985 }
1986 break;
1987 }
1988 break;
1989 }
1990 if (is_bool)
1991 {
1992 r->exp = exp;
1993 r->in_p = in_p;
1994 r->low = low;
1995 r->high = high;
1996 r->strict_overflow_p = strict_overflow_p;
1997 }
1998 }
1999
2000 /* Comparison function for qsort. Sort entries
2001 without SSA_NAME exp first, then with SSA_NAMEs sorted
2002 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2003 by increasing ->low and if ->low is the same, by increasing
2004 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2005 maximum. */
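/* For instance (SSA versions invented): after qsort all entries for
   x_5 (say +[1, 3] and +[7, 9]) end up adjacent and ordered by ->low,
   ahead of any entries for x_9, so the callers only need to look at
   consecutive entries when merging ranges.  */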
2006
2007 static int
2008 range_entry_cmp (const void *a, const void *b)
2009 {
2010 const struct range_entry *p = (const struct range_entry *) a;
2011 const struct range_entry *q = (const struct range_entry *) b;
2012
2013 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2014 {
2015 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2016 {
2017 /* Group range_entries for the same SSA_NAME together. */
2018 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2019 return -1;
2020 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2021 return 1;
2022 /* If ->low is different, NULL low goes first, then by
2023 ascending low. */
2024 if (p->low != NULL_TREE)
2025 {
2026 if (q->low != NULL_TREE)
2027 {
2028 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2029 p->low, q->low);
2030 if (tem && integer_onep (tem))
2031 return -1;
2032 tem = fold_binary (GT_EXPR, boolean_type_node,
2033 p->low, q->low);
2034 if (tem && integer_onep (tem))
2035 return 1;
2036 }
2037 else
2038 return 1;
2039 }
2040 else if (q->low != NULL_TREE)
2041 return -1;
2042 /* If ->high is different, NULL high goes last, before that by
2043 ascending high. */
2044 if (p->high != NULL_TREE)
2045 {
2046 if (q->high != NULL_TREE)
2047 {
2048 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2049 p->high, q->high);
2050 if (tem && integer_onep (tem))
2051 return -1;
2052 tem = fold_binary (GT_EXPR, boolean_type_node,
2053 p->high, q->high);
2054 if (tem && integer_onep (tem))
2055 return 1;
2056 }
2057 else
2058 return -1;
2059 }
2060 else if (p->high != NULL_TREE)
2061 return 1;
2062 /* If both ranges are the same, sort below by ascending idx. */
2063 }
2064 else
2065 return 1;
2066 }
2067 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2068 return -1;
2069
2070 if (p->idx < q->idx)
2071 return -1;
2072 else
2073 {
2074 gcc_checking_assert (p->idx > q->idx);
2075 return 1;
2076 }
2077 }
2078
2079 /* Helper routine of optimize_range_tests.
2080 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2081 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2082 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2083 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2084 an array of COUNT pointers to other ranges. Return
2085 true if the range merge has been successful.
2086 If OPCODE is ERROR_MARK, this is called from within
2087 maybe_optimize_range_tests and is performing inter-bb range optimization.
2088 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2089 oe->rank. */
2090
2091 static bool
2092 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2093 struct range_entry **otherrangep,
2094 unsigned int count, enum tree_code opcode,
2095 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2096 bool in_p, tree low, tree high, bool strict_overflow_p)
2097 {
2098 operand_entry_t oe = (*ops)[range->idx];
2099 tree op = oe->op;
2100 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2101 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2102 location_t loc = gimple_location (stmt);
2103 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2104 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2105 in_p, low, high);
2106 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2107 gimple_stmt_iterator gsi;
2108 unsigned int i;
2109
2110 if (tem == NULL_TREE)
2111 return false;
2112
2113 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2114 warning_at (loc, OPT_Wstrict_overflow,
2115 "assuming signed overflow does not occur "
2116 "when simplifying range test");
2117
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2119 {
2120 struct range_entry *r;
2121 fprintf (dump_file, "Optimizing range tests ");
2122 print_generic_expr (dump_file, range->exp, 0);
2123 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2124 print_generic_expr (dump_file, range->low, 0);
2125 fprintf (dump_file, ", ");
2126 print_generic_expr (dump_file, range->high, 0);
2127 fprintf (dump_file, "]");
2128 for (i = 0; i < count; i++)
2129 {
2130 if (otherrange)
2131 r = otherrange + i;
2132 else
2133 r = otherrangep[i];
2134 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2135 print_generic_expr (dump_file, r->low, 0);
2136 fprintf (dump_file, ", ");
2137 print_generic_expr (dump_file, r->high, 0);
2138 fprintf (dump_file, "]");
2139 }
2140 fprintf (dump_file, "\n into ");
2141 print_generic_expr (dump_file, tem, 0);
2142 fprintf (dump_file, "\n");
2143 }
2144
2145 if (opcode == BIT_IOR_EXPR
2146 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2147 tem = invert_truthvalue_loc (loc, tem);
2148
2149 tem = fold_convert_loc (loc, optype, tem);
2150 gsi = gsi_for_stmt (stmt);
2151 /* In rare cases range->exp can be equal to lhs of stmt.
2152      In that case we have to insert after the stmt rather than before
2153 it. */
2154 if (op == range->exp)
2155 {
2156 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2157 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2158 GSI_CONTINUE_LINKING);
2159 }
2160 else
2161 {
2162 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2163 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2164 GSI_SAME_STMT);
2165 gsi_prev (&gsi);
2166 }
2167 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2168 if (gimple_uid (gsi_stmt (gsi)))
2169 break;
2170 else
2171 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2172
2173 oe->op = tem;
2174 range->exp = exp;
2175 range->low = low;
2176 range->high = high;
2177 range->in_p = in_p;
2178 range->strict_overflow_p = false;
2179
2180 for (i = 0; i < count; i++)
2181 {
2182 if (otherrange)
2183 range = otherrange + i;
2184 else
2185 range = otherrangep[i];
2186 oe = (*ops)[range->idx];
2187 /* Now change all the other range test immediate uses, so that
2188 those tests will be optimized away. */
2189 if (opcode == ERROR_MARK)
2190 {
2191 if (oe->op)
2192 oe->op = build_int_cst (TREE_TYPE (oe->op),
2193 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2194 else
2195 oe->op = (oe->rank == BIT_IOR_EXPR
2196 ? boolean_false_node : boolean_true_node);
2197 }
2198 else
2199 oe->op = error_mark_node;
2200 range->exp = NULL_TREE;
2201 }
2202 return true;
2203 }
2204
2205 /* Optimize X == CST1 || X == CST2
2206 if popcount (CST1 ^ CST2) == 1 into
2207 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2208 Similarly for ranges. E.g.
2209 X != 2 && X != 3 && X != 10 && X != 11
2210 will be transformed by the previous optimization into
2211 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2212 and this loop can transform that into
2213 !(((X & ~8) - 2U) <= 1U). */
2214
2215 static bool
2216 optimize_range_tests_xor (enum tree_code opcode, tree type,
2217 tree lowi, tree lowj, tree highi, tree highj,
2218 vec<operand_entry_t> *ops,
2219 struct range_entry *rangei,
2220 struct range_entry *rangej)
2221 {
2222 tree lowxor, highxor, tem, exp;
2223 /* Check lowi ^ lowj == highi ^ highj and
2224 popcount (lowi ^ lowj) == 1. */
2225 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2226 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2227 return false;
2228 if (!integer_pow2p (lowxor))
2229 return false;
2230 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2231 if (!tree_int_cst_equal (lowxor, highxor))
2232 return false;
2233
2234 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2235 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2236 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2237 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2238 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2239 NULL, rangei->in_p, lowj, highj,
2240 rangei->strict_overflow_p
2241 || rangej->strict_overflow_p))
2242 return true;
2243 return false;
2244 }
2245
2246 /* Optimize X == CST1 || X == CST2
2247 if popcount (CST2 - CST1) == 1 into
2248 ((X - CST1) & ~(CST2 - CST1)) == 0.
2249 Similarly for ranges. E.g.
2250 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2251 || X == 75 || X == 45
2252 will be transformed by the previous optimization into
2253 (X - 43U) <= 3U || (X - 75U) <= 3U
2254 and this loop can transform that into
2255 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2256 static bool
2257 optimize_range_tests_diff (enum tree_code opcode, tree type,
2258 tree lowi, tree lowj, tree highi, tree highj,
2259 vec<operand_entry_t> *ops,
2260 struct range_entry *rangei,
2261 struct range_entry *rangej)
2262 {
2263 tree tem1, tem2, mask;
2264 /* Check highi - lowi == highj - lowj. */
2265 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2266 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2267 return false;
2268 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2269 if (!tree_int_cst_equal (tem1, tem2))
2270 return false;
2271 /* Check popcount (lowj - lowi) == 1. */
2272 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2273 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2274 return false;
2275 if (!integer_pow2p (tem1))
2276 return false;
2277
2278 type = unsigned_type_for (type);
2279 tem1 = fold_convert (type, tem1);
2280 tem2 = fold_convert (type, tem2);
2281 lowi = fold_convert (type, lowi);
2282 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2283 tem1 = fold_binary (MINUS_EXPR, type,
2284 fold_convert (type, rangei->exp), lowi);
2285 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2286 lowj = build_int_cst (type, 0);
2287 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2288 NULL, rangei->in_p, lowj, tem2,
2289 rangei->strict_overflow_p
2290 || rangej->strict_overflow_p))
2291 return true;
2292 return false;
2293 }
2294
2295 /* This is a helper function that performs the common checks for
2296    optimize_range_tests_xor and optimize_range_tests_diff.
2297    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor,
2298    otherwise it calls optimize_range_tests_diff.  */
2299
2300 static bool
2301 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2302 bool optimize_xor, vec<operand_entry_t> *ops,
2303 struct range_entry *ranges)
2304 {
2305 int i, j;
2306 bool any_changes = false;
2307 for (i = first; i < length; i++)
2308 {
2309 tree lowi, highi, lowj, highj, type, tem;
2310
2311 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2312 continue;
2313 type = TREE_TYPE (ranges[i].exp);
2314 if (!INTEGRAL_TYPE_P (type))
2315 continue;
2316 lowi = ranges[i].low;
2317 if (lowi == NULL_TREE)
2318 lowi = TYPE_MIN_VALUE (type);
2319 highi = ranges[i].high;
2320 if (highi == NULL_TREE)
2321 continue;
2322 for (j = i + 1; j < length && j < i + 64; j++)
2323 {
2324 bool changes;
2325 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2326 continue;
2327 lowj = ranges[j].low;
2328 if (lowj == NULL_TREE)
2329 continue;
2330 highj = ranges[j].high;
2331 if (highj == NULL_TREE)
2332 highj = TYPE_MAX_VALUE (type);
2333 /* Check lowj > highi. */
2334 tem = fold_binary (GT_EXPR, boolean_type_node,
2335 lowj, highi);
2336 if (tem == NULL_TREE || !integer_onep (tem))
2337 continue;
2338 if (optimize_xor)
2339 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2340 highi, highj, ops,
2341 ranges + i, ranges + j);
2342 else
2343 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2344 highi, highj, ops,
2345 ranges + i, ranges + j);
2346 if (changes)
2347 {
2348 any_changes = true;
2349 break;
2350 }
2351 }
2352 }
2353 return any_changes;
2354 }
2355
2356 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2357 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2358 EXP on success, NULL otherwise. */
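/* A small worked example (values invented): with PREC == 64 and
   TOTALLOW == 43, the range [43, 46] yields the mask 0xf (bits 0-3)
   and the range [75, 78] yields 0xf << 32 (bits 32-35): each range
   spans HIGH - LOW + 1 == 4 values and the second one starts
   LOW - TOTALLOW == 32 bits above TOTALLOW.  */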
2359
2360 static tree
2361 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2362 wide_int *mask, tree *totallowp)
2363 {
2364 tree tem = int_const_binop (MINUS_EXPR, high, low);
2365 if (tem == NULL_TREE
2366 || TREE_CODE (tem) != INTEGER_CST
2367 || TREE_OVERFLOW (tem)
2368 || tree_int_cst_sgn (tem) == -1
2369 || compare_tree_int (tem, prec) != -1)
2370 return NULL_TREE;
2371
2372 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2373 *mask = wi::shifted_mask (0, max, false, prec);
2374 if (TREE_CODE (exp) == BIT_AND_EXPR
2375 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2376 {
2377 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2378 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2379 if (wi::popcount (msk) == 1
2380 && wi::ltu_p (msk, prec - max))
2381 {
2382 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2383 max += msk.to_uhwi ();
2384 exp = TREE_OPERAND (exp, 0);
2385 if (integer_zerop (low)
2386 && TREE_CODE (exp) == PLUS_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2388 {
2389 widest_int bias
2390 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2391 TYPE_PRECISION (TREE_TYPE (low))));
2392 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2393 if (totallowp)
2394 {
2395 *totallowp = tbias;
2396 exp = TREE_OPERAND (exp, 0);
2397 STRIP_NOPS (exp);
2398 return exp;
2399 }
2400 else if (!tree_int_cst_lt (totallow, tbias))
2401 return NULL_TREE;
2402 bias -= wi::to_widest (totallow);
2403 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2404 {
2405 *mask = wi::lshift (*mask, bias);
2406 exp = TREE_OPERAND (exp, 0);
2407 STRIP_NOPS (exp);
2408 return exp;
2409 }
2410 }
2411 }
2412 }
2413 if (totallowp)
2414 return exp;
2415 if (!tree_int_cst_lt (totallow, low))
2416 return exp;
2417 tem = int_const_binop (MINUS_EXPR, low, totallow);
2418 if (tem == NULL_TREE
2419 || TREE_CODE (tem) != INTEGER_CST
2420 || TREE_OVERFLOW (tem)
2421 || compare_tree_int (tem, prec - max) == 1)
2422 return NULL_TREE;
2423
2424 *mask = wi::lshift (*mask, wi::to_widest (tem));
2425 return exp;
2426 }
2427
2428 /* Attempt to optimize small range tests using bit test.
2429 E.g.
2430 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2431 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2432    has been optimized by earlier optimizations into:
2433 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2434    As the whole 43 through 82 range spans fewer than 64 values,
2435    for 64-bit word targets optimize that into:
2436 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2437
2438 static bool
2439 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2440 vec<operand_entry_t> *ops,
2441 struct range_entry *ranges)
2442 {
2443 int i, j;
2444 bool any_changes = false;
2445 int prec = GET_MODE_BITSIZE (word_mode);
2446 auto_vec<struct range_entry *, 64> candidates;
2447
2448 for (i = first; i < length - 2; i++)
2449 {
2450 tree lowi, highi, lowj, highj, type;
2451
2452 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2453 continue;
2454 type = TREE_TYPE (ranges[i].exp);
2455 if (!INTEGRAL_TYPE_P (type))
2456 continue;
2457 lowi = ranges[i].low;
2458 if (lowi == NULL_TREE)
2459 lowi = TYPE_MIN_VALUE (type);
2460 highi = ranges[i].high;
2461 if (highi == NULL_TREE)
2462 continue;
2463 wide_int mask;
2464 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2465 highi, &mask, &lowi);
2466 if (exp == NULL_TREE)
2467 continue;
2468 bool strict_overflow_p = ranges[i].strict_overflow_p;
2469 candidates.truncate (0);
2470 int end = MIN (i + 64, length);
2471 for (j = i + 1; j < end; j++)
2472 {
2473 tree exp2;
2474 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2475 continue;
2476 if (ranges[j].exp == exp)
2477 ;
2478 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2479 {
2480 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2481 if (exp2 == exp)
2482 ;
2483 else if (TREE_CODE (exp2) == PLUS_EXPR)
2484 {
2485 exp2 = TREE_OPERAND (exp2, 0);
2486 STRIP_NOPS (exp2);
2487 if (exp2 != exp)
2488 continue;
2489 }
2490 else
2491 continue;
2492 }
2493 else
2494 continue;
2495 lowj = ranges[j].low;
2496 if (lowj == NULL_TREE)
2497 continue;
2498 highj = ranges[j].high;
2499 if (highj == NULL_TREE)
2500 highj = TYPE_MAX_VALUE (type);
2501 wide_int mask2;
2502 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2503 highj, &mask2, NULL);
2504 if (exp2 != exp)
2505 continue;
2506 mask |= mask2;
2507 strict_overflow_p |= ranges[j].strict_overflow_p;
2508 candidates.safe_push (&ranges[j]);
2509 }
2510
2511	  /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2512 if (candidates.length () >= 2)
2513 {
2514 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2515 wi::to_widest (lowi)
2516 + prec - wi::clz (mask));
2517 operand_entry_t oe = (*ops)[ranges[i].idx];
2518 tree op = oe->op;
2519 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2520 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2521 location_t loc = gimple_location (stmt);
2522 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2523
2524	  /* See if it isn't cheaper to pretend the minimum value of the
2525	     range is 0, if the maximum value is small enough.
2526	     We can then avoid subtracting the minimum value, but the
2527	     mask constant could perhaps be more expensive.  */
2528 if (compare_tree_int (lowi, 0) > 0
2529 && compare_tree_int (high, prec) < 0)
2530 {
2531 int cost_diff;
2532 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2533 rtx reg = gen_raw_REG (word_mode, 10000);
2534 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2535 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2536 GEN_INT (-m)), speed_p);
2537 rtx r = immed_wide_int_const (mask, word_mode);
2538 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2539 speed_p);
2540 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2541 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2542 speed_p);
2543 if (cost_diff > 0)
2544 {
2545 mask = wi::lshift (mask, m);
2546 lowi = build_zero_cst (TREE_TYPE (lowi));
2547 }
2548 }
2549
2550 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2551 false, lowi, high);
2552 if (tem == NULL_TREE || is_gimple_val (tem))
2553 continue;
2554 tree etype = unsigned_type_for (TREE_TYPE (exp));
2555 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2556 fold_convert_loc (loc, etype, exp),
2557 fold_convert_loc (loc, etype, lowi));
2558 exp = fold_convert_loc (loc, integer_type_node, exp);
2559 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2560 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2561 build_int_cst (word_type, 1), exp);
2562 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2563 wide_int_to_tree (word_type, mask));
2564 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2565 build_zero_cst (word_type));
2566 if (is_gimple_val (exp))
2567 continue;
2568
2569 /* The shift might have undefined behavior if TEM is true,
2570 but reassociate_bb isn't prepared to have basic blocks
2571	     split when it is running.  So, temporarily emit code
2572 with BIT_IOR_EXPR instead of &&, and fix it up in
2573 branch_fixup. */
2574 gimple_seq seq;
2575 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2576 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2577 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2578 gimple_seq seq2;
2579 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2580 gimple_seq_add_seq_without_update (&seq, seq2);
2581 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2582 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2583 gimple g
2584 = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2585 make_ssa_name (optype, NULL),
2586 tem, exp);
2587 gimple_set_location (g, loc);
2588 gimple_seq_add_stmt_without_update (&seq, g);
2589 exp = gimple_assign_lhs (g);
2590 tree val = build_zero_cst (optype);
2591 if (update_range_test (&ranges[i], NULL, candidates.address (),
2592 candidates.length (), opcode, ops, exp,
2593 seq, false, val, val, strict_overflow_p))
2594 {
2595 any_changes = true;
2596 reassoc_branch_fixups.safe_push (tem);
2597 }
2598 else
2599 gimple_seq_discard (seq);
2600 }
2601 }
2602 return any_changes;
2603 }
2604
2605 /* Optimize range tests, similarly to how fold_range_test optimizes
2606    them on trees.  The tree code for the binary
2607 operation between all the operands is OPCODE.
2608 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2609 maybe_optimize_range_tests for inter-bb range optimization.
2610    In that case, if oe->op is NULL, oe->id is the index of the bb whose
2611    GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2612 the actual opcode. */
2613
2614 static bool
2615 optimize_range_tests (enum tree_code opcode,
2616 vec<operand_entry_t> *ops)
2617 {
2618 unsigned int length = ops->length (), i, j, first;
2619 operand_entry_t oe;
2620 struct range_entry *ranges;
2621 bool any_changes = false;
2622
2623 if (length == 1)
2624 return false;
2625
2626 ranges = XNEWVEC (struct range_entry, length);
2627 for (i = 0; i < length; i++)
2628 {
2629 oe = (*ops)[i];
2630 ranges[i].idx = i;
2631 init_range_entry (ranges + i, oe->op,
2632 oe->op ? NULL :
2633 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2634 /* For | invert it now, we will invert it again before emitting
2635 the optimized expression. */
2636 if (opcode == BIT_IOR_EXPR
2637 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2638 ranges[i].in_p = !ranges[i].in_p;
2639 }
2640
2641 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2642 for (i = 0; i < length; i++)
2643 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2644 break;
2645
2646 /* Try to merge ranges. */
2647 for (first = i; i < length; i++)
2648 {
2649 tree low = ranges[i].low;
2650 tree high = ranges[i].high;
2651 int in_p = ranges[i].in_p;
2652 bool strict_overflow_p = ranges[i].strict_overflow_p;
2653 int update_fail_count = 0;
2654
2655 for (j = i + 1; j < length; j++)
2656 {
2657 if (ranges[i].exp != ranges[j].exp)
2658 break;
2659 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2660 ranges[j].in_p, ranges[j].low, ranges[j].high))
2661 break;
2662 strict_overflow_p |= ranges[j].strict_overflow_p;
2663 }
2664
2665 if (j == i + 1)
2666 continue;
2667
2668 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2669 opcode, ops, ranges[i].exp, NULL, in_p,
2670 low, high, strict_overflow_p))
2671 {
2672 i = j - 1;
2673 any_changes = true;
2674 }
2675 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2676 while update_range_test would fail. */
2677 else if (update_fail_count == 64)
2678 i = j - 1;
2679 else
2680 ++update_fail_count;
2681 }
2682
2683 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2684 ops, ranges);
2685
2686 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2687 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2688 ops, ranges);
2689 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2690 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2691 ops, ranges);
2692
2693 if (any_changes && opcode != ERROR_MARK)
2694 {
2695 j = 0;
2696 FOR_EACH_VEC_ELT (*ops, i, oe)
2697 {
2698 if (oe->op == error_mark_node)
2699 continue;
2700 else if (i != j)
2701 (*ops)[j] = oe;
2702 j++;
2703 }
2704 ops->truncate (j);
2705 }
2706
2707 XDELETEVEC (ranges);
2708 return any_changes;
2709 }
2710
2711 /* Return true if STMT is a cast like:
2712 <bb N>:
2713 ...
2714 _123 = (int) _234;
2715
2716 <bb M>:
2717 # _345 = PHI <_123(N), 1(...), 1(...)>
2718 where _234 has bool type, _123 has single use and
2719 bb N has a single successor M. This is commonly used in
2720 the last block of a range test. */
2721
2722 static bool
2723 final_range_test_p (gimple stmt)
2724 {
2725 basic_block bb, rhs_bb;
2726 edge e;
2727 tree lhs, rhs;
2728 use_operand_p use_p;
2729 gimple use_stmt;
2730
2731 if (!gimple_assign_cast_p (stmt))
2732 return false;
2733 bb = gimple_bb (stmt);
2734 if (!single_succ_p (bb))
2735 return false;
2736 e = single_succ_edge (bb);
2737 if (e->flags & EDGE_COMPLEX)
2738 return false;
2739
2740 lhs = gimple_assign_lhs (stmt);
2741 rhs = gimple_assign_rhs1 (stmt);
2742 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2743 || TREE_CODE (rhs) != SSA_NAME
2744 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2745 return false;
2746
2747 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2748 if (!single_imm_use (lhs, &use_p, &use_stmt))
2749 return false;
2750
2751 if (gimple_code (use_stmt) != GIMPLE_PHI
2752 || gimple_bb (use_stmt) != e->dest)
2753 return false;
2754
2755 /* And that the rhs is defined in the same loop. */
2756 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2757 if (rhs_bb == NULL
2758 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2759 return false;
2760
2761 return true;
2762 }
2763
2764 /* Return true if BB is a suitable basic block for inter-bb range test
2765    optimization.  If BACKWARD is true, BB should be the only predecessor
2766    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2767    or compared against to find the common basic block to which all
2768    conditions branch when true resp. false.  If BACKWARD is false,
2769    TEST_BB should be the only predecessor of BB.  */
2770
2771 static bool
2772 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2773 bool backward)
2774 {
2775 edge_iterator ei, ei2;
2776 edge e, e2;
2777 gimple stmt;
2778 gimple_stmt_iterator gsi;
2779 bool other_edge_seen = false;
2780 bool is_cond;
2781
2782 if (test_bb == bb)
2783 return false;
2784 /* Check last stmt first. */
2785 stmt = last_stmt (bb);
2786 if (stmt == NULL
2787 || (gimple_code (stmt) != GIMPLE_COND
2788 && (backward || !final_range_test_p (stmt)))
2789 || gimple_visited_p (stmt)
2790 || stmt_could_throw_p (stmt)
2791 || *other_bb == bb)
2792 return false;
2793 is_cond = gimple_code (stmt) == GIMPLE_COND;
2794 if (is_cond)
2795 {
2796 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2797 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2798 to *OTHER_BB (if not set yet, try to find it out). */
2799 if (EDGE_COUNT (bb->succs) != 2)
2800 return false;
2801 FOR_EACH_EDGE (e, ei, bb->succs)
2802 {
2803 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2804 return false;
2805 if (e->dest == test_bb)
2806 {
2807 if (backward)
2808 continue;
2809 else
2810 return false;
2811 }
2812 if (e->dest == bb)
2813 return false;
2814 if (*other_bb == NULL)
2815 {
2816 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2817 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2818 return false;
2819 else if (e->dest == e2->dest)
2820 *other_bb = e->dest;
2821 if (*other_bb == NULL)
2822 return false;
2823 }
2824 if (e->dest == *other_bb)
2825 other_edge_seen = true;
2826 else if (backward)
2827 return false;
2828 }
2829 if (*other_bb == NULL || !other_edge_seen)
2830 return false;
2831 }
2832 else if (single_succ (bb) != *other_bb)
2833 return false;
2834
2835 /* Now check all PHIs of *OTHER_BB. */
2836 e = find_edge (bb, *other_bb);
2837 e2 = find_edge (test_bb, *other_bb);
2838 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2839 {
2840 gimple phi = gsi_stmt (gsi);
2841 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2842 corresponding to BB and TEST_BB predecessor must be the same. */
2843 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2844 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2845 {
2846 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2847 one of the PHIs should have the lhs of the last stmt in
2848 that block as PHI arg and that PHI should have 0 or 1
2849 corresponding to it in all other range test basic blocks
2850 considered. */
2851 if (!is_cond)
2852 {
2853 if (gimple_phi_arg_def (phi, e->dest_idx)
2854 == gimple_assign_lhs (stmt)
2855 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2856 || integer_onep (gimple_phi_arg_def (phi,
2857 e2->dest_idx))))
2858 continue;
2859 }
2860 else
2861 {
2862 gimple test_last = last_stmt (test_bb);
2863 if (gimple_code (test_last) != GIMPLE_COND
2864 && gimple_phi_arg_def (phi, e2->dest_idx)
2865 == gimple_assign_lhs (test_last)
2866 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2867 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2868 continue;
2869 }
2870
2871 return false;
2872 }
2873 }
2874 return true;
2875 }
2876
2877 /* Return true if BB doesn't have side-effects that would disallow
2878 range test optimization, all SSA_NAMEs set in the bb are consumed
2879 in the bb and there are no PHIs. */
2880
2881 static bool
2882 no_side_effect_bb (basic_block bb)
2883 {
2884 gimple_stmt_iterator gsi;
2885 gimple last;
2886
2887 if (!gimple_seq_empty_p (phi_nodes (bb)))
2888 return false;
2889 last = last_stmt (bb);
2890 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2891 {
2892 gimple stmt = gsi_stmt (gsi);
2893 tree lhs;
2894 imm_use_iterator imm_iter;
2895 use_operand_p use_p;
2896
2897 if (is_gimple_debug (stmt))
2898 continue;
2899 if (gimple_has_side_effects (stmt))
2900 return false;
2901 if (stmt == last)
2902 return true;
2903 if (!is_gimple_assign (stmt))
2904 return false;
2905 lhs = gimple_assign_lhs (stmt);
2906 if (TREE_CODE (lhs) != SSA_NAME)
2907 return false;
2908 if (gimple_assign_rhs_could_trap_p (stmt))
2909 return false;
2910 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2911 {
2912 gimple use_stmt = USE_STMT (use_p);
2913 if (is_gimple_debug (use_stmt))
2914 continue;
2915 if (gimple_bb (use_stmt) != bb)
2916 return false;
2917 }
2918 }
2919 return false;
2920 }
2921
2922 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2923 return true and fill in *OPS recursively. */
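/* A sketch of the recursion (SSA names invented): for

     _5 = _1 | _2;
     _7 = _5 | _6;

   get_ops (_7, BIT_IOR_EXPR, ...) pushes _1, _2 and _6 into *OPS,
   provided each of them has a single use and _5's definition is
   reassociable; _5 itself is not pushed because it has been flattened
   into its operands.  */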
2924
2925 static bool
2926 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2927 struct loop *loop)
2928 {
2929 gimple stmt = SSA_NAME_DEF_STMT (var);
2930 tree rhs[2];
2931 int i;
2932
2933 if (!is_reassociable_op (stmt, code, loop))
2934 return false;
2935
2936 rhs[0] = gimple_assign_rhs1 (stmt);
2937 rhs[1] = gimple_assign_rhs2 (stmt);
2938 gimple_set_visited (stmt, true);
2939 for (i = 0; i < 2; i++)
2940 if (TREE_CODE (rhs[i]) == SSA_NAME
2941 && !get_ops (rhs[i], code, ops, loop)
2942 && has_single_use (rhs[i]))
2943 {
2944 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2945
2946 oe->op = rhs[i];
2947 oe->rank = code;
2948 oe->id = 0;
2949 oe->count = 1;
2950 ops->safe_push (oe);
2951 }
2952 return true;
2953 }
2954
2955 /* Find the ops that were added by get_ops starting from VAR, see if
2956 they were changed during update_range_test and if yes, create new
2957 stmts. */
2958
2959 static tree
2960 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2961 unsigned int *pidx, struct loop *loop)
2962 {
2963 gimple stmt = SSA_NAME_DEF_STMT (var);
2964 tree rhs[4];
2965 int i;
2966
2967 if (!is_reassociable_op (stmt, code, loop))
2968 return NULL;
2969
2970 rhs[0] = gimple_assign_rhs1 (stmt);
2971 rhs[1] = gimple_assign_rhs2 (stmt);
2972 rhs[2] = rhs[0];
2973 rhs[3] = rhs[1];
2974 for (i = 0; i < 2; i++)
2975 if (TREE_CODE (rhs[i]) == SSA_NAME)
2976 {
2977 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2978 if (rhs[2 + i] == NULL_TREE)
2979 {
2980 if (has_single_use (rhs[i]))
2981 rhs[2 + i] = ops[(*pidx)++]->op;
2982 else
2983 rhs[2 + i] = rhs[i];
2984 }
2985 }
2986 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2987 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2988 {
2989 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2990 var = make_ssa_name (TREE_TYPE (var), NULL);
2991 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2992 var, rhs[2], rhs[3]);
2993 gimple_set_uid (g, gimple_uid (stmt));
2994 gimple_set_visited (g, true);
2995 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2996 }
2997 return var;
2998 }
2999
3000 /* Structure to track the initial value passed to get_ops and
3001 the range in the ops vector for each basic block. */
3002
3003 struct inter_bb_range_test_entry
3004 {
3005 tree op;
3006 unsigned int first_idx, last_idx;
3007 };
3008
3009 /* Inter-bb range test optimization. */
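/* An illustrative sketch (block and SSA numbers invented) of a CFG
   shape this handles, for a test like a_1 < 10 || a_1 > 20 || a_1 == 30:

     <bb 2>:
     if (a_1 < 10) goto <bb 5>; else goto <bb 3>;

     <bb 3>:
     if (a_1 > 20) goto <bb 5>; else goto <bb 4>;

     <bb 4>:
     _6 = a_1 == 30;
     _7 = (int) _6;

     <bb 5>:
     # _8 = PHI <1(2), 1(3), _7(4)>

   Here first_bb is bb 2, last_bb is bb 4 and other_bb is bb 5; the
   three tests are collected into one ops vector and handed to
   optimize_range_tests with opcode ERROR_MARK.  */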
3010
3011 static void
3012 maybe_optimize_range_tests (gimple stmt)
3013 {
3014 basic_block first_bb = gimple_bb (stmt);
3015 basic_block last_bb = first_bb;
3016 basic_block other_bb = NULL;
3017 basic_block bb;
3018 edge_iterator ei;
3019 edge e;
3020 auto_vec<operand_entry_t> ops;
3021 auto_vec<inter_bb_range_test_entry> bbinfo;
3022 bool any_changes = false;
3023
3024 /* Consider only basic blocks that end with GIMPLE_COND or
3025 a cast statement satisfying final_range_test_p. All
3026 but the last bb in the first_bb .. last_bb range
3027 should end with GIMPLE_COND. */
3028 if (gimple_code (stmt) == GIMPLE_COND)
3029 {
3030 if (EDGE_COUNT (first_bb->succs) != 2)
3031 return;
3032 }
3033 else if (final_range_test_p (stmt))
3034 other_bb = single_succ (first_bb);
3035 else
3036 return;
3037
3038 if (stmt_could_throw_p (stmt))
3039 return;
3040
3041 /* As relative ordering of post-dominator sons isn't fixed,
3042 maybe_optimize_range_tests can be called first on any
3043 bb in the range we want to optimize. So, start searching
3044 backwards, if first_bb can be set to a predecessor. */
3045 while (single_pred_p (first_bb))
3046 {
3047 basic_block pred_bb = single_pred (first_bb);
3048 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3049 break;
3050 if (!no_side_effect_bb (first_bb))
3051 break;
3052 first_bb = pred_bb;
3053 }
3054 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3055 Before starting forward search in last_bb successors, find
3056 out the other_bb. */
3057 if (first_bb == last_bb)
3058 {
3059 other_bb = NULL;
3060       /* As a non-GIMPLE_COND last stmt always terminates the range,
3061	 a forward search could not discover anything, so just give up.  */
3062 if (gimple_code (stmt) != GIMPLE_COND)
3063 return;
3064 /* Look at both successors. Either it ends with a GIMPLE_COND
3065 and satisfies suitable_cond_bb, or ends with a cast and
3066 other_bb is that cast's successor. */
3067 FOR_EACH_EDGE (e, ei, first_bb->succs)
3068 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3069 || e->dest == first_bb)
3070 return;
3071 else if (single_pred_p (e->dest))
3072 {
3073 stmt = last_stmt (e->dest);
3074 if (stmt
3075 && gimple_code (stmt) == GIMPLE_COND
3076 && EDGE_COUNT (e->dest->succs) == 2)
3077 {
3078 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3079 break;
3080 else
3081 other_bb = NULL;
3082 }
3083 else if (stmt
3084 && final_range_test_p (stmt)
3085 && find_edge (first_bb, single_succ (e->dest)))
3086 {
3087 other_bb = single_succ (e->dest);
3088 if (other_bb == first_bb)
3089 other_bb = NULL;
3090 }
3091 }
3092 if (other_bb == NULL)
3093 return;
3094 }
3095 /* Now do the forward search, moving last_bb to successor bbs
3096 that aren't other_bb. */
3097 while (EDGE_COUNT (last_bb->succs) == 2)
3098 {
3099 FOR_EACH_EDGE (e, ei, last_bb->succs)
3100 if (e->dest != other_bb)
3101 break;
3102 if (e == NULL)
3103 break;
3104 if (!single_pred_p (e->dest))
3105 break;
3106 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3107 break;
3108 if (!no_side_effect_bb (e->dest))
3109 break;
3110 last_bb = e->dest;
3111 }
3112 if (first_bb == last_bb)
3113 return;
3114 /* Here basic blocks first_bb through last_bb's predecessor
3115 end with GIMPLE_COND, all of them have one of the edges to
3116 other_bb and another to another block in the range,
3117 all blocks except first_bb don't have side-effects and
3118 last_bb ends with either GIMPLE_COND, or cast satisfying
3119 final_range_test_p. */
3120 for (bb = last_bb; ; bb = single_pred (bb))
3121 {
3122 enum tree_code code;
3123 tree lhs, rhs;
3124 inter_bb_range_test_entry bb_ent;
3125
3126 bb_ent.op = NULL_TREE;
3127 bb_ent.first_idx = ops.length ();
3128 bb_ent.last_idx = bb_ent.first_idx;
3129 e = find_edge (bb, other_bb);
3130 stmt = last_stmt (bb);
3131 gimple_set_visited (stmt, true);
3132 if (gimple_code (stmt) != GIMPLE_COND)
3133 {
3134 use_operand_p use_p;
3135 gimple phi;
3136 edge e2;
3137 unsigned int d;
3138
3139 lhs = gimple_assign_lhs (stmt);
3140 rhs = gimple_assign_rhs1 (stmt);
3141 gcc_assert (bb == last_bb);
3142
3143 /* stmt is
3144 _123 = (int) _234;
3145
3146 followed by:
3147 <bb M>:
3148 # _345 = PHI <_123(N), 1(...), 1(...)>
3149
3150 or 0 instead of 1. If it is 0, the _234
3151 range test is anded together with all the
3152 other range tests, if it is 1, it is ored with
3153 them. */
3154 single_imm_use (lhs, &use_p, &phi);
3155 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3156 e2 = find_edge (first_bb, other_bb);
3157 d = e2->dest_idx;
3158 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3159 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3160 code = BIT_AND_EXPR;
3161 else
3162 {
3163 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3164 code = BIT_IOR_EXPR;
3165 }
3166
3167 /* If _234 SSA_NAME_DEF_STMT is
3168 _234 = _567 | _789;
3169	     (or &, corresponding to 1/0 in the phi arguments),
3170 push into ops the individual range test arguments
3171 of the bitwise or resp. and, recursively. */
3172 if (!get_ops (rhs, code, &ops,
3173 loop_containing_stmt (stmt))
3174 && has_single_use (rhs))
3175 {
3176 /* Otherwise, push the _234 range test itself. */
3177 operand_entry_t oe
3178 = (operand_entry_t) pool_alloc (operand_entry_pool);
3179
3180 oe->op = rhs;
3181 oe->rank = code;
3182 oe->id = 0;
3183 oe->count = 1;
3184 ops.safe_push (oe);
3185 bb_ent.last_idx++;
3186 }
3187 else
3188 bb_ent.last_idx = ops.length ();
3189 bb_ent.op = rhs;
3190 bbinfo.safe_push (bb_ent);
3191 continue;
3192 }
3193 /* Otherwise stmt is GIMPLE_COND. */
3194 code = gimple_cond_code (stmt);
3195 lhs = gimple_cond_lhs (stmt);
3196 rhs = gimple_cond_rhs (stmt);
3197 if (TREE_CODE (lhs) == SSA_NAME
3198 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3199 && ((code != EQ_EXPR && code != NE_EXPR)
3200 || rhs != boolean_false_node
3201 /* Either push into ops the individual bitwise
3202 or resp. and operands, depending on which
3203 edge is other_bb. */
3204 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3205 ^ (code == EQ_EXPR))
3206 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3207 loop_containing_stmt (stmt))))
3208 {
3209 /* Or push the GIMPLE_COND stmt itself. */
3210 operand_entry_t oe
3211 = (operand_entry_t) pool_alloc (operand_entry_pool);
3212
3213 oe->op = NULL;
3214 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3215 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3216	  /* oe->op = NULL signals that there is no SSA_NAME
3217	     for the range test, and oe->id instead is the
3218	     number of the basic block at whose end the
3219	     GIMPLE_COND is.  */
3220 oe->id = bb->index;
3221 oe->count = 1;
3222 ops.safe_push (oe);
3223 bb_ent.op = NULL;
3224 bb_ent.last_idx++;
3225 }
3226 else if (ops.length () > bb_ent.first_idx)
3227 {
3228 bb_ent.op = lhs;
3229 bb_ent.last_idx = ops.length ();
3230 }
3231 bbinfo.safe_push (bb_ent);
3232 if (bb == first_bb)
3233 break;
3234 }
3235 if (ops.length () > 1)
3236 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3237 if (any_changes)
3238 {
3239 unsigned int idx;
3240 /* update_ops relies on has_single_use predicates returning the
3241 same values as it did during get_ops earlier. Additionally it
3242 never removes statements, only adds new ones and it should walk
3243 from the single imm use and check the predicate already before
3244 making those changes.
3245 On the other side, the handling of GIMPLE_COND directly can turn
3246 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3247 it needs to be done in a separate loop afterwards. */
3248 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3249 {
3250 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3251 && bbinfo[idx].op != NULL_TREE)
3252 {
3253 tree new_op;
3254
3255 stmt = last_stmt (bb);
3256 new_op = update_ops (bbinfo[idx].op,
3257 (enum tree_code)
3258 ops[bbinfo[idx].first_idx]->rank,
3259 ops, &bbinfo[idx].first_idx,
3260 loop_containing_stmt (stmt));
3261 if (new_op == NULL_TREE)
3262 {
3263 gcc_assert (bb == last_bb);
3264 new_op = ops[bbinfo[idx].first_idx++]->op;
3265 }
3266 if (bbinfo[idx].op != new_op)
3267 {
3268 imm_use_iterator iter;
3269 use_operand_p use_p;
3270 gimple use_stmt, cast_stmt = NULL;
3271
3272 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3273 if (is_gimple_debug (use_stmt))
3274 continue;
3275 else if (gimple_code (use_stmt) == GIMPLE_COND
3276 || gimple_code (use_stmt) == GIMPLE_PHI)
3277 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3278 SET_USE (use_p, new_op);
3279 else if (gimple_assign_cast_p (use_stmt))
3280 cast_stmt = use_stmt;
3281 else
3282 gcc_unreachable ();
3283 if (cast_stmt)
3284 {
3285 gcc_assert (bb == last_bb);
3286 tree lhs = gimple_assign_lhs (cast_stmt);
3287 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3288 enum tree_code rhs_code
3289 = gimple_assign_rhs_code (cast_stmt);
3290 gimple g;
3291 if (is_gimple_min_invariant (new_op))
3292 {
3293 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3294 g = gimple_build_assign (new_lhs, new_op);
3295 }
3296 else
3297 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3298 new_op, NULL_TREE);
3299 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3300 gimple_set_uid (g, gimple_uid (cast_stmt));
3301 gimple_set_visited (g, true);
3302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3303 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3304 if (is_gimple_debug (use_stmt))
3305 continue;
3306 else if (gimple_code (use_stmt) == GIMPLE_COND
3307 || gimple_code (use_stmt) == GIMPLE_PHI)
3308 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3309 SET_USE (use_p, new_lhs);
3310 else
3311 gcc_unreachable ();
3312 }
3313 }
3314 }
3315 if (bb == first_bb)
3316 break;
3317 }
3318 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3319 {
3320 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3321 && bbinfo[idx].op == NULL_TREE
3322 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3323 {
3324 stmt = last_stmt (bb);
3325 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3326 gimple_cond_make_false (stmt);
3327 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3328 gimple_cond_make_true (stmt);
3329 else
3330 {
3331 gimple_cond_set_code (stmt, NE_EXPR);
3332 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3333 gimple_cond_set_rhs (stmt, boolean_false_node);
3334 }
3335 update_stmt (stmt);
3336 }
3337 if (bb == first_bb)
3338 break;
3339 }
3340 }
3341 }
3342
3343 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3344    of STMT in its operands.  This is also known as a "destructive
3345 update" operation. */
3346
3347 static bool
3348 is_phi_for_stmt (gimple stmt, tree operand)
3349 {
3350 gimple def_stmt;
3351 tree lhs;
3352 use_operand_p arg_p;
3353 ssa_op_iter i;
3354
3355 if (TREE_CODE (operand) != SSA_NAME)
3356 return false;
3357
3358 lhs = gimple_assign_lhs (stmt);
3359
3360 def_stmt = SSA_NAME_DEF_STMT (operand);
3361 if (gimple_code (def_stmt) != GIMPLE_PHI)
3362 return false;
3363
3364 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3365 if (lhs == USE_FROM_PTR (arg_p))
3366 return true;
3367 return false;
3368 }
3369
3370 /* Remove def stmt of VAR if VAR has zero uses and recurse
3371 on rhs1 operand if so. */
3372
3373 static void
3374 remove_visited_stmt_chain (tree var)
3375 {
3376 gimple stmt;
3377 gimple_stmt_iterator gsi;
3378
3379 while (1)
3380 {
3381 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3382 return;
3383 stmt = SSA_NAME_DEF_STMT (var);
3384 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3385 {
3386 var = gimple_assign_rhs1 (stmt);
3387 gsi = gsi_for_stmt (stmt);
3388 reassoc_remove_stmt (&gsi);
3389 release_defs (stmt);
3390 }
3391 else
3392 return;
3393 }
3394 }
3395
3396 /* This function checks three consecutive operands in
3397    the passed operands vector OPS starting from OPINDEX and
3398    swaps two operands if it is profitable for the binary operation
3399    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3400
3401 We pair ops with the same rank if possible.
3402
3403 The alternative we try is to see if STMT is a destructive
3404 update style statement, which is like:
3405 b = phi (a, ...)
3406 a = c + b;
3407 In that case, we want to use the destructive update form to
3408 expose the possible vectorizer sum reduction opportunity.
3409 In that case, the third operand will be the phi node. This
3410 check is not performed if STMT is null.
3411
3412 We could, of course, try to be better as noted above, and do a
3413 lot of work to try to find these opportunities in >3 operand
3414 cases, but it is unlikely to be worth it. */
3415
3416 static void
3417 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3418 unsigned int opindex, gimple stmt)
3419 {
3420 operand_entry_t oe1, oe2, oe3;
3421
3422 oe1 = ops[opindex];
3423 oe2 = ops[opindex + 1];
3424 oe3 = ops[opindex + 2];
3425
3426 if ((oe1->rank == oe2->rank
3427 && oe2->rank != oe3->rank)
3428 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3429 && !is_phi_for_stmt (stmt, oe1->op)
3430 && !is_phi_for_stmt (stmt, oe2->op)))
3431 {
3432 struct operand_entry temp = *oe3;
3433 oe3->op = oe1->op;
3434 oe3->rank = oe1->rank;
3435 oe1->op = temp.op;
3436 oe1->rank= temp.rank;
3437 }
3438 else if ((oe1->rank == oe3->rank
3439 && oe2->rank != oe3->rank)
3440 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3441 && !is_phi_for_stmt (stmt, oe1->op)
3442 && !is_phi_for_stmt (stmt, oe3->op)))
3443 {
3444 struct operand_entry temp = *oe2;
3445 oe2->op = oe1->op;
3446 oe2->rank = oe1->rank;
3447 oe1->op = temp.op;
3448 oe1->rank = temp.rank;
3449 }
3450 }
3451
3452 /* If the definition of RHS1 or RHS2 comes after STMT (i.e. STMT dominates
3453    it), return the later of those two definitions, otherwise return STMT.  */
3454
3455 static inline gimple
3456 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3457 {
3458 if (TREE_CODE (rhs1) == SSA_NAME
3459 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3460 stmt = SSA_NAME_DEF_STMT (rhs1);
3461 if (TREE_CODE (rhs2) == SSA_NAME
3462 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3463 stmt = SSA_NAME_DEF_STMT (rhs2);
3464 return stmt;
3465 }
3466
3467 /* Recursively rewrite our linearized statements so that the operators
3468 match those in OPS[OPINDEX], putting the computation in rank
3469 order. Return new lhs. */
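/* A rough sketch (temporaries invented): with four entries left in
   OPS, the linearized chain

     _t1 = ...;  _t2 = _t1 OP ...;  lhs = _t2 OP ...;

   is rewritten so that the innermost statement computes
   ops[2] OP ops[3], the middle one combines in ops[1] and the
   outermost combines in ops[0]; i.e. the last entries of the sorted
   vector end up deepest in the chain.  */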
3470
3471 static tree
3472 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3473 vec<operand_entry_t> ops, bool changed)
3474 {
3475 tree rhs1 = gimple_assign_rhs1 (stmt);
3476 tree rhs2 = gimple_assign_rhs2 (stmt);
3477 tree lhs = gimple_assign_lhs (stmt);
3478 operand_entry_t oe;
3479
3480 /* The final recursion case for this function is that you have
3481 exactly two operations left.
3482    If we had exactly one op in the entire list to start with, we
3483 would have never called this function, and the tail recursion
3484 rewrites them one at a time. */
3485 if (opindex + 2 == ops.length ())
3486 {
3487 operand_entry_t oe1, oe2;
3488
3489 oe1 = ops[opindex];
3490 oe2 = ops[opindex + 1];
3491
3492 if (rhs1 != oe1->op || rhs2 != oe2->op)
3493 {
3494 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3495 unsigned int uid = gimple_uid (stmt);
3496
3497 if (dump_file && (dump_flags & TDF_DETAILS))
3498 {
3499 fprintf (dump_file, "Transforming ");
3500 print_gimple_stmt (dump_file, stmt, 0, 0);
3501 }
3502
3503 if (changed)
3504 {
3505 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3506 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3507 stmt
3508 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3509 lhs, oe1->op, oe2->op);
3510 gimple_set_uid (stmt, uid);
3511 gimple_set_visited (stmt, true);
3512 if (insert_point == gsi_stmt (gsi))
3513 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3514 else
3515 insert_stmt_after (stmt, insert_point);
3516 }
3517 else
3518 {
3519 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3520 == stmt);
3521 gimple_assign_set_rhs1 (stmt, oe1->op);
3522 gimple_assign_set_rhs2 (stmt, oe2->op);
3523 update_stmt (stmt);
3524 }
3525
3526 if (rhs1 != oe1->op && rhs1 != oe2->op)
3527 remove_visited_stmt_chain (rhs1);
3528
3529 if (dump_file && (dump_flags & TDF_DETAILS))
3530 {
3531 fprintf (dump_file, " into ");
3532 print_gimple_stmt (dump_file, stmt, 0, 0);
3533 }
3534 }
3535 return lhs;
3536 }
3537
3538 /* If we hit here, we should have 3 or more ops left. */
3539 gcc_assert (opindex + 2 < ops.length ());
3540
3541 /* Rewrite the next operator. */
3542 oe = ops[opindex];
3543
3544 /* Recurse on the LHS of the binary operator, which is guaranteed to
3545 be the non-leaf side. */
3546 tree new_rhs1
3547 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3548 changed || oe->op != rhs2);
3549
3550 if (oe->op != rhs2 || new_rhs1 != rhs1)
3551 {
3552 if (dump_file && (dump_flags & TDF_DETAILS))
3553 {
3554 fprintf (dump_file, "Transforming ");
3555 print_gimple_stmt (dump_file, stmt, 0, 0);
3556 }
3557
3558 /* If changed is false, this is either opindex == 0
3559 or all outer rhs2's were equal to corresponding oe->op,
3560 and powi_result is NULL.
3561 That means lhs is equivalent before and after reassociation.
3562 Otherwise ensure the old lhs SSA_NAME is not reused and
3563 create a new stmt as well, so that any debug stmts will be
3564 properly adjusted. */
3565 if (changed)
3566 {
3567 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3568 unsigned int uid = gimple_uid (stmt);
3569 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3570
3571 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3572 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3573 lhs, new_rhs1, oe->op);
3574 gimple_set_uid (stmt, uid);
3575 gimple_set_visited (stmt, true);
3576 if (insert_point == gsi_stmt (gsi))
3577 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3578 else
3579 insert_stmt_after (stmt, insert_point);
3580 }
3581 else
3582 {
3583 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3584 == stmt);
3585 gimple_assign_set_rhs1 (stmt, new_rhs1);
3586 gimple_assign_set_rhs2 (stmt, oe->op);
3587 update_stmt (stmt);
3588 }
3589
3590 if (dump_file && (dump_flags & TDF_DETAILS))
3591 {
3592 fprintf (dump_file, " into ");
3593 print_gimple_stmt (dump_file, stmt, 0, 0);
3594 }
3595 }
3596 return lhs;
3597 }
3598
3599 /* Find out how many cycles we need to compute the statement chain.
3600    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3601    maximum number of independent statements we may execute per cycle.  */
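/* For instance (numbers invented): with ops_num == 16 and
   cpu_width == 4 the first phase takes 16 / (2 * 4) == 2 cycles and
   leaves 16 - 2 * 4 == 8 operands; log2 (8) == 3 more cycles reduce
   those to a single value, for 5 cycles in total.  */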
3602
3603 static int
3604 get_required_cycles (int ops_num, int cpu_width)
3605 {
3606 int res;
3607 int elog;
3608 unsigned int rest;
3609
3610 /* While we have more than 2 * cpu_width operands
3611 we may reduce number of operands by cpu_width
3612 per cycle. */
3613 res = ops_num / (2 * cpu_width);
3614
3615   /* The remaining operand count may be halved each cycle
3616      until we have only one operand.  */
3617 rest = (unsigned)(ops_num - res * cpu_width);
3618 elog = exact_log2 (rest);
3619 if (elog >= 0)
3620 res += elog;
3621 else
3622 res += floor_log2 (rest) + 1;
3623
3624 return res;
3625 }
3626
3627 /* Returns an optimal number of registers to use for computation of
3628 given statements. */
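/* For example (numbers invented): with ops_num == 8 and a target
   width of 4, get_required_cycles returns 3; widths 3 and 2 both
   need 4 cycles, so the binary search below keeps the full width
   of 4.  */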
3629
3630 static int
3631 get_reassociation_width (int ops_num, enum tree_code opc,
3632 enum machine_mode mode)
3633 {
3634 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3635 int width;
3636 int width_min;
3637 int cycles_best;
3638
3639 if (param_width > 0)
3640 width = param_width;
3641 else
3642 width = targetm.sched.reassociation_width (opc, mode);
3643
3644 if (width == 1)
3645 return width;
3646
3647 /* Get the minimal time required for sequence computation. */
3648 cycles_best = get_required_cycles (ops_num, width);
3649
3650 /* Check if we may use less width and still compute sequence for
3651 the same time. It will allow us to reduce registers usage.
3652 get_required_cycles is monotonically increasing with lower width
3653 so we can perform a binary search for the minimal width that still
3654 results in the optimal cycle count. */
3655 width_min = 1;
3656 while (width > width_min)
3657 {
3658 int width_mid = (width + width_min) / 2;
3659
3660 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3661 width = width_mid;
3662 else if (width_min < width_mid)
3663 width_min = width_mid;
3664 else
3665 break;
3666 }
3667
3668 return width;
3669 }
3670
3671 /* Recursively rewrite our linearized statements so that the operators
3672 match those in OPS[OPINDEX], putting the computation in rank
3673 order and trying to allow operations to be executed in
3674 parallel. */
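/* A rough sketch (temporaries invented): with WIDTH == 2 and an
   addition chain over four operands a, b, c and d, the chain is
   emitted roughly as

     _t1 = c + d;
     _t2 = a + b;
     lhs = _t1 + _t2;

   so the two partial sums are independent and can execute in the
   same cycle, instead of a fully serial chain where every statement
   depends on the previous one.  */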
3675
3676 static void
3677 rewrite_expr_tree_parallel (gimple stmt, int width,
3678 vec<operand_entry_t> ops)
3679 {
3680 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3681 int op_num = ops.length ();
3682 int stmt_num = op_num - 1;
3683 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3684 int op_index = op_num - 1;
3685 int stmt_index = 0;
3686 int ready_stmts_end = 0;
3687 int i = 0;
3688 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3689
3690 /* We start expression rewriting from the top statements.
3691 So, in this loop we create a full list of statements
3692 we will work with. */
3693 stmts[stmt_num - 1] = stmt;
3694 for (i = stmt_num - 2; i >= 0; i--)
3695 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3696
3697 for (i = 0; i < stmt_num; i++)
3698 {
3699 tree op1, op2;
3700
3701 /* Determine whether we should use results of
3702 already handled statements or not. */
3703 if (ready_stmts_end == 0
3704 && (i - stmt_index >= width || op_index < 1))
3705 ready_stmts_end = i;
3706
3707       /* Now we choose operands for the next statement.  A nonzero
3708	  value in ready_stmts_end means that we should use
3709	  the results of already generated statements as new operands.  */
3710 if (ready_stmts_end > 0)
3711 {
3712 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3713 if (ready_stmts_end > stmt_index)
3714 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3715 else if (op_index >= 0)
3716 op2 = ops[op_index--]->op;
3717 else
3718 {
3719 gcc_assert (stmt_index < i);
3720 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3721 }
3722
3723 if (stmt_index >= ready_stmts_end)
3724 ready_stmts_end = 0;
3725 }
3726 else
3727 {
3728 if (op_index > 1)
3729 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3730 op2 = ops[op_index--]->op;
3731 op1 = ops[op_index--]->op;
3732 }
3733
3734 /* If we emit the last statement then we should put
3735 operands into the last statement. It will also
3736 break the loop. */
3737 if (op_index < 0 && stmt_index == i)
3738 i = stmt_num - 1;
3739
3740 if (dump_file && (dump_flags & TDF_DETAILS))
3741 {
3742 fprintf (dump_file, "Transforming ");
3743 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3744 }
3745
3746 /* We keep original statement only for the last one. All
3747 others are recreated. */
3748 if (i == stmt_num - 1)
3749 {
3750 gimple_assign_set_rhs1 (stmts[i], op1);
3751 gimple_assign_set_rhs2 (stmts[i], op2);
3752 update_stmt (stmts[i]);
3753 }
3754 else
3755 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3756
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3758 {
3759 fprintf (dump_file, " into ");
3760 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3761 }
3762 }
3763
3764 remove_visited_stmt_chain (last_rhs1);
3765 }
3766
3767 /* Transform STMT, which is really (A + B) + (C + D), into the
3768 left-linear form ((A+B)+C)+D.
3769 Recurse on D if necessary. */
3770
3771 static void
3772 linearize_expr (gimple stmt)
3773 {
3774 gimple_stmt_iterator gsi;
3775 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3776 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3777 gimple oldbinrhs = binrhs;
3778 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3779 gimple newbinrhs = NULL;
3780 struct loop *loop = loop_containing_stmt (stmt);
3781 tree lhs = gimple_assign_lhs (stmt);
3782
3783 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3784 && is_reassociable_op (binrhs, rhscode, loop));
3785
3786 gsi = gsi_for_stmt (stmt);
3787
3788 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3789 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3790 make_ssa_name (TREE_TYPE (lhs), NULL),
3791 gimple_assign_lhs (binlhs),
3792 gimple_assign_rhs2 (binrhs));
3793 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3794 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3795 gimple_set_uid (binrhs, gimple_uid (stmt));
3796
3797 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3798 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3799
3800 if (dump_file && (dump_flags & TDF_DETAILS))
3801 {
3802 fprintf (dump_file, "Linearized: ");
3803 print_gimple_stmt (dump_file, stmt, 0, 0);
3804 }
3805
3806 reassociate_stats.linearized++;
3807 update_stmt (stmt);
3808
3809 gsi = gsi_for_stmt (oldbinrhs);
3810 reassoc_remove_stmt (&gsi);
3811 release_defs (oldbinrhs);
3812
3813 gimple_set_visited (stmt, true);
3814 gimple_set_visited (binlhs, true);
3815 gimple_set_visited (binrhs, true);
3816
3817 /* Tail recurse on the new rhs if it still needs reassociation. */
3818 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3819 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3820 want to change the algorithm while converting to tuples. */
3821 linearize_expr (stmt);
3822 }
3823
3824 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3825 it. Otherwise, return NULL. */
3826
3827 static gimple
3828 get_single_immediate_use (tree lhs)
3829 {
3830 use_operand_p immuse;
3831 gimple immusestmt;
3832
3833 if (TREE_CODE (lhs) == SSA_NAME
3834 && single_imm_use (lhs, &immuse, &immusestmt)
3835 && is_gimple_assign (immusestmt))
3836 return immusestmt;
3837
3838 return NULL;
3839 }
3840
3841 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3842 representing the negated value. Insertions of any necessary
3843 instructions go before GSI.
3844 This function is recursive in that, if you hand it "a_5" as the
3845 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3846 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3847
3848 static tree
3849 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3850 {
3851 gimple negatedefstmt = NULL;
3852 tree resultofnegate;
3853 gimple_stmt_iterator gsi;
3854 unsigned int uid;
3855
3856 /* If we are trying to negate a name, defined by an add, negate the
3857 add operands instead. */
3858 if (TREE_CODE (tonegate) == SSA_NAME)
3859 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3860 if (TREE_CODE (tonegate) == SSA_NAME
3861 && is_gimple_assign (negatedefstmt)
3862 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3863 && has_single_use (gimple_assign_lhs (negatedefstmt))
3864 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3865 {
3866 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3867 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3868 tree lhs = gimple_assign_lhs (negatedefstmt);
3869 gimple g;
3870
3871 gsi = gsi_for_stmt (negatedefstmt);
3872 rhs1 = negate_value (rhs1, &gsi);
3873
3874 gsi = gsi_for_stmt (negatedefstmt);
3875 rhs2 = negate_value (rhs2, &gsi);
3876
3877 gsi = gsi_for_stmt (negatedefstmt);
3878 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3879 gimple_set_visited (negatedefstmt, true);
3880 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3881 gimple_set_uid (g, gimple_uid (negatedefstmt));
3882 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3883 return lhs;
3884 }
3885
3886 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3887 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3888 NULL_TREE, true, GSI_SAME_STMT);
3889 gsi = *gsip;
3890 uid = gimple_uid (gsi_stmt (gsi));
3891 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3892 {
3893 gimple stmt = gsi_stmt (gsi);
3894 if (gimple_uid (stmt) != 0)
3895 break;
3896 gimple_set_uid (stmt, uid);
3897 }
3898 return resultofnegate;
3899 }
3900
3901 /* Return true if we should break up the subtract in STMT into an add
3902 with negate. This is true when the subtract's operands are really
3903 adds, or the subtract itself is used in an add expression. In
3904 either case, breaking up the subtract into an add with negate
3905 exposes the adds to reassociation. */
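/* For example (schematic GIMPLE):
     t_1 = a_2 - b_3
     u_4 = t_1 + c_5
   Rewriting the subtract as  t_1 = a_2 + (-b_3)  exposes a_2, -b_3
   and c_5 to the add-chain reassociation that follows. */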
3906
3907 static bool
3908 should_break_up_subtract (gimple stmt)
3909 {
3910 tree lhs = gimple_assign_lhs (stmt);
3911 tree binlhs = gimple_assign_rhs1 (stmt);
3912 tree binrhs = gimple_assign_rhs2 (stmt);
3913 gimple immusestmt;
3914 struct loop *loop = loop_containing_stmt (stmt);
3915
3916 if (TREE_CODE (binlhs) == SSA_NAME
3917 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3918 return true;
3919
3920 if (TREE_CODE (binrhs) == SSA_NAME
3921 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3922 return true;
3923
3924 if (TREE_CODE (lhs) == SSA_NAME
3925 && (immusestmt = get_single_immediate_use (lhs))
3926 && is_gimple_assign (immusestmt)
3927 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3928 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3929 return true;
3930 return false;
3931 }
3932
3933 /* Transform STMT from A - B into A + -B. */
3934
3935 static void
3936 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3937 {
3938 tree rhs1 = gimple_assign_rhs1 (stmt);
3939 tree rhs2 = gimple_assign_rhs2 (stmt);
3940
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3942 {
3943 fprintf (dump_file, "Breaking up subtract ");
3944 print_gimple_stmt (dump_file, stmt, 0, 0);
3945 }
3946
3947 rhs2 = negate_value (rhs2, gsip);
3948 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3949 update_stmt (stmt);
3950 }
3951
3952 /* Determine whether STMT is a builtin call that raises an SSA name
3953 to an integer power and has only one use. If so, and this is early
3954 reassociation and unsafe math optimizations are permitted, place
3955 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3956 If any of these conditions does not hold, return FALSE. */
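/* For instance (assuming -funsafe-math-optimizations and a single use
   of the call's LHS):  t_1 = __builtin_pow (x_2, 3.0)  yields *BASE ==
   x_2 and *EXPONENT == 3, whereas a non-integral constant exponent
   such as 2.5, a negative exponent, or an exponent of 0 or 1 is
   rejected. */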
3957
3958 static bool
3959 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3960 {
3961 tree fndecl, arg1;
3962 REAL_VALUE_TYPE c, cint;
3963
3964 if (!first_pass_instance
3965 || !flag_unsafe_math_optimizations
3966 || !is_gimple_call (stmt)
3967 || !has_single_use (gimple_call_lhs (stmt)))
3968 return false;
3969
3970 fndecl = gimple_call_fndecl (stmt);
3971
3972 if (!fndecl
3973 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3974 return false;
3975
3976 switch (DECL_FUNCTION_CODE (fndecl))
3977 {
3978 CASE_FLT_FN (BUILT_IN_POW):
3979 *base = gimple_call_arg (stmt, 0);
3980 arg1 = gimple_call_arg (stmt, 1);
3981
3982 if (TREE_CODE (arg1) != REAL_CST)
3983 return false;
3984
3985 c = TREE_REAL_CST (arg1);
3986
3987 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3988 return false;
3989
3990 *exponent = real_to_integer (&c);
3991 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3992 if (!real_identical (&c, &cint))
3993 return false;
3994
3995 break;
3996
3997 CASE_FLT_FN (BUILT_IN_POWI):
3998 *base = gimple_call_arg (stmt, 0);
3999 arg1 = gimple_call_arg (stmt, 1);
4000
4001 if (!tree_fits_shwi_p (arg1))
4002 return false;
4003
4004 *exponent = tree_to_shwi (arg1);
4005 break;
4006
4007 default:
4008 return false;
4009 }
4010
4011 /* Expanding negative exponents is generally unproductive, so we don't
4012 complicate matters with those. Exponents of zero and one should
4013 have been handled by expression folding. */
4014 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4015 return false;
4016
4017 return true;
4018 }
4019
4020 /* Recursively linearize a binary expression that is the RHS of STMT.
4021 Place the operands of the expression tree in the vector named OPS. */
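/* For example (schematic), for the multiply chain
     t_1 = a_2 * b_3
     t_4 = t_1 * c_5
     stmt: x_6 = t_4 * d_7
   OPS ends up holding a_2, b_3, c_5 and d_7; operands defined by
   single-use __builtin_pow/__builtin_powi calls are instead recorded
   as repeated factors (see acceptable_pow_call). */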
4022
4023 static void
4024 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4025 bool is_associative, bool set_visited)
4026 {
4027 tree binlhs = gimple_assign_rhs1 (stmt);
4028 tree binrhs = gimple_assign_rhs2 (stmt);
4029 gimple binlhsdef = NULL, binrhsdef = NULL;
4030 bool binlhsisreassoc = false;
4031 bool binrhsisreassoc = false;
4032 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4033 struct loop *loop = loop_containing_stmt (stmt);
4034 tree base = NULL_TREE;
4035 HOST_WIDE_INT exponent = 0;
4036
4037 if (set_visited)
4038 gimple_set_visited (stmt, true);
4039
4040 if (TREE_CODE (binlhs) == SSA_NAME)
4041 {
4042 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4043 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4044 && !stmt_could_throw_p (binlhsdef));
4045 }
4046
4047 if (TREE_CODE (binrhs) == SSA_NAME)
4048 {
4049 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4050 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4051 && !stmt_could_throw_p (binrhsdef));
4052 }
4053
4054 /* If the LHS is not reassociable, but the RHS is, we need to swap
4055 them. If neither is reassociable, there is nothing we can do, so
4056 just put them in the ops vector. If the LHS is reassociable,
4057 linearize it. If both are reassociable, then linearize the RHS
4058 and the LHS. */
4059
4060 if (!binlhsisreassoc)
4061 {
4062 tree temp;
4063
4064 /* If this is not an associative operation (e.g. a division), give up. */
4065 if (!is_associative)
4066 {
4067 add_to_ops_vec (ops, binrhs);
4068 return;
4069 }
4070
4071 if (!binrhsisreassoc)
4072 {
4073 if (rhscode == MULT_EXPR
4074 && TREE_CODE (binrhs) == SSA_NAME
4075 && acceptable_pow_call (binrhsdef, &base, &exponent))
4076 {
4077 add_repeat_to_ops_vec (ops, base, exponent);
4078 gimple_set_visited (binrhsdef, true);
4079 }
4080 else
4081 add_to_ops_vec (ops, binrhs);
4082
4083 if (rhscode == MULT_EXPR
4084 && TREE_CODE (binlhs) == SSA_NAME
4085 && acceptable_pow_call (binlhsdef, &base, &exponent))
4086 {
4087 add_repeat_to_ops_vec (ops, base, exponent);
4088 gimple_set_visited (binlhsdef, true);
4089 }
4090 else
4091 add_to_ops_vec (ops, binlhs);
4092
4093 return;
4094 }
4095
4096 if (dump_file && (dump_flags & TDF_DETAILS))
4097 {
4098 fprintf (dump_file, "swapping operands of ");
4099 print_gimple_stmt (dump_file, stmt, 0, 0);
4100 }
4101
4102 swap_ssa_operands (stmt,
4103 gimple_assign_rhs1_ptr (stmt),
4104 gimple_assign_rhs2_ptr (stmt));
4105 update_stmt (stmt);
4106
4107 if (dump_file && (dump_flags & TDF_DETAILS))
4108 {
4109 fprintf (dump_file, " is now ");
4110 print_gimple_stmt (dump_file, stmt, 0, 0);
4111 }
4112
4113 /* We want to make it so the lhs is always the reassociative op,
4114 so swap. */
4115 temp = binlhs;
4116 binlhs = binrhs;
4117 binrhs = temp;
4118 }
4119 else if (binrhsisreassoc)
4120 {
4121 linearize_expr (stmt);
4122 binlhs = gimple_assign_rhs1 (stmt);
4123 binrhs = gimple_assign_rhs2 (stmt);
4124 }
4125
4126 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4127 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4128 rhscode, loop));
4129 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4130 is_associative, set_visited);
4131
4132 if (rhscode == MULT_EXPR
4133 && TREE_CODE (binrhs) == SSA_NAME
4134 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4135 {
4136 add_repeat_to_ops_vec (ops, base, exponent);
4137 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4138 }
4139 else
4140 add_to_ops_vec (ops, binrhs);
4141 }
4142
4143 /* Repropagate the negates back into subtracts, since no other pass
4144 currently does it. */
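/* For example,  x_1 = -a_2;  y_3 = b_4 + x_1  becomes  y_3 = b_4 - a_2,
   and  x_1 = -a_2;  y_3 = x_1 - b_4  becomes  x_1 = a_2 + b_4;
   y_3 = -x_1,  pushing the negate down where it may combine further. */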
4145
4146 static void
4147 repropagate_negates (void)
4148 {
4149 unsigned int i = 0;
4150 tree negate;
4151
4152 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4153 {
4154 gimple user = get_single_immediate_use (negate);
4155
4156 if (!user || !is_gimple_assign (user))
4157 continue;
4158
4159 /* The negate operand can be either operand of a PLUS_EXPR
4160 (it can be the LHS if the RHS is a constant for example).
4161
4162 Force the negate operand to the RHS of the PLUS_EXPR, then
4163 transform the PLUS_EXPR into a MINUS_EXPR. */
4164 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4165 {
4166 /* If the negated operand appears on the LHS of the
4167 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4168 to force the negated operand to the RHS of the PLUS_EXPR. */
4169 if (gimple_assign_rhs1 (user) == negate)
4170 {
4171 swap_ssa_operands (user,
4172 gimple_assign_rhs1_ptr (user),
4173 gimple_assign_rhs2_ptr (user));
4174 }
4175
4176 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4177 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4178 if (gimple_assign_rhs2 (user) == negate)
4179 {
4180 tree rhs1 = gimple_assign_rhs1 (user);
4181 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4182 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4183 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4184 update_stmt (user);
4185 }
4186 }
4187 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4188 {
4189 if (gimple_assign_rhs1 (user) == negate)
4190 {
4191 /* We have
4192 x = -a
4193 y = x - b
4194 which we transform into
4195 x = a + b
4196 y = -x .
4197 This pushes down the negate which we possibly can merge
4198 into some other operation, hence insert it into the
4199 plus_negates vector. */
4200 gimple feed = SSA_NAME_DEF_STMT (negate);
4201 tree a = gimple_assign_rhs1 (feed);
4202 tree b = gimple_assign_rhs2 (user);
4203 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4204 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4205 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
4206 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
4207 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4208 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
4209 user = gsi_stmt (gsi2);
4210 update_stmt (user);
4211 reassoc_remove_stmt (&gsi);
4212 release_defs (feed);
4213 plus_negates.safe_push (gimple_assign_lhs (user));
4214 }
4215 else
4216 {
4217 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4218 rid of one operation. */
4219 gimple feed = SSA_NAME_DEF_STMT (negate);
4220 tree a = gimple_assign_rhs1 (feed);
4221 tree rhs1 = gimple_assign_rhs1 (user);
4222 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4223 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4224 update_stmt (gsi_stmt (gsi));
4225 }
4226 }
4227 }
4228 }
4229
4230 /* Returns true if OP is of a type for which we can do reassociation.
4231 That is for integral types with wrapping overflow, non-saturating
4232 fixed-point types, and floating-point types under -fassociative-math. */
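/* For example (a rough reading of the predicate below): unsigned
   integer operands always qualify, signed integers only when their
   overflow wraps (e.g. under -fwrapv), and floating-point operands
   only when -fassociative-math is in effect. */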
4233
4234 static bool
4235 can_reassociate_p (tree op)
4236 {
4237 tree type = TREE_TYPE (op);
4238 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4239 || NON_SAT_FIXED_POINT_TYPE_P (type)
4240 || (flag_associative_math && FLOAT_TYPE_P (type)))
4241 return true;
4242 return false;
4243 }
4244
4245 /* Break up subtract operations in block BB.
4246
4247 We do this top down because we don't know whether the subtract is
4248 part of a possible chain of reassociation except at the top.
4249
4250 IE given
4251 d = f + g
4252 c = a + e
4253 b = c - d
4254 q = b - r
4255 k = t - q
4256
4257 we want to break up k = t - q, but we won't until we've transformed q
4258 = b - r, which won't be broken up until we transform b = c - d.
4259
4260 En passant, clear the GIMPLE visited flag on every statement
4261 and set UIDs within each basic block. */
4262
4263 static void
4264 break_up_subtract_bb (basic_block bb)
4265 {
4266 gimple_stmt_iterator gsi;
4267 basic_block son;
4268 unsigned int uid = 1;
4269
4270 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4271 {
4272 gimple stmt = gsi_stmt (gsi);
4273 gimple_set_visited (stmt, false);
4274 gimple_set_uid (stmt, uid++);
4275
4276 if (!is_gimple_assign (stmt)
4277 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4278 continue;
4279
4280 /* Look for simple gimple subtract operations. */
4281 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4282 {
4283 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4284 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4285 continue;
4286
4287 /* Check for a subtract used only in an addition. If this
4288 is the case, transform it into an add of a negate for better
4289 reassociation. IE transform C = A-B into C = A + -B if C
4290 is only used in an addition. */
4291 if (should_break_up_subtract (stmt))
4292 break_up_subtract (stmt, &gsi);
4293 }
4294 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4295 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4296 plus_negates.safe_push (gimple_assign_lhs (stmt));
4297 }
4298 for (son = first_dom_son (CDI_DOMINATORS, bb);
4299 son;
4300 son = next_dom_son (CDI_DOMINATORS, son))
4301 break_up_subtract_bb (son);
4302 }
4303
4304 /* Used for repeated factor analysis. */
4305 struct repeat_factor_d
4306 {
4307 /* An SSA name that occurs in a multiply chain. */
4308 tree factor;
4309
4310 /* Cached rank of the factor. */
4311 unsigned rank;
4312
4313 /* Number of occurrences of the factor in the chain. */
4314 HOST_WIDE_INT count;
4315
4316 /* An SSA name representing the product of this factor and
4317 all factors appearing later in the repeated factor vector. */
4318 tree repr;
4319 };
4320
4321 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4322 typedef const struct repeat_factor_d *const_repeat_factor_t;
4323
4324
4325 static vec<repeat_factor> repeat_factor_vec;
4326
4327 /* Used for sorting the repeat factor vector. Sort primarily by
4328 ascending occurrence count, secondarily by descending rank. */
4329
4330 static int
4331 compare_repeat_factors (const void *x1, const void *x2)
4332 {
4333 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4334 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4335
4336 if (rf1->count != rf2->count)
4337 return rf1->count - rf2->count;
4338
4339 return rf2->rank - rf1->rank;
4340 }
4341
4342 /* Look for repeated operands in OPS in the multiply tree rooted at
4343 STMT. Replace them with an optimal sequence of multiplies and powi
4344 builtin calls, and remove the used operands from OPS. Return an
4345 SSA name representing the value of the replacement sequence. */
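/* For illustration (a rough sketch; exact names and pairing may
   differ): for  x * x * x * y * y  OPS holds x three times and y
   twice, and the replacement is roughly
     t1 = x * y
     t2 = __builtin_powi (t1, 2)
     result = t2 * x
   so four multiplies become two multiplies and one powi call. */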
4346
4347 static tree
4348 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4349 {
4350 unsigned i, j, vec_len;
4351 int ii;
4352 operand_entry_t oe;
4353 repeat_factor_t rf1, rf2;
4354 repeat_factor rfnew;
4355 tree result = NULL_TREE;
4356 tree target_ssa, iter_result;
4357 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4358 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4359 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4360 gimple mul_stmt, pow_stmt;
4361
4362 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4363 target. */
4364 if (!powi_fndecl)
4365 return NULL_TREE;
4366
4367 /* Allocate the repeated factor vector. */
4368 repeat_factor_vec.create (10);
4369
4370 /* Scan the OPS vector for all SSA names in the product and build
4371 up a vector of occurrence counts for each factor. */
4372 FOR_EACH_VEC_ELT (*ops, i, oe)
4373 {
4374 if (TREE_CODE (oe->op) == SSA_NAME)
4375 {
4376 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4377 {
4378 if (rf1->factor == oe->op)
4379 {
4380 rf1->count += oe->count;
4381 break;
4382 }
4383 }
4384
4385 if (j >= repeat_factor_vec.length ())
4386 {
4387 rfnew.factor = oe->op;
4388 rfnew.rank = oe->rank;
4389 rfnew.count = oe->count;
4390 rfnew.repr = NULL_TREE;
4391 repeat_factor_vec.safe_push (rfnew);
4392 }
4393 }
4394 }
4395
4396 /* Sort the repeated factor vector by (a) increasing occurrence count,
4397 and (b) decreasing rank. */
4398 repeat_factor_vec.qsort (compare_repeat_factors);
4399
4400 /* It is generally best to combine as many base factors as possible
4401 into a product before applying __builtin_powi to the result.
4402 However, the sort order chosen for the repeated factor vector
4403 allows us to cache partial results for the product of the base
4404 factors for subsequent use. When we already have a cached partial
4405 result from a previous iteration, it is best to make use of it
4406 before looking for another __builtin_powi opportunity.
4407
4408 As an example, consider x * x * y * y * y * z * z * z * z.
4409 We want to first compose the product x * y * z, raise it to the
4410 second power, then multiply this by y * z, and finally multiply
4411 by z. This can be done in 5 multiplies provided we cache y * z
4412 for use in both expressions:
4413
4414 t1 = y * z
4415 t2 = t1 * x
4416 t3 = t2 * t2
4417 t4 = t1 * t3
4418 result = t4 * z
4419
4420 If we instead ignored the cached y * z and first multiplied by
4421 the __builtin_powi opportunity z * z, we would get the inferior:
4422
4423 t1 = y * z
4424 t2 = t1 * x
4425 t3 = t2 * t2
4426 t4 = z * z
4427 t5 = t3 * t4
4428 result = t5 * y */
4429
4430 vec_len = repeat_factor_vec.length ();
4431
4432 /* Repeatedly look for opportunities to create a builtin_powi call. */
4433 while (true)
4434 {
4435 HOST_WIDE_INT power;
4436
4437 /* First look for the largest cached product of factors from
4438 preceding iterations. If found, create a builtin_powi for
4439 it if the minimum occurrence count for its factors is at
4440 least 2, or just use this cached product as our next
4441 multiplicand if the minimum occurrence count is 1. */
4442 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4443 {
4444 if (rf1->repr && rf1->count > 0)
4445 break;
4446 }
4447
4448 if (j < vec_len)
4449 {
4450 power = rf1->count;
4451
4452 if (power == 1)
4453 {
4454 iter_result = rf1->repr;
4455
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4457 {
4458 unsigned elt;
4459 repeat_factor_t rf;
4460 fputs ("Multiplying by cached product ", dump_file);
4461 for (elt = j; elt < vec_len; elt++)
4462 {
4463 rf = &repeat_factor_vec[elt];
4464 print_generic_expr (dump_file, rf->factor, 0);
4465 if (elt < vec_len - 1)
4466 fputs (" * ", dump_file);
4467 }
4468 fputs ("\n", dump_file);
4469 }
4470 }
4471 else
4472 {
4473 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4474 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4475 build_int_cst (integer_type_node,
4476 power));
4477 gimple_call_set_lhs (pow_stmt, iter_result);
4478 gimple_set_location (pow_stmt, gimple_location (stmt));
4479 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4480
4481 if (dump_file && (dump_flags & TDF_DETAILS))
4482 {
4483 unsigned elt;
4484 repeat_factor_t rf;
4485 fputs ("Building __builtin_pow call for cached product (",
4486 dump_file);
4487 for (elt = j; elt < vec_len; elt++)
4488 {
4489 rf = &repeat_factor_vec[elt];
4490 print_generic_expr (dump_file, rf->factor, 0);
4491 if (elt < vec_len - 1)
4492 fputs (" * ", dump_file);
4493 }
4494 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4495 power);
4496 }
4497 }
4498 }
4499 else
4500 {
4501 /* Otherwise, find the first factor in the repeated factor
4502 vector whose occurrence count is at least 2. If no such
4503 factor exists, there are no builtin_powi opportunities
4504 remaining. */
4505 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4506 {
4507 if (rf1->count >= 2)
4508 break;
4509 }
4510
4511 if (j >= vec_len)
4512 break;
4513
4514 power = rf1->count;
4515
4516 if (dump_file && (dump_flags & TDF_DETAILS))
4517 {
4518 unsigned elt;
4519 repeat_factor_t rf;
4520 fputs ("Building __builtin_pow call for (", dump_file);
4521 for (elt = j; elt < vec_len; elt++)
4522 {
4523 rf = &repeat_factor_vec[elt];
4524 print_generic_expr (dump_file, rf->factor, 0);
4525 if (elt < vec_len - 1)
4526 fputs (" * ", dump_file);
4527 }
4528 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4529 }
4530
4531 reassociate_stats.pows_created++;
4532
4533 /* Visit each element of the vector in reverse order (so that
4534 high-occurrence elements are visited first, and within the
4535 same occurrence count, lower-ranked elements are visited
4536 first). Form a linear product of all elements in this order
4537 whose occurrence count is at least that of element J.
4538 Record the SSA name representing the product of each element
4539 with all subsequent elements in the vector. */
4540 if (j == vec_len - 1)
4541 rf1->repr = rf1->factor;
4542 else
4543 {
4544 for (ii = vec_len - 2; ii >= (int)j; ii--)
4545 {
4546 tree op1, op2;
4547
4548 rf1 = &repeat_factor_vec[ii];
4549 rf2 = &repeat_factor_vec[ii + 1];
4550
4551 /* Init the last factor's representative to be itself. */
4552 if (!rf2->repr)
4553 rf2->repr = rf2->factor;
4554
4555 op1 = rf1->factor;
4556 op2 = rf2->repr;
4557
4558 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4559 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4560 target_ssa,
4561 op1, op2);
4562 gimple_set_location (mul_stmt, gimple_location (stmt));
4563 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4564 rf1->repr = target_ssa;
4565
4566 /* Don't reprocess the multiply we just introduced. */
4567 gimple_set_visited (mul_stmt, true);
4568 }
4569 }
4570
4571 /* Form a call to __builtin_powi for the maximum product
4572 just formed, raised to the power obtained earlier. */
4573 rf1 = &repeat_factor_vec[j];
4574 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4575 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4576 build_int_cst (integer_type_node,
4577 power));
4578 gimple_call_set_lhs (pow_stmt, iter_result);
4579 gimple_set_location (pow_stmt, gimple_location (stmt));
4580 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4581 }
4582
4583 /* If we previously formed at least one other builtin_powi call,
4584 form the product of this one and those others. */
4585 if (result)
4586 {
4587 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4588 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4589 result, iter_result);
4590 gimple_set_location (mul_stmt, gimple_location (stmt));
4591 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4592 gimple_set_visited (mul_stmt, true);
4593 result = new_result;
4594 }
4595 else
4596 result = iter_result;
4597
4598 /* Decrement the occurrence count of each element in the product
4599 by the count found above, and remove this many copies of each
4600 factor from OPS. */
4601 for (i = j; i < vec_len; i++)
4602 {
4603 unsigned k = power;
4604 unsigned n;
4605
4606 rf1 = &repeat_factor_vec[i];
4607 rf1->count -= power;
4608
4609 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4610 {
4611 if (oe->op == rf1->factor)
4612 {
4613 if (oe->count <= k)
4614 {
4615 ops->ordered_remove (n);
4616 k -= oe->count;
4617
4618 if (k == 0)
4619 break;
4620 }
4621 else
4622 {
4623 oe->count -= k;
4624 break;
4625 }
4626 }
4627 }
4628 }
4629 }
4630
4631 /* At this point all elements in the repeated factor vector have a
4632 remaining occurrence count of 0 or 1, and those with a count of 1
4633 don't have cached representatives. Re-sort the ops vector and
4634 clean up. */
4635 ops->qsort (sort_by_operand_rank);
4636 repeat_factor_vec.release ();
4637
4638 /* Return the final product computed herein. Note that there may
4639 still be some elements with single occurrence count left in OPS;
4640 those will be handled by the normal reassociation logic. */
4641 return result;
4642 }
4643
4644 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4645
4646 static void
4647 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4648 {
4649 tree rhs1;
4650
4651 if (dump_file && (dump_flags & TDF_DETAILS))
4652 {
4653 fprintf (dump_file, "Transforming ");
4654 print_gimple_stmt (dump_file, stmt, 0, 0);
4655 }
4656
4657 rhs1 = gimple_assign_rhs1 (stmt);
4658 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4659 update_stmt (stmt);
4660 remove_visited_stmt_chain (rhs1);
4661
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4663 {
4664 fprintf (dump_file, " into ");
4665 print_gimple_stmt (dump_file, stmt, 0, 0);
4666 }
4667 }
4668
4669 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4670
4671 static void
4672 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4673 tree rhs1, tree rhs2)
4674 {
4675 if (dump_file && (dump_flags & TDF_DETAILS))
4676 {
4677 fprintf (dump_file, "Transforming ");
4678 print_gimple_stmt (dump_file, stmt, 0, 0);
4679 }
4680
4681 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4682 update_stmt (gsi_stmt (*gsi));
4683 remove_visited_stmt_chain (rhs1);
4684
4685 if (dump_file && (dump_flags & TDF_DETAILS))
4686 {
4687 fprintf (dump_file, " into ");
4688 print_gimple_stmt (dump_file, stmt, 0, 0);
4689 }
4690 }
4691
4692 /* Reassociate expressions in basic block BB and its children in the
4693 post-dominator tree. */
4694
4695 static void
4696 reassociate_bb (basic_block bb)
4697 {
4698 gimple_stmt_iterator gsi;
4699 basic_block son;
4700 gimple stmt = last_stmt (bb);
4701
4702 if (stmt && !gimple_visited_p (stmt))
4703 maybe_optimize_range_tests (stmt);
4704
4705 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4706 {
4707 stmt = gsi_stmt (gsi);
4708
4709 if (is_gimple_assign (stmt)
4710 && !stmt_could_throw_p (stmt))
4711 {
4712 tree lhs, rhs1, rhs2;
4713 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4714
4715 /* If this is not a gimple binary expression, there is
4716 nothing for us to do with it. */
4717 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4718 continue;
4719
4720 /* If this was part of an already processed statement,
4721 we don't need to touch it again. */
4722 if (gimple_visited_p (stmt))
4723 {
4724 /* This statement might have become dead because of previous
4725 reassociations. */
4726 if (has_zero_uses (gimple_get_lhs (stmt)))
4727 {
4728 reassoc_remove_stmt (&gsi);
4729 release_defs (stmt);
4730 /* We might end up removing the last stmt above which
4731 places the iterator to the end of the sequence.
4732 Reset it to the last stmt in this case which might
4733 be the end of the sequence as well if we removed
4734 the last statement of the sequence. In which case
4735 we need to bail out. */
4736 if (gsi_end_p (gsi))
4737 {
4738 gsi = gsi_last_bb (bb);
4739 if (gsi_end_p (gsi))
4740 break;
4741 }
4742 }
4743 continue;
4744 }
4745
4746 lhs = gimple_assign_lhs (stmt);
4747 rhs1 = gimple_assign_rhs1 (stmt);
4748 rhs2 = gimple_assign_rhs2 (stmt);
4749
4750 /* For operations other than bitwise and min/max ones, we cannot
4751 reassociate all types. Verify that here. */
4752 if (rhs_code != BIT_IOR_EXPR
4753 && rhs_code != BIT_AND_EXPR
4754 && rhs_code != BIT_XOR_EXPR
4755 && rhs_code != MIN_EXPR
4756 && rhs_code != MAX_EXPR
4757 && (!can_reassociate_p (lhs)
4758 || !can_reassociate_p (rhs1)
4759 || !can_reassociate_p (rhs2)))
4760 continue;
4761
4762 if (associative_tree_code (rhs_code))
4763 {
4764 auto_vec<operand_entry_t> ops;
4765 tree powi_result = NULL_TREE;
4766
4767 /* There may be no immediate uses left by the time we
4768 get here because we may have eliminated them all. */
4769 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4770 continue;
4771
4772 gimple_set_visited (stmt, true);
4773 linearize_expr_tree (&ops, stmt, true, true);
4774 ops.qsort (sort_by_operand_rank);
4775 optimize_ops_list (rhs_code, &ops);
4776 if (undistribute_ops_list (rhs_code, &ops,
4777 loop_containing_stmt (stmt)))
4778 {
4779 ops.qsort (sort_by_operand_rank);
4780 optimize_ops_list (rhs_code, &ops);
4781 }
4782
4783 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4784 optimize_range_tests (rhs_code, &ops);
4785
4786 if (first_pass_instance
4787 && rhs_code == MULT_EXPR
4788 && flag_unsafe_math_optimizations)
4789 powi_result = attempt_builtin_powi (stmt, &ops);
4790
4791 /* If the operand vector is now empty, all operands were
4792 consumed by the __builtin_powi optimization. */
4793 if (ops.length () == 0)
4794 transform_stmt_to_copy (&gsi, stmt, powi_result);
4795 else if (ops.length () == 1)
4796 {
4797 tree last_op = ops.last ()->op;
4798
4799 if (powi_result)
4800 transform_stmt_to_multiply (&gsi, stmt, last_op,
4801 powi_result);
4802 else
4803 transform_stmt_to_copy (&gsi, stmt, last_op);
4804 }
4805 else
4806 {
4807 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4808 int ops_num = ops.length ();
4809 int width = get_reassociation_width (ops_num, rhs_code, mode);
4810 tree new_lhs = lhs;
4811
4812 if (dump_file && (dump_flags & TDF_DETAILS))
4813 fprintf (dump_file,
4814 "Width = %d was chosen for reassociation\n", width);
4815
4816 if (width > 1
4817 && ops.length () > 3)
4818 rewrite_expr_tree_parallel (stmt, width, ops);
4819 else
4820 {
4821 /* When there are three operands left, we want
4822 to make sure the ones that get the double
4823 binary op are chosen wisely. */
4824 int len = ops.length ();
4825 if (len >= 3)
4826 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4827
4828 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4829 powi_result != NULL);
4830 }
4831
4832 /* If we combined some repeated factors into a
4833 __builtin_powi call, multiply that result by the
4834 reassociated operands. */
4835 if (powi_result)
4836 {
4837 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4838 tree type = TREE_TYPE (lhs);
4839 tree target_ssa = make_temp_ssa_name (type, NULL,
4840 "reassocpow");
4841 gimple_set_lhs (lhs_stmt, target_ssa);
4842 update_stmt (lhs_stmt);
4843 if (lhs != new_lhs)
4844 target_ssa = new_lhs;
4845 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4846 powi_result,
4847 target_ssa);
4848 gimple_set_location (mul_stmt, gimple_location (stmt));
4849 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4850 }
4851 }
4852 }
4853 }
4854 }
4855 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4856 son;
4857 son = next_dom_son (CDI_POST_DOMINATORS, son))
4858 reassociate_bb (son);
4859 }
4860
4861 /* Add jumps around shifts for range tests turned into bit tests.
4862 For each SSA_NAME VAR we have code like:
4863 VAR = ...; // final stmt of range comparison
4864 // bit test here...;
4865 OTHERVAR = ...; // final stmt of the bit test sequence
4866 RES = VAR | OTHERVAR;
4867 Turn the above into:
4868 VAR = ...;
4869 if (VAR != 0)
4870 goto <l3>;
4871 else
4872 goto <l2>;
4873 <l2>:
4874 // bit test here...;
4875 OTHERVAR = ...;
4876 <l3>:
4877 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4878
4879 static void
4880 branch_fixup (void)
4881 {
4882 tree var;
4883 unsigned int i;
4884
4885 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4886 {
4887 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4888 gimple use_stmt;
4889 use_operand_p use;
4890 bool ok = single_imm_use (var, &use, &use_stmt);
4891 gcc_assert (ok
4892 && is_gimple_assign (use_stmt)
4893 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4894 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4895
4896 basic_block cond_bb = gimple_bb (def_stmt);
4897 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4898 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4899
4900 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4901 gimple g = gimple_build_cond (NE_EXPR, var,
4902 build_zero_cst (TREE_TYPE (var)),
4903 NULL_TREE, NULL_TREE);
4904 location_t loc = gimple_location (use_stmt);
4905 gimple_set_location (g, loc);
4906 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4907
4908 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4909 etrue->probability = REG_BR_PROB_BASE / 2;
4910 etrue->count = cond_bb->count / 2;
4911 edge efalse = find_edge (cond_bb, then_bb);
4912 efalse->flags = EDGE_FALSE_VALUE;
4913 efalse->probability -= etrue->probability;
4914 efalse->count -= etrue->count;
4915 then_bb->count -= etrue->count;
4916
4917 tree othervar = NULL_TREE;
4918 if (gimple_assign_rhs1 (use_stmt) == var)
4919 othervar = gimple_assign_rhs2 (use_stmt);
4920 else if (gimple_assign_rhs2 (use_stmt) == var)
4921 othervar = gimple_assign_rhs1 (use_stmt);
4922 else
4923 gcc_unreachable ();
4924 tree lhs = gimple_assign_lhs (use_stmt);
4925 gimple phi = create_phi_node (lhs, merge_bb);
4926 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4927 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4928 gsi = gsi_for_stmt (use_stmt);
4929 gsi_remove (&gsi, true);
4930
4931 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4932 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4933 }
4934 reassoc_branch_fixups.release ();
4935 }
4936
4937 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4938 void debug_ops_vector (vec<operand_entry_t> ops);
4939
4940 /* Dump the operand entry vector OPS to FILE. */
4941
4942 void
4943 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4944 {
4945 operand_entry_t oe;
4946 unsigned int i;
4947
4948 FOR_EACH_VEC_ELT (ops, i, oe)
4949 {
4950 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4951 print_generic_expr (file, oe->op, 0);
4952 }
4953 }
4954
4955 /* Dump the operand entry vector OPS to STDERR. */
4956
4957 DEBUG_FUNCTION void
4958 debug_ops_vector (vec<operand_entry_t> ops)
4959 {
4960 dump_ops_vector (stderr, ops);
4961 }
4962
4963 static void
4964 do_reassoc (void)
4965 {
4966 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4967 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4968 }
4969
4970 /* Initialize the reassociation pass. */
4971
4972 static void
4973 init_reassoc (void)
4974 {
4975 int i;
4976 long rank = 2;
4977 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4978
4979 /* Find the loops, so that we can prevent moving calculations in
4980 them. */
4981 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4982
4983 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4984
4985 operand_entry_pool = create_alloc_pool ("operand entry pool",
4986 sizeof (struct operand_entry), 30);
4987 next_operand_entry_id = 0;
4988
4989 /* Reverse post order (RPO) will give us an ordering in which
4990 deeper loops come later. */
4991 pre_and_rev_post_order_compute (NULL, bbs, false);
4992 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4993 operand_rank = new hash_map<tree, long>;
4994
4995 /* Give each default definition a distinct rank. This includes
4996 parameters and the static chain. Walk backwards over all
4997 SSA names so that we get proper rank ordering according
4998 to tree_swap_operands_p. */
4999 for (i = num_ssa_names - 1; i > 0; --i)
5000 {
5001 tree name = ssa_name (i);
5002 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5003 insert_operand_rank (name, ++rank);
5004 }
5005
5006 /* Set up rank for each BB */
5007 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5008 bb_rank[bbs[i]] = ++rank << 16;
5009
5010 free (bbs);
5011 calculate_dominance_info (CDI_POST_DOMINATORS);
5012 plus_negates = vNULL;
5013 }
5014
5015 /* Cleanup after the reassociation pass, and print stats if
5016 requested. */
5017
5018 static void
5019 fini_reassoc (void)
5020 {
5021 statistics_counter_event (cfun, "Linearized",
5022 reassociate_stats.linearized);
5023 statistics_counter_event (cfun, "Constants eliminated",
5024 reassociate_stats.constants_eliminated);
5025 statistics_counter_event (cfun, "Ops eliminated",
5026 reassociate_stats.ops_eliminated);
5027 statistics_counter_event (cfun, "Statements rewritten",
5028 reassociate_stats.rewritten);
5029 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5030 reassociate_stats.pows_encountered);
5031 statistics_counter_event (cfun, "Built-in powi calls created",
5032 reassociate_stats.pows_created);
5033
5034 delete operand_rank;
5035 free_alloc_pool (operand_entry_pool);
5036 free (bb_rank);
5037 plus_negates.release ();
5038 free_dominance_info (CDI_POST_DOMINATORS);
5039 loop_optimizer_finalize ();
5040 }
5041
5042 /* Gate and execute functions for Reassociation. */
5043
5044 static unsigned int
5045 execute_reassoc (void)
5046 {
5047 init_reassoc ();
5048
5049 do_reassoc ();
5050 repropagate_negates ();
5051 branch_fixup ();
5052
5053 fini_reassoc ();
5054 return 0;
5055 }
5056
5057 namespace {
5058
5059 const pass_data pass_data_reassoc =
5060 {
5061 GIMPLE_PASS, /* type */
5062 "reassoc", /* name */
5063 OPTGROUP_NONE, /* optinfo_flags */
5064 TV_TREE_REASSOC, /* tv_id */
5065 ( PROP_cfg | PROP_ssa ), /* properties_required */
5066 0, /* properties_provided */
5067 0, /* properties_destroyed */
5068 0, /* todo_flags_start */
5069 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5070 };
5071
5072 class pass_reassoc : public gimple_opt_pass
5073 {
5074 public:
5075 pass_reassoc (gcc::context *ctxt)
5076 : gimple_opt_pass (pass_data_reassoc, ctxt)
5077 {}
5078
5079 /* opt_pass methods: */
5080 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5081 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5082 virtual unsigned int execute (function *) { return execute_reassoc (); }
5083
5084 }; // class pass_reassoc
5085
5086 } // anon namespace
5087
5088 gimple_opt_pass *
5089 make_pass_reassoc (gcc::context *ctxt)
5090 {
5091 return new pass_reassoc (ctxt);
5092 }