1 /* Full and partial redundancy elimination and code hoisting on SSA GIMPLE.
2 Copyright (C) 2001-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org> and Steven Bosscher
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
34 #include "gimple-pretty-print.h"
35 #include "fold-const.h"
37 #include "gimple-fold.h"
40 #include "gimple-iterator.h"
42 #include "tree-into-ssa.h"
46 #include "tree-ssa-sccvn.h"
47 #include "tree-scalar-evolution.h"
50 #include "tree-ssa-propagate.h"
51 #include "tree-ssa-dce.h"
52 #include "tree-cfgcleanup.h"
55 /* Even though this file is called tree-ssa-pre.c, we actually
56 implement a bit more than just PRE here. All of them piggy-back
57 on GVN which is implemented in tree-ssa-sccvn.c.
59 1. Full Redundancy Elimination (FRE)
60 This is the elimination phase of GVN.
62 2. Partial Redundancy Elimination (PRE)
63 This adds computation of AVAIL_OUT and ANTIC_IN and
64 doing expression insertion to form GVN-PRE.
67 This optimization uses the ANTIC_IN sets computed for PRE
68 to move expressions further up than PRE would do, to make
69 multiple computations of the same value fully redundant.
70 This pass is explained below (after the explanation of the
71 basic algorithm for PRE).
76 1. Avail sets can be shared by making an avail_find_leader that
77 walks up the dominator tree and looks in those avail sets.
78 This might affect code optimality, it's unclear right now.
79 Currently the AVAIL_OUT sets are the remaining quadraticness in
81 2. Strength reduction can be performed by anticipating expressions
82 we can repair later on.
83 3. We can do back-substitution or smarter value numbering to catch
84 commutative expressions split up over multiple statements.
87 /* For ease of terminology, "expression node" in the below refers to
88 every expression node but GIMPLE_ASSIGN, because GIMPLE_ASSIGNs
89 represent the actual statement containing the expressions we care about,
90 and we cache the value number by putting it in the expression. */
92 /* Basic algorithm for Partial Redundancy Elimination:
94 First we walk the statements to generate the AVAIL sets, the
95 EXP_GEN sets, and the tmp_gen sets. EXP_GEN sets represent the
96 generation of values/expressions by a given block. We use them
97 when computing the ANTIC sets. The AVAIL sets consist of
98 SSA_NAME's that represent values, so we know what values are
99 available in what blocks. AVAIL is a forward dataflow problem. In
100 SSA, values are never killed, so we don't need a kill set, or a
101 fixpoint iteration, in order to calculate the AVAIL sets. In
102 traditional parlance, AVAIL sets tell us the downsafety of the
105 Next, we generate the ANTIC sets. These sets represent the
106 anticipatable expressions. ANTIC is a backwards dataflow
107 problem. An expression is anticipatable in a given block if it could
108 be generated in that block. This means that if we had to perform
109 an insertion in that block, of the value of that expression, we
110 could. Calculating the ANTIC sets requires phi translation of
111 expressions, because the flow goes backwards through phis. We must
112 iterate to a fixpoint of the ANTIC sets, because we have a kill
113 set. Even in SSA form, values are not live over the entire
114 function, only from their definition point onwards. So we have to
115 remove values from the ANTIC set once we go past the definition
116 point of the leaders that make them up.
117 compute_antic/compute_antic_aux performs this computation.
119 Third, we perform insertions to make partially redundant
120 expressions fully redundant.
122 An expression is partially redundant (excluding partial
125 1. It is AVAIL in some, but not all, of the predecessors of a
127 2. It is ANTIC in all the predecessors.
129 In order to make it fully redundant, we insert the expression into
130 the predecessors where it is not available, but is ANTIC.
132 When optimizing for size, we only eliminate the partial redundancy
133 if we need to insert in only one predecessor. This avoids almost
134 completely the code size increase that PRE usually causes.
136 For the partial anticipation case, we only perform insertion if it
137 is partially anticipated in some block, and fully available in all
140 do_pre_regular_insertion/do_pre_partial_partial_insertion
141 performs these steps, driven by insert/insert_aux.
143 Fourth, we eliminate fully redundant expressions.
144 This is a simple statement walk that replaces redundant
145 calculations with the now available values. */
147 /* Basic algorithm for Code Hoisting:
149 Code hoisting is: Moving value computations up in the control flow
150 graph to make multiple copies redundant. Typically this is a size
151 optimization, but there are cases where it also is helpful for speed.
153 A simple code hoisting algorithm is implemented that piggy-backs on
154 the PRE infrastructure. For code hoisting, we have to know ANTIC_OUT
155 which is effectively ANTIC_IN - AVAIL_OUT. The latter two have to be
156 computed for PRE, and we can use them to perform a limited version of
159 For the purpose of this implementation, a value is hoistable to a basic
160 block B if the following properties are met:
162 1. The value is in ANTIC_IN(B) -- the value will be computed on all
163 paths from B to function exit and it can be computed in B;
165 2. The value is not in AVAIL_OUT(B) -- there would be no need to
166 compute the value again and make it available twice;
168 3. All successors of B are dominated by B -- makes sure that inserting
169 a computation of the value in B will make the remaining
170 computations fully redundant;
172 4. At least one successor has the value in AVAIL_OUT -- to avoid
173 hoisting values up too far;
175 5. There are at least two successors of B -- hoisting in straight
176 line code is pointless.
178 The third condition is not strictly necessary, but it would complicate
179 the hoisting pass a lot. In fact, I don't know of any code hoisting
180 algorithm that does not have this requirement. Fortunately, experiments
181 have shown that most candidate hoistable values are in regions that meet
182 this condition (e.g. diamond-shape regions).
184 The fourth condition is necessary to avoid hoisting things up too far
185 away from the uses of the value. Nothing else limits the algorithm
186 from hoisting everything up as far as ANTIC_IN allows. Experiments
187 with SPEC and CSiBE have shown that hoisting up too far results in more
188 spilling, less benefits for code size, and worse benchmark scores.
189 Fortunately, in practice most of the interesting hoisting opportunities
190 are caught despite this limitation.
192 For hoistable values that meet all conditions, expressions are inserted
193 to make the calculation of the hoistable value fully redundant. We
194 perform code hoisting insertions after each round of PRE insertions,
195 because code hoisting never exposes new PRE opportunities, but PRE can
196 create new code hoisting opportunities.
198 The code hoisting algorithm is implemented in do_hoist_insert, driven
199 by insert/insert_aux. */
201 /* Representations of value numbers:
203 Value numbers are represented by a representative SSA_NAME. We
204 will create fake SSA_NAME's in situations where we need a
205 representative but do not have one (because it is a complex
206 expression). In order to facilitate storing the value numbers in
207 bitmaps, and keep the number of wasted SSA_NAME's down, we also
208 associate a value_id with each value number, and create full blown
209 ssa_name's only where we actually need them (IE in operands of
210 existing expressions).
212 Theoretically you could replace all the value_id's with
213 SSA_NAME_VERSION, but this would allocate a large number of
214 SSA_NAME's (which are each > 30 bytes) just to get a 4 byte number.
215 It would also require an additional indirection at each point we
218 /* Representation of expressions on value numbers:
220 Expressions consisting of value numbers are represented the same
221 way as our VN internally represents them, with an additional
222 "pre_expr" wrapping around them in order to facilitate storing all
223 of the expressions in the same sets. */
225 /* Representation of sets:
227 The dataflow sets do not need to be sorted in any particular order
228 for the majority of their lifetime, and are simply represented as two
229 bitmaps, one that keeps track of values present in the set, and one
230 that keeps track of expressions present in the set.
232 When we need them in topological order, we produce it on demand by
233 transforming the bitmap into an array and sorting it into topo
236 /* Type of expression, used to know which member of the PRE_EXPR union
252 vn_reference_t reference
;
255 typedef struct pre_expr_d
: nofree_ptr_hash
<pre_expr_d
>
257 enum pre_expr_kind kind
;
263 /* hash_table support. */
264 static inline hashval_t
hash (const pre_expr_d
*);
265 static inline int equal (const pre_expr_d
*, const pre_expr_d
*);
268 #define PRE_EXPR_NAME(e) (e)->u.name
269 #define PRE_EXPR_NARY(e) (e)->u.nary
270 #define PRE_EXPR_REFERENCE(e) (e)->u.reference
271 #define PRE_EXPR_CONSTANT(e) (e)->u.constant
273 /* Compare E1 and E2 for equality. */
276 pre_expr_d::equal (const pre_expr_d
*e1
, const pre_expr_d
*e2
)
278 if (e1
->kind
!= e2
->kind
)
284 return vn_constant_eq_with_type (PRE_EXPR_CONSTANT (e1
),
285 PRE_EXPR_CONSTANT (e2
));
287 return PRE_EXPR_NAME (e1
) == PRE_EXPR_NAME (e2
);
289 return vn_nary_op_eq (PRE_EXPR_NARY (e1
), PRE_EXPR_NARY (e2
));
291 return vn_reference_eq (PRE_EXPR_REFERENCE (e1
),
292 PRE_EXPR_REFERENCE (e2
));
301 pre_expr_d::hash (const pre_expr_d
*e
)
306 return vn_hash_constant_with_type (PRE_EXPR_CONSTANT (e
));
308 return SSA_NAME_VERSION (PRE_EXPR_NAME (e
));
310 return PRE_EXPR_NARY (e
)->hashcode
;
312 return PRE_EXPR_REFERENCE (e
)->hashcode
;
318 /* Next global expression id number. */
319 static unsigned int next_expression_id
;
321 /* Mapping from expression to id number we can use in bitmap sets. */
322 static vec
<pre_expr
> expressions
;
323 static hash_table
<pre_expr_d
> *expression_to_id
;
324 static vec
<unsigned> name_to_id
;
326 /* Allocate an expression id for EXPR. */
328 static inline unsigned int
329 alloc_expression_id (pre_expr expr
)
331 struct pre_expr_d
**slot
;
332 /* Make sure we won't overflow. */
333 gcc_assert (next_expression_id
+ 1 > next_expression_id
);
334 expr
->id
= next_expression_id
++;
335 expressions
.safe_push (expr
);
336 if (expr
->kind
== NAME
)
338 unsigned version
= SSA_NAME_VERSION (PRE_EXPR_NAME (expr
));
339 /* vec::safe_grow_cleared allocates no headroom. Avoid frequent
340 re-allocations by using vec::reserve upfront. */
341 unsigned old_len
= name_to_id
.length ();
342 name_to_id
.reserve (num_ssa_names
- old_len
);
343 name_to_id
.quick_grow_cleared (num_ssa_names
);
344 gcc_assert (name_to_id
[version
] == 0);
345 name_to_id
[version
] = expr
->id
;
349 slot
= expression_to_id
->find_slot (expr
, INSERT
);
353 return next_expression_id
- 1;
356 /* Return the expression id for tree EXPR. */
358 static inline unsigned int
359 get_expression_id (const pre_expr expr
)
364 static inline unsigned int
365 lookup_expression_id (const pre_expr expr
)
367 struct pre_expr_d
**slot
;
369 if (expr
->kind
== NAME
)
371 unsigned version
= SSA_NAME_VERSION (PRE_EXPR_NAME (expr
));
372 if (name_to_id
.length () <= version
)
374 return name_to_id
[version
];
378 slot
= expression_to_id
->find_slot (expr
, NO_INSERT
);
381 return ((pre_expr
)*slot
)->id
;
385 /* Return the existing expression id for EXPR, or create one if one
386 does not exist yet. */
388 static inline unsigned int
389 get_or_alloc_expression_id (pre_expr expr
)
391 unsigned int id
= lookup_expression_id (expr
);
393 return alloc_expression_id (expr
);
394 return expr
->id
= id
;
397 /* Return the expression that has expression id ID */
399 static inline pre_expr
400 expression_for_id (unsigned int id
)
402 return expressions
[id
];
405 static object_allocator
<pre_expr_d
> pre_expr_pool ("pre_expr nodes");
407 /* Given an SSA_NAME NAME, get or create a pre_expr to represent it. */
410 get_or_alloc_expr_for_name (tree name
)
412 struct pre_expr_d expr
;
414 unsigned int result_id
;
418 PRE_EXPR_NAME (&expr
) = name
;
419 result_id
= lookup_expression_id (&expr
);
421 return expression_for_id (result_id
);
423 result
= pre_expr_pool
.allocate ();
425 result
->loc
= UNKNOWN_LOCATION
;
426 result
->value_id
= VN_INFO (name
)->value_id
;
427 PRE_EXPR_NAME (result
) = name
;
428 alloc_expression_id (result
);
432 /* Given an NARY, get or create a pre_expr to represent it. */
435 get_or_alloc_expr_for_nary (vn_nary_op_t nary
,
436 location_t loc
= UNKNOWN_LOCATION
)
438 struct pre_expr_d expr
;
440 unsigned int result_id
;
444 PRE_EXPR_NARY (&expr
) = nary
;
445 result_id
= lookup_expression_id (&expr
);
447 return expression_for_id (result_id
);
449 result
= pre_expr_pool
.allocate ();
452 result
->value_id
= nary
->value_id
;
453 PRE_EXPR_NARY (result
) = nary
;
454 alloc_expression_id (result
);
458 /* Given an REFERENCE, get or create a pre_expr to represent it. */
461 get_or_alloc_expr_for_reference (vn_reference_t reference
,
462 location_t loc
= UNKNOWN_LOCATION
)
464 struct pre_expr_d expr
;
466 unsigned int result_id
;
468 expr
.kind
= REFERENCE
;
470 PRE_EXPR_REFERENCE (&expr
) = reference
;
471 result_id
= lookup_expression_id (&expr
);
473 return expression_for_id (result_id
);
475 result
= pre_expr_pool
.allocate ();
476 result
->kind
= REFERENCE
;
478 result
->value_id
= reference
->value_id
;
479 PRE_EXPR_REFERENCE (result
) = reference
;
480 alloc_expression_id (result
);
485 /* An unordered bitmap set. One bitmap tracks values, the other,
487 typedef class bitmap_set
490 bitmap_head expressions
;
494 #define FOR_EACH_EXPR_ID_IN_SET(set, id, bi) \
495 EXECUTE_IF_SET_IN_BITMAP (&(set)->expressions, 0, (id), (bi))
497 #define FOR_EACH_VALUE_ID_IN_SET(set, id, bi) \
498 EXECUTE_IF_SET_IN_BITMAP (&(set)->values, 0, (id), (bi))
500 /* Mapping from value id to expressions with that value_id. */
501 static vec
<bitmap
> value_expressions
;
502 /* We just record a single expression for each constant value,
503 one of kind CONSTANT. */
504 static vec
<pre_expr
> constant_value_expressions
;
507 /* This structure is used to keep track of statistics on what
508 optimization PRE was able to perform. */
511 /* The number of new expressions/temporaries generated by PRE. */
514 /* The number of inserts found due to partial anticipation */
517 /* The number of inserts made for code hoisting. */
520 /* The number of new PHI nodes added by PRE. */
524 static bool do_partial_partial
;
525 static pre_expr
bitmap_find_leader (bitmap_set_t
, unsigned int);
526 static void bitmap_value_insert_into_set (bitmap_set_t
, pre_expr
);
527 static void bitmap_value_replace_in_set (bitmap_set_t
, pre_expr
);
528 static void bitmap_set_copy (bitmap_set_t
, bitmap_set_t
);
529 static bool bitmap_set_contains_value (bitmap_set_t
, unsigned int);
530 static void bitmap_insert_into_set (bitmap_set_t
, pre_expr
);
531 static bitmap_set_t
bitmap_set_new (void);
532 static tree
create_expression_by_pieces (basic_block
, pre_expr
, gimple_seq
*,
534 static tree
find_or_generate_expression (basic_block
, tree
, gimple_seq
*);
535 static unsigned int get_expr_value_id (pre_expr
);
537 /* We can add and remove elements and entries to and from sets
538 and hash tables, so we use alloc pools for them. */
540 static object_allocator
<bitmap_set
> bitmap_set_pool ("Bitmap sets");
541 static bitmap_obstack grand_bitmap_obstack
;
543 /* A three tuple {e, pred, v} used to cache phi translations in the
544 phi_translate_table. */
546 typedef struct expr_pred_trans_d
: public typed_noop_remove
<expr_pred_trans_d
>
548 typedef expr_pred_trans_d value_type
;
549 typedef expr_pred_trans_d compare_type
;
551 /* The expression ID. */
554 /* The value expression ID that resulted from the translation. */
557 /* hash_table support. */
558 static inline void mark_empty (expr_pred_trans_d
&);
559 static inline bool is_empty (const expr_pred_trans_d
&);
560 static inline void mark_deleted (expr_pred_trans_d
&);
561 static inline bool is_deleted (const expr_pred_trans_d
&);
562 static const bool empty_zero_p
= true;
563 static inline hashval_t
hash (const expr_pred_trans_d
&);
564 static inline int equal (const expr_pred_trans_d
&, const expr_pred_trans_d
&);
565 } *expr_pred_trans_t
;
566 typedef const struct expr_pred_trans_d
*const_expr_pred_trans_t
;
569 expr_pred_trans_d::is_empty (const expr_pred_trans_d
&e
)
575 expr_pred_trans_d::is_deleted (const expr_pred_trans_d
&e
)
581 expr_pred_trans_d::mark_empty (expr_pred_trans_d
&e
)
587 expr_pred_trans_d::mark_deleted (expr_pred_trans_d
&e
)
593 expr_pred_trans_d::hash (const expr_pred_trans_d
&e
)
599 expr_pred_trans_d::equal (const expr_pred_trans_d
&ve1
,
600 const expr_pred_trans_d
&ve2
)
602 return ve1
.e
== ve2
.e
;
605 /* Sets that we need to keep track of. */
606 typedef struct bb_bitmap_sets
608 /* The EXP_GEN set, which represents expressions/values generated in
610 bitmap_set_t exp_gen
;
612 /* The PHI_GEN set, which represents PHI results generated in a
614 bitmap_set_t phi_gen
;
616 /* The TMP_GEN set, which represents results/temporaries generated
617 in a basic block. IE the LHS of an expression. */
618 bitmap_set_t tmp_gen
;
620 /* The AVAIL_OUT set, which represents which values are available in
621 a given basic block. */
622 bitmap_set_t avail_out
;
624 /* The ANTIC_IN set, which represents which values are anticipatable
625 in a given basic block. */
626 bitmap_set_t antic_in
;
628 /* The PA_IN set, which represents which values are
629 partially anticipatable in a given basic block. */
632 /* The NEW_SETS set, which is used during insertion to augment the
633 AVAIL_OUT set of blocks with the new insertions performed during
634 the current iteration. */
635 bitmap_set_t new_sets
;
637 /* A cache for value_dies_in_block_x. */
640 /* The live virtual operand on successor edges. */
643 /* PHI translate cache for the single successor edge. */
644 hash_table
<expr_pred_trans_d
> *phi_translate_table
;
646 /* True if we have visited this block during ANTIC calculation. */
647 unsigned int visited
: 1;
649 /* True when the block contains a call that might not return. */
650 unsigned int contains_may_not_return_call
: 1;
653 #define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen
654 #define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen
655 #define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen
656 #define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out
657 #define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in
658 #define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in
659 #define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets
660 #define EXPR_DIES(BB) ((bb_value_sets_t) ((BB)->aux))->expr_dies
661 #define PHI_TRANS_TABLE(BB) ((bb_value_sets_t) ((BB)->aux))->phi_translate_table
662 #define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited
663 #define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) ((BB)->aux))->contains_may_not_return_call
664 #define BB_LIVE_VOP_ON_EXIT(BB) ((bb_value_sets_t) ((BB)->aux))->vop_on_exit
667 /* Add the tuple mapping from {expression E, basic block PRED} to
668 the phi translation table and return whether it pre-existed. */
671 phi_trans_add (expr_pred_trans_t
*entry
, pre_expr e
, basic_block pred
)
673 if (!PHI_TRANS_TABLE (pred
))
674 PHI_TRANS_TABLE (pred
) = new hash_table
<expr_pred_trans_d
> (11);
676 expr_pred_trans_t slot
;
677 expr_pred_trans_d tem
;
678 unsigned id
= get_expression_id (e
);
680 slot
= PHI_TRANS_TABLE (pred
)->find_slot_with_hash (tem
, id
, INSERT
);
693 /* Add expression E to the expression set of value id V. */
696 add_to_value (unsigned int v
, pre_expr e
)
698 gcc_checking_assert (get_expr_value_id (e
) == v
);
700 if (value_id_constant_p (v
))
702 if (e
->kind
!= CONSTANT
)
705 if (-v
>= constant_value_expressions
.length ())
706 constant_value_expressions
.safe_grow_cleared (-v
+ 1);
708 pre_expr leader
= constant_value_expressions
[-v
];
710 constant_value_expressions
[-v
] = e
;
714 if (v
>= value_expressions
.length ())
715 value_expressions
.safe_grow_cleared (v
+ 1);
717 bitmap set
= value_expressions
[v
];
720 set
= BITMAP_ALLOC (&grand_bitmap_obstack
);
721 value_expressions
[v
] = set
;
723 bitmap_set_bit (set
, get_or_alloc_expression_id (e
));
727 /* Create a new bitmap set and return it. */
730 bitmap_set_new (void)
732 bitmap_set_t ret
= bitmap_set_pool
.allocate ();
733 bitmap_initialize (&ret
->expressions
, &grand_bitmap_obstack
);
734 bitmap_initialize (&ret
->values
, &grand_bitmap_obstack
);
738 /* Return the value id for a PRE expression EXPR. */
741 get_expr_value_id (pre_expr expr
)
743 /* ??? We cannot assert that expr has a value-id (it can be 0), because
744 we assign value-ids only to expressions that have a result
745 in set_hashtable_value_ids. */
746 return expr
->value_id
;
749 /* Return a VN valnum (SSA name or constant) for the PRE value-id VAL. */
752 vn_valnum_from_value_id (unsigned int val
)
754 if (value_id_constant_p (val
))
756 pre_expr vexpr
= constant_value_expressions
[-val
];
758 return PRE_EXPR_CONSTANT (vexpr
);
762 bitmap exprset
= value_expressions
[val
];
765 EXECUTE_IF_SET_IN_BITMAP (exprset
, 0, i
, bi
)
767 pre_expr vexpr
= expression_for_id (i
);
768 if (vexpr
->kind
== NAME
)
769 return VN_INFO (PRE_EXPR_NAME (vexpr
))->valnum
;
774 /* Insert an expression EXPR into a bitmapped set. */
777 bitmap_insert_into_set (bitmap_set_t set
, pre_expr expr
)
779 unsigned int val
= get_expr_value_id (expr
);
780 if (! value_id_constant_p (val
))
782 /* Note this is the only function causing multiple expressions
783 for the same value to appear in a set. This is needed for
784 TMP_GEN, PHI_GEN and NEW_SETs. */
785 bitmap_set_bit (&set
->values
, val
);
786 bitmap_set_bit (&set
->expressions
, get_or_alloc_expression_id (expr
));
790 /* Copy a bitmapped set ORIG, into bitmapped set DEST. */
793 bitmap_set_copy (bitmap_set_t dest
, bitmap_set_t orig
)
795 bitmap_copy (&dest
->expressions
, &orig
->expressions
);
796 bitmap_copy (&dest
->values
, &orig
->values
);
800 /* Free memory used up by SET. */
802 bitmap_set_free (bitmap_set_t set
)
804 bitmap_clear (&set
->expressions
);
805 bitmap_clear (&set
->values
);
809 /* DFS walk EXPR to its operands with leaders in SET, collecting
810 expressions in SET in postorder into POST. */
813 pre_expr_DFS (pre_expr expr
, bitmap_set_t set
, bitmap visited
,
814 hash_set
<int_hash
<unsigned int, 0> > &leader_set
,
817 if (!bitmap_set_bit (visited
, get_expression_id (expr
)))
824 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
825 for (unsigned i
= 0; i
< nary
->length
; i
++)
827 if (TREE_CODE (nary
->op
[i
]) != SSA_NAME
)
829 unsigned int op_val_id
= VN_INFO (nary
->op
[i
])->value_id
;
830 /* If we already found a leader for the value we've
831 recursed already. Avoid the costly bitmap_find_leader. */
832 if (!leader_set
.add (op_val_id
))
834 pre_expr leader
= bitmap_find_leader (set
, op_val_id
);
836 pre_expr_DFS (leader
, set
, visited
, leader_set
, post
);
843 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
844 vec
<vn_reference_op_s
> operands
= ref
->operands
;
845 vn_reference_op_t operand
;
846 for (unsigned i
= 0; operands
.iterate (i
, &operand
); i
++)
849 op
[0] = operand
->op0
;
850 op
[1] = operand
->op1
;
851 op
[2] = operand
->op2
;
852 for (unsigned n
= 0; n
< 3; ++n
)
854 if (!op
[n
] || TREE_CODE (op
[n
]) != SSA_NAME
)
856 unsigned op_val_id
= VN_INFO (op
[n
])->value_id
;
857 if (!leader_set
.add (op_val_id
))
859 pre_expr leader
= bitmap_find_leader (set
, op_val_id
);
861 pre_expr_DFS (leader
, set
, visited
, leader_set
, post
);
869 post
.quick_push (expr
);
872 /* Generate an topological-ordered array of bitmap set SET. */
875 sorted_array_from_bitmap_set (bitmap_set_t set
)
879 vec
<pre_expr
> result
;
881 /* Pre-allocate enough space for the array. */
882 size_t len
= bitmap_count_bits (&set
->expressions
);
884 hash_set
<int_hash
<unsigned int, 0> > leader_set (2*len
);
886 auto_bitmap
visited (&grand_bitmap_obstack
);
887 bitmap_tree_view (visited
);
888 FOR_EACH_EXPR_ID_IN_SET (set
, i
, bi
)
890 pre_expr expr
= expression_for_id (i
);
891 /* Hoist insertion calls us with a value-set we have to and with,
893 if (bitmap_set_contains_value (set
, get_expr_value_id (expr
)))
894 pre_expr_DFS (expr
, set
, visited
, leader_set
, result
);
900 /* Subtract all expressions contained in ORIG from DEST. */
903 bitmap_set_subtract_expressions (bitmap_set_t dest
, bitmap_set_t orig
)
905 bitmap_set_t result
= bitmap_set_new ();
909 bitmap_and_compl (&result
->expressions
, &dest
->expressions
,
912 FOR_EACH_EXPR_ID_IN_SET (result
, i
, bi
)
914 pre_expr expr
= expression_for_id (i
);
915 unsigned int value_id
= get_expr_value_id (expr
);
916 bitmap_set_bit (&result
->values
, value_id
);
922 /* Subtract all values in bitmap set B from bitmap set A. */
925 bitmap_set_subtract_values (bitmap_set_t a
, bitmap_set_t b
)
929 unsigned to_remove
= -1U;
930 bitmap_and_compl_into (&a
->values
, &b
->values
);
931 FOR_EACH_EXPR_ID_IN_SET (a
, i
, bi
)
933 if (to_remove
!= -1U)
935 bitmap_clear_bit (&a
->expressions
, to_remove
);
938 pre_expr expr
= expression_for_id (i
);
939 if (! bitmap_bit_p (&a
->values
, get_expr_value_id (expr
)))
942 if (to_remove
!= -1U)
943 bitmap_clear_bit (&a
->expressions
, to_remove
);
947 /* Return true if bitmapped set SET contains the value VALUE_ID. */
950 bitmap_set_contains_value (bitmap_set_t set
, unsigned int value_id
)
952 if (value_id_constant_p (value_id
))
955 return bitmap_bit_p (&set
->values
, value_id
);
958 /* Return true if two bitmap sets are equal. */
961 bitmap_set_equal (bitmap_set_t a
, bitmap_set_t b
)
963 return bitmap_equal_p (&a
->values
, &b
->values
);
966 /* Replace an instance of EXPR's VALUE with EXPR in SET if it exists,
967 and add it otherwise. */
970 bitmap_value_replace_in_set (bitmap_set_t set
, pre_expr expr
)
972 unsigned int val
= get_expr_value_id (expr
);
973 if (value_id_constant_p (val
))
976 if (bitmap_set_contains_value (set
, val
))
978 /* The number of expressions having a given value is usually
979 significantly less than the total number of expressions in SET.
980 Thus, rather than check, for each expression in SET, whether it
981 has the value LOOKFOR, we walk the reverse mapping that tells us
982 what expressions have a given value, and see if any of those
983 expressions are in our set. For large testcases, this is about
984 5-10x faster than walking the bitmap. If this is somehow a
985 significant lose for some cases, we can choose which set to walk
986 based on the set size. */
989 bitmap exprset
= value_expressions
[val
];
990 EXECUTE_IF_SET_IN_BITMAP (exprset
, 0, i
, bi
)
992 if (bitmap_clear_bit (&set
->expressions
, i
))
994 bitmap_set_bit (&set
->expressions
, get_expression_id (expr
));
1001 bitmap_insert_into_set (set
, expr
);
1004 /* Insert EXPR into SET if EXPR's value is not already present in
1008 bitmap_value_insert_into_set (bitmap_set_t set
, pre_expr expr
)
1010 unsigned int val
= get_expr_value_id (expr
);
1012 gcc_checking_assert (expr
->id
== get_or_alloc_expression_id (expr
));
1014 /* Constant values are always considered to be part of the set. */
1015 if (value_id_constant_p (val
))
1018 /* If the value membership changed, add the expression. */
1019 if (bitmap_set_bit (&set
->values
, val
))
1020 bitmap_set_bit (&set
->expressions
, expr
->id
);
1023 /* Print out EXPR to outfile. */
1026 print_pre_expr (FILE *outfile
, const pre_expr expr
)
1030 fprintf (outfile
, "NULL");
1036 print_generic_expr (outfile
, PRE_EXPR_CONSTANT (expr
));
1039 print_generic_expr (outfile
, PRE_EXPR_NAME (expr
));
1044 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
1045 fprintf (outfile
, "{%s,", get_tree_code_name (nary
->opcode
));
1046 for (i
= 0; i
< nary
->length
; i
++)
1048 print_generic_expr (outfile
, nary
->op
[i
]);
1049 if (i
!= (unsigned) nary
->length
- 1)
1050 fprintf (outfile
, ",");
1052 fprintf (outfile
, "}");
1058 vn_reference_op_t vro
;
1060 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
1061 fprintf (outfile
, "{");
1063 ref
->operands
.iterate (i
, &vro
);
1066 bool closebrace
= false;
1067 if (vro
->opcode
!= SSA_NAME
1068 && TREE_CODE_CLASS (vro
->opcode
) != tcc_declaration
)
1070 fprintf (outfile
, "%s", get_tree_code_name (vro
->opcode
));
1073 fprintf (outfile
, "<");
1079 print_generic_expr (outfile
, vro
->op0
);
1082 fprintf (outfile
, ",");
1083 print_generic_expr (outfile
, vro
->op1
);
1087 fprintf (outfile
, ",");
1088 print_generic_expr (outfile
, vro
->op2
);
1092 fprintf (outfile
, ">");
1093 if (i
!= ref
->operands
.length () - 1)
1094 fprintf (outfile
, ",");
1096 fprintf (outfile
, "}");
1099 fprintf (outfile
, "@");
1100 print_generic_expr (outfile
, ref
->vuse
);
1106 void debug_pre_expr (pre_expr
);
1108 /* Like print_pre_expr but always prints to stderr. */
1110 debug_pre_expr (pre_expr e
)
1112 print_pre_expr (stderr
, e
);
1113 fprintf (stderr
, "\n");
1116 /* Print out SET to OUTFILE. */
1119 print_bitmap_set (FILE *outfile
, bitmap_set_t set
,
1120 const char *setname
, int blockindex
)
1122 fprintf (outfile
, "%s[%d] := { ", setname
, blockindex
);
1129 FOR_EACH_EXPR_ID_IN_SET (set
, i
, bi
)
1131 const pre_expr expr
= expression_for_id (i
);
1134 fprintf (outfile
, ", ");
1136 print_pre_expr (outfile
, expr
);
1138 fprintf (outfile
, " (%04d)", get_expr_value_id (expr
));
1141 fprintf (outfile
, " }\n");
1144 void debug_bitmap_set (bitmap_set_t
);
1147 debug_bitmap_set (bitmap_set_t set
)
1149 print_bitmap_set (stderr
, set
, "debug", 0);
1152 void debug_bitmap_sets_for (basic_block
);
1155 debug_bitmap_sets_for (basic_block bb
)
1157 print_bitmap_set (stderr
, AVAIL_OUT (bb
), "avail_out", bb
->index
);
1158 print_bitmap_set (stderr
, EXP_GEN (bb
), "exp_gen", bb
->index
);
1159 print_bitmap_set (stderr
, PHI_GEN (bb
), "phi_gen", bb
->index
);
1160 print_bitmap_set (stderr
, TMP_GEN (bb
), "tmp_gen", bb
->index
);
1161 print_bitmap_set (stderr
, ANTIC_IN (bb
), "antic_in", bb
->index
);
1162 if (do_partial_partial
)
1163 print_bitmap_set (stderr
, PA_IN (bb
), "pa_in", bb
->index
);
1164 print_bitmap_set (stderr
, NEW_SETS (bb
), "new_sets", bb
->index
);
1167 /* Print out the expressions that have VAL to OUTFILE. */
1170 print_value_expressions (FILE *outfile
, unsigned int val
)
1172 bitmap set
= value_expressions
[val
];
1177 sprintf (s
, "%04d", val
);
1178 x
.expressions
= *set
;
1179 print_bitmap_set (outfile
, &x
, s
, 0);
1185 debug_value_expressions (unsigned int val
)
1187 print_value_expressions (stderr
, val
);
1190 /* Given a CONSTANT, allocate a new CONSTANT type PRE_EXPR to
1194 get_or_alloc_expr_for_constant (tree constant
)
1196 unsigned int result_id
;
1197 struct pre_expr_d expr
;
1200 expr
.kind
= CONSTANT
;
1201 PRE_EXPR_CONSTANT (&expr
) = constant
;
1202 result_id
= lookup_expression_id (&expr
);
1204 return expression_for_id (result_id
);
1206 newexpr
= pre_expr_pool
.allocate ();
1207 newexpr
->kind
= CONSTANT
;
1208 newexpr
->loc
= UNKNOWN_LOCATION
;
1209 PRE_EXPR_CONSTANT (newexpr
) = constant
;
1210 alloc_expression_id (newexpr
);
1211 newexpr
->value_id
= get_or_alloc_constant_value_id (constant
);
1212 add_to_value (newexpr
->value_id
, newexpr
);
1216 /* Get or allocate a pre_expr for a piece of GIMPLE, and return it.
1217 Currently only supports constants and SSA_NAMES. */
1219 get_or_alloc_expr_for (tree t
)
1221 if (TREE_CODE (t
) == SSA_NAME
)
1222 return get_or_alloc_expr_for_name (t
);
1223 else if (is_gimple_min_invariant (t
))
1224 return get_or_alloc_expr_for_constant (t
);
1228 /* Return the folded version of T if T, when folded, is a gimple
1229 min_invariant or an SSA name. Otherwise, return T. */
1232 fully_constant_expression (pre_expr e
)
1240 vn_nary_op_t nary
= PRE_EXPR_NARY (e
);
1241 tree res
= vn_nary_simplify (nary
);
1244 if (is_gimple_min_invariant (res
))
1245 return get_or_alloc_expr_for_constant (res
);
1246 if (TREE_CODE (res
) == SSA_NAME
)
1247 return get_or_alloc_expr_for_name (res
);
1252 vn_reference_t ref
= PRE_EXPR_REFERENCE (e
);
1254 if ((folded
= fully_constant_vn_reference_p (ref
)))
1255 return get_or_alloc_expr_for_constant (folded
);
1264 /* Translate the VUSE backwards through phi nodes in PHIBLOCK, so that
1265 it has the value it would have in BLOCK. Set *SAME_VALID to true
1266 in case the new vuse doesn't change the value id of the OPERANDS. */
1269 translate_vuse_through_block (vec
<vn_reference_op_s
> operands
,
1270 alias_set_type set
, alias_set_type base_set
,
1271 tree type
, tree vuse
,
1272 basic_block phiblock
,
1273 basic_block block
, bool *same_valid
)
1275 gimple
*phi
= SSA_NAME_DEF_STMT (vuse
);
1283 if (gimple_bb (phi
) != phiblock
)
1286 unsigned int cnt
= param_sccvn_max_alias_queries_per_access
;
1287 use_oracle
= ao_ref_init_from_vn_reference (&ref
, set
, base_set
,
1290 /* Use the alias-oracle to find either the PHI node in this block,
1291 the first VUSE used in this block that is equivalent to vuse or
1292 the first VUSE which definition in this block kills the value. */
1293 if (gimple_code (phi
) == GIMPLE_PHI
)
1294 e
= find_edge (block
, phiblock
);
1295 else if (use_oracle
)
1297 && !stmt_may_clobber_ref_p_1 (phi
, &ref
))
1300 vuse
= gimple_vuse (phi
);
1301 phi
= SSA_NAME_DEF_STMT (vuse
);
1302 if (gimple_bb (phi
) != phiblock
)
1304 if (gimple_code (phi
) == GIMPLE_PHI
)
1306 e
= find_edge (block
, phiblock
);
1315 if (use_oracle
&& same_valid
)
1317 bitmap visited
= NULL
;
1318 /* Try to find a vuse that dominates this phi node by skipping
1319 non-clobbering statements. */
1320 vuse
= get_continuation_for_phi (phi
, &ref
, true,
1321 cnt
, &visited
, false, NULL
, NULL
);
1323 BITMAP_FREE (visited
);
1327 /* If we didn't find any, the value ID can't stay the same. */
1328 if (!vuse
&& same_valid
)
1329 *same_valid
= false;
1330 /* ??? We would like to return vuse here as this is the canonical
1331 upmost vdef that this reference is associated with. But during
1332 insertion of the references into the hash tables we only ever
1333 directly insert with their direct gimple_vuse, hence returning
1334 something else would make us not find the other expression. */
1335 return PHI_ARG_DEF (phi
, e
->dest_idx
);
1341 /* Like bitmap_find_leader, but checks for the value existing in SET1 *or*
1342 SET2 *or* SET3. This is used to avoid making a set consisting of the union
1343 of PA_IN and ANTIC_IN during insert and phi-translation. */
1345 static inline pre_expr
1346 find_leader_in_sets (unsigned int val
, bitmap_set_t set1
, bitmap_set_t set2
,
1347 bitmap_set_t set3
= NULL
)
1349 pre_expr result
= NULL
;
1352 result
= bitmap_find_leader (set1
, val
);
1353 if (!result
&& set2
)
1354 result
= bitmap_find_leader (set2
, val
);
1355 if (!result
&& set3
)
1356 result
= bitmap_find_leader (set3
, val
);
1360 /* Get the tree type for our PRE expression e. */
1363 get_expr_type (const pre_expr e
)
1368 return TREE_TYPE (PRE_EXPR_NAME (e
));
1370 return TREE_TYPE (PRE_EXPR_CONSTANT (e
));
1372 return PRE_EXPR_REFERENCE (e
)->type
;
1374 return PRE_EXPR_NARY (e
)->type
;
1379 /* Get a representative SSA_NAME for a given expression that is available in B.
1380 Since all of our sub-expressions are treated as values, we require
1381 them to be SSA_NAME's for simplicity.
1382 Prior versions of GVNPRE used to use "value handles" here, so that
1383 an expression would be VH.11 + VH.10 instead of d_3 + e_6. In
1384 either case, the operands are really values (IE we do not expect
1385 them to be usable without finding leaders). */
1388 get_representative_for (const pre_expr e
, basic_block b
= NULL
)
1390 tree name
, valnum
= NULL_TREE
;
1391 unsigned int value_id
= get_expr_value_id (e
);
1396 return PRE_EXPR_NAME (e
);
1398 return PRE_EXPR_CONSTANT (e
);
1402 /* Go through all of the expressions representing this value
1403 and pick out an SSA_NAME. */
1406 bitmap exprs
= value_expressions
[value_id
];
1407 EXECUTE_IF_SET_IN_BITMAP (exprs
, 0, i
, bi
)
1409 pre_expr rep
= expression_for_id (i
);
1410 if (rep
->kind
== NAME
)
1412 tree name
= PRE_EXPR_NAME (rep
);
1413 valnum
= VN_INFO (name
)->valnum
;
1414 gimple
*def
= SSA_NAME_DEF_STMT (name
);
1415 /* We have to return either a new representative or one
1416 that can be used for expression simplification and thus
1417 is available in B. */
1419 || gimple_nop_p (def
)
1420 || dominated_by_p (CDI_DOMINATORS
, b
, gimple_bb (def
)))
1423 else if (rep
->kind
== CONSTANT
)
1424 return PRE_EXPR_CONSTANT (rep
);
1430 /* If we reached here we couldn't find an SSA_NAME. This can
1431 happen when we've discovered a value that has never appeared in
1432 the program as set to an SSA_NAME, as the result of phi translation.
1434 ??? We should be able to re-use this when we insert the statement
1436 name
= make_temp_ssa_name (get_expr_type (e
), gimple_build_nop (), "pretmp");
1437 vn_ssa_aux_t vn_info
= VN_INFO (name
);
1438 vn_info
->value_id
= value_id
;
1439 vn_info
->valnum
= valnum
? valnum
: name
;
1440 /* ??? For now mark this SSA name for release by VN. */
1441 vn_info
->needs_insertion
= true;
1442 add_to_value (value_id
, get_or_alloc_expr_for_name (name
));
1443 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1445 fprintf (dump_file
, "Created SSA_NAME representative ");
1446 print_generic_expr (dump_file
, name
);
1447 fprintf (dump_file
, " for expression:");
1448 print_pre_expr (dump_file
, e
);
1449 fprintf (dump_file
, " (%04d)\n", value_id
);
1457 phi_translate (bitmap_set_t
, pre_expr
, bitmap_set_t
, bitmap_set_t
, edge
);
1459 /* Translate EXPR using phis in PHIBLOCK, so that it has the values of
1460 the phis in PRED. Return NULL if we can't find a leader for each part
1461 of the translated expression. */
1464 phi_translate_1 (bitmap_set_t dest
,
1465 pre_expr expr
, bitmap_set_t set1
, bitmap_set_t set2
, edge e
)
1467 basic_block pred
= e
->src
;
1468 basic_block phiblock
= e
->dest
;
1469 location_t expr_loc
= expr
->loc
;
1475 bool changed
= false;
1476 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
1477 vn_nary_op_t newnary
= XALLOCAVAR (struct vn_nary_op_s
,
1478 sizeof_vn_nary_op (nary
->length
));
1479 memcpy (newnary
, nary
, sizeof_vn_nary_op (nary
->length
));
1481 for (i
= 0; i
< newnary
->length
; i
++)
1483 if (TREE_CODE (newnary
->op
[i
]) != SSA_NAME
)
1487 pre_expr leader
, result
;
1488 unsigned int op_val_id
= VN_INFO (newnary
->op
[i
])->value_id
;
1489 leader
= find_leader_in_sets (op_val_id
, set1
, set2
);
1490 result
= phi_translate (dest
, leader
, set1
, set2
, e
);
1491 if (result
&& result
!= leader
)
1492 /* If op has a leader in the sets we translate make
1493 sure to use the value of the translated expression.
1494 We might need a new representative for that. */
1495 newnary
->op
[i
] = get_representative_for (result
, pred
);
1499 changed
|= newnary
->op
[i
] != nary
->op
[i
];
1505 unsigned int new_val_id
;
1507 PRE_EXPR_NARY (expr
) = newnary
;
1508 constant
= fully_constant_expression (expr
);
1509 PRE_EXPR_NARY (expr
) = nary
;
1510 if (constant
!= expr
)
1512 /* For non-CONSTANTs we have to make sure we can eventually
1513 insert the expression. Which means we need to have a
1515 if (constant
->kind
!= CONSTANT
)
1517 /* Do not allow simplifications to non-constants over
1518 backedges as this will likely result in a loop PHI node
1519 to be inserted and increased register pressure.
1520 See PR77498 - this avoids doing predcoms work in
1521 a less efficient way. */
1522 if (e
->flags
& EDGE_DFS_BACK
)
1526 unsigned value_id
= get_expr_value_id (constant
);
1527 /* We want a leader in ANTIC_OUT or AVAIL_OUT here.
1528 dest has what we computed into ANTIC_OUT sofar
1529 so pick from that - since topological sorting
1530 by sorted_array_from_bitmap_set isn't perfect
1531 we may lose some cases here. */
1532 constant
= find_leader_in_sets (value_id
, dest
,
1536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1538 fprintf (dump_file
, "simplifying ");
1539 print_pre_expr (dump_file
, expr
);
1540 fprintf (dump_file
, " translated %d -> %d to ",
1541 phiblock
->index
, pred
->index
);
1542 PRE_EXPR_NARY (expr
) = newnary
;
1543 print_pre_expr (dump_file
, expr
);
1544 PRE_EXPR_NARY (expr
) = nary
;
1545 fprintf (dump_file
, " to ");
1546 print_pre_expr (dump_file
, constant
);
1547 fprintf (dump_file
, "\n");
1557 /* vn_nary_* do not valueize operands. */
1558 for (i
= 0; i
< newnary
->length
; ++i
)
1559 if (TREE_CODE (newnary
->op
[i
]) == SSA_NAME
)
1560 newnary
->op
[i
] = VN_INFO (newnary
->op
[i
])->valnum
;
1561 tree result
= vn_nary_op_lookup_pieces (newnary
->length
,
1566 if (result
&& is_gimple_min_invariant (result
))
1567 return get_or_alloc_expr_for_constant (result
);
1569 if (!nary
|| nary
->predicated_values
)
1571 new_val_id
= get_next_value_id ();
1572 nary
= vn_nary_op_insert_pieces (newnary
->length
,
1576 result
, new_val_id
);
1578 expr
= get_or_alloc_expr_for_nary (nary
, expr_loc
);
1579 add_to_value (get_expr_value_id (expr
), expr
);
1587 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
1588 vec
<vn_reference_op_s
> operands
= ref
->operands
;
1589 tree vuse
= ref
->vuse
;
1590 tree newvuse
= vuse
;
1591 vec
<vn_reference_op_s
> newoperands
= vNULL
;
1592 bool changed
= false, same_valid
= true;
1594 vn_reference_op_t operand
;
1595 vn_reference_t newref
;
1597 for (i
= 0; operands
.iterate (i
, &operand
); i
++)
1602 tree type
= operand
->type
;
1603 vn_reference_op_s newop
= *operand
;
1604 op
[0] = operand
->op0
;
1605 op
[1] = operand
->op1
;
1606 op
[2] = operand
->op2
;
1607 for (n
= 0; n
< 3; ++n
)
1609 unsigned int op_val_id
;
1612 if (TREE_CODE (op
[n
]) != SSA_NAME
)
1614 /* We can't possibly insert these. */
1616 && !is_gimple_min_invariant (op
[n
]))
1620 op_val_id
= VN_INFO (op
[n
])->value_id
;
1621 leader
= find_leader_in_sets (op_val_id
, set1
, set2
);
1622 opresult
= phi_translate (dest
, leader
, set1
, set2
, e
);
1623 if (opresult
&& opresult
!= leader
)
1625 tree name
= get_representative_for (opresult
);
1626 changed
|= name
!= op
[n
];
1634 newoperands
.release ();
1639 if (!newoperands
.exists ())
1640 newoperands
= operands
.copy ();
1641 /* We may have changed from an SSA_NAME to a constant */
1642 if (newop
.opcode
== SSA_NAME
&& TREE_CODE (op
[0]) != SSA_NAME
)
1643 newop
.opcode
= TREE_CODE (op
[0]);
1648 newoperands
[i
] = newop
;
1650 gcc_checking_assert (i
== operands
.length ());
1654 newvuse
= translate_vuse_through_block (newoperands
.exists ()
1655 ? newoperands
: operands
,
1656 ref
->set
, ref
->base_set
,
1658 vuse
, phiblock
, pred
,
1660 ? NULL
: &same_valid
);
1661 if (newvuse
== NULL_TREE
)
1663 newoperands
.release ();
1668 if (changed
|| newvuse
!= vuse
)
1670 unsigned int new_val_id
;
1672 tree result
= vn_reference_lookup_pieces (newvuse
, ref
->set
,
1675 newoperands
.exists ()
1676 ? newoperands
: operands
,
1679 newoperands
.release ();
1681 /* We can always insert constants, so if we have a partial
1682 redundant constant load of another type try to translate it
1683 to a constant of appropriate type. */
1684 if (result
&& is_gimple_min_invariant (result
))
1687 if (!useless_type_conversion_p (ref
->type
, TREE_TYPE (result
)))
1689 tem
= fold_unary (VIEW_CONVERT_EXPR
, ref
->type
, result
);
1690 if (tem
&& !is_gimple_min_invariant (tem
))
1694 return get_or_alloc_expr_for_constant (tem
);
1697 /* If we'd have to convert things we would need to validate
1698 if we can insert the translated expression. So fail
1699 here for now - we cannot insert an alias with a different
1700 type in the VN tables either, as that would assert. */
1702 && !useless_type_conversion_p (ref
->type
, TREE_TYPE (result
)))
1704 else if (!result
&& newref
1705 && !useless_type_conversion_p (ref
->type
, newref
->type
))
1707 newoperands
.release ();
1712 new_val_id
= newref
->value_id
;
1715 if (changed
|| !same_valid
)
1716 new_val_id
= get_next_value_id ();
1718 new_val_id
= ref
->value_id
;
1719 if (!newoperands
.exists ())
1720 newoperands
= operands
.copy ();
1721 newref
= vn_reference_insert_pieces (newvuse
, ref
->set
,
1722 ref
->base_set
, ref
->type
,
1724 result
, new_val_id
);
1725 newoperands
= vNULL
;
1727 expr
= get_or_alloc_expr_for_reference (newref
, expr_loc
);
1728 add_to_value (new_val_id
, expr
);
1730 newoperands
.release ();
1737 tree name
= PRE_EXPR_NAME (expr
);
1738 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1739 /* If the SSA name is defined by a PHI node in this block,
1741 if (gimple_code (def_stmt
) == GIMPLE_PHI
1742 && gimple_bb (def_stmt
) == phiblock
)
1744 tree def
= PHI_ARG_DEF (def_stmt
, e
->dest_idx
);
1746 /* Handle constant. */
1747 if (is_gimple_min_invariant (def
))
1748 return get_or_alloc_expr_for_constant (def
);
1750 return get_or_alloc_expr_for_name (def
);
1752 /* Otherwise return it unchanged - it will get removed if its
1753 value is not available in PREDs AVAIL_OUT set of expressions
1754 by the subtraction of TMP_GEN. */
1763 /* Wrapper around phi_translate_1 providing caching functionality. */
1766 phi_translate (bitmap_set_t dest
, pre_expr expr
,
1767 bitmap_set_t set1
, bitmap_set_t set2
, edge e
)
1769 expr_pred_trans_t slot
= NULL
;
1775 /* Constants contain no values that need translation. */
1776 if (expr
->kind
== CONSTANT
)
1779 if (value_id_constant_p (get_expr_value_id (expr
)))
1782 /* Don't add translations of NAMEs as those are cheap to translate. */
1783 if (expr
->kind
!= NAME
)
1785 if (phi_trans_add (&slot
, expr
, e
->src
))
1786 return slot
->v
== 0 ? NULL
: expression_for_id (slot
->v
);
1787 /* Store NULL for the value we want to return in the case of
1793 basic_block saved_valueize_bb
= vn_context_bb
;
1794 vn_context_bb
= e
->src
;
1795 phitrans
= phi_translate_1 (dest
, expr
, set1
, set2
, e
);
1796 vn_context_bb
= saved_valueize_bb
;
1800 /* We may have reallocated. */
1801 phi_trans_add (&slot
, expr
, e
->src
);
1803 slot
->v
= get_expression_id (phitrans
);
1805 /* Remove failed translations again, they cause insert
1806 iteration to not pick up new opportunities reliably. */
1807 PHI_TRANS_TABLE (e
->src
)->clear_slot (slot
);
1814 /* For each expression in SET, translate the values through phi nodes
1815 in PHIBLOCK using edge PHIBLOCK->PRED, and store the resulting
1816 expressions in DEST. */
1819 phi_translate_set (bitmap_set_t dest
, bitmap_set_t set
, edge e
)
1824 if (gimple_seq_empty_p (phi_nodes (e
->dest
)))
1826 bitmap_set_copy (dest
, set
);
1830 /* Allocate the phi-translation cache where we have an idea about
1831 its size. hash-table implementation internals tell us that
1832 allocating the table to fit twice the number of elements will
1833 make sure we do not usually re-allocate. */
1834 if (!PHI_TRANS_TABLE (e
->src
))
1835 PHI_TRANS_TABLE (e
->src
) = new hash_table
<expr_pred_trans_d
>
1836 (2 * bitmap_count_bits (&set
->expressions
));
1837 FOR_EACH_EXPR_ID_IN_SET (set
, i
, bi
)
1839 pre_expr expr
= expression_for_id (i
);
1840 pre_expr translated
= phi_translate (dest
, expr
, set
, NULL
, e
);
1844 bitmap_insert_into_set (dest
, translated
);
1848 /* Find the leader for a value (i.e., the name representing that
1849 value) in a given set, and return it. Return NULL if no leader
1853 bitmap_find_leader (bitmap_set_t set
, unsigned int val
)
1855 if (value_id_constant_p (val
))
1856 return constant_value_expressions
[-val
];
1858 if (bitmap_set_contains_value (set
, val
))
1860 /* Rather than walk the entire bitmap of expressions, and see
1861 whether any of them has the value we are looking for, we look
1862 at the reverse mapping, which tells us the set of expressions
1863 that have a given value (IE value->expressions with that
1864 value) and see if any of those expressions are in our set.
1865 The number of expressions per value is usually significantly
1866 less than the number of expressions in the set. In fact, for
1867 large testcases, doing it this way is roughly 5-10x faster
1868 than walking the bitmap.
1869 If this is somehow a significant lose for some cases, we can
1870 choose which set to walk based on which set is smaller. */
1873 bitmap exprset
= value_expressions
[val
];
1875 EXECUTE_IF_AND_IN_BITMAP (exprset
, &set
->expressions
, 0, i
, bi
)
1876 return expression_for_id (i
);
1881 /* Determine if EXPR, a memory expression, is ANTIC_IN at the top of
1882 BLOCK by seeing if it is not killed in the block. Note that we are
1883 only determining whether there is a store that kills it. Because
1884 of the order in which clean iterates over values, we are guaranteed
1885 that altered operands will have caused us to be eliminated from the
1886 ANTIC_IN set already. */
1889 value_dies_in_block_x (pre_expr expr
, basic_block block
)
1891 tree vuse
= PRE_EXPR_REFERENCE (expr
)->vuse
;
1892 vn_reference_t refx
= PRE_EXPR_REFERENCE (expr
);
1894 gimple_stmt_iterator gsi
;
1895 unsigned id
= get_expression_id (expr
);
1902 /* Lookup a previously calculated result. */
1903 if (EXPR_DIES (block
)
1904 && bitmap_bit_p (EXPR_DIES (block
), id
* 2))
1905 return bitmap_bit_p (EXPR_DIES (block
), id
* 2 + 1);
1907 /* A memory expression {e, VUSE} dies in the block if there is a
1908 statement that may clobber e. If, starting statement walk from the
1909 top of the basic block, a statement uses VUSE there can be no kill
1910 inbetween that use and the original statement that loaded {e, VUSE},
1911 so we can stop walking. */
1912 ref
.base
= NULL_TREE
;
1913 for (gsi
= gsi_start_bb (block
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1915 tree def_vuse
, def_vdef
;
1916 def
= gsi_stmt (gsi
);
1917 def_vuse
= gimple_vuse (def
);
1918 def_vdef
= gimple_vdef (def
);
1920 /* Not a memory statement. */
1924 /* Not a may-def. */
1927 /* A load with the same VUSE, we're done. */
1928 if (def_vuse
== vuse
)
1934 /* Init ref only if we really need it. */
1935 if (ref
.base
== NULL_TREE
1936 && !ao_ref_init_from_vn_reference (&ref
, refx
->set
, refx
->base_set
,
1937 refx
->type
, refx
->operands
))
1942 /* If the statement may clobber expr, it dies. */
1943 if (stmt_may_clobber_ref_p_1 (def
, &ref
))
1950 /* Remember the result. */
1951 if (!EXPR_DIES (block
))
1952 EXPR_DIES (block
) = BITMAP_ALLOC (&grand_bitmap_obstack
);
1953 bitmap_set_bit (EXPR_DIES (block
), id
* 2);
1955 bitmap_set_bit (EXPR_DIES (block
), id
* 2 + 1);
1961 /* Determine if OP is valid in SET1 U SET2, which it is when the union
1962 contains its value-id. */
1965 op_valid_in_sets (bitmap_set_t set1
, bitmap_set_t set2
, tree op
)
1967 if (op
&& TREE_CODE (op
) == SSA_NAME
)
1969 unsigned int value_id
= VN_INFO (op
)->value_id
;
1970 if (!(bitmap_set_contains_value (set1
, value_id
)
1971 || (set2
&& bitmap_set_contains_value (set2
, value_id
))))
1977 /* Determine if the expression EXPR is valid in SET1 U SET2.
1978 ONLY SET2 CAN BE NULL.
1979 This means that we have a leader for each part of the expression
1980 (if it consists of values), or the expression is an SSA_NAME.
1981 For loads/calls, we also see if the vuse is killed in this block. */
1984 valid_in_sets (bitmap_set_t set1
, bitmap_set_t set2
, pre_expr expr
)
1989 /* By construction all NAMEs are available. Non-available
1990 NAMEs are removed by subtracting TMP_GEN from the sets. */
1995 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
1996 for (i
= 0; i
< nary
->length
; i
++)
1997 if (!op_valid_in_sets (set1
, set2
, nary
->op
[i
]))
2004 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
2005 vn_reference_op_t vro
;
2008 FOR_EACH_VEC_ELT (ref
->operands
, i
, vro
)
2010 if (!op_valid_in_sets (set1
, set2
, vro
->op0
)
2011 || !op_valid_in_sets (set1
, set2
, vro
->op1
)
2012 || !op_valid_in_sets (set1
, set2
, vro
->op2
))
2022 /* Clean the set of expressions SET1 that are no longer valid in SET1 or SET2.
2023 This means expressions that are made up of values we have no leaders for
2027 clean (bitmap_set_t set1
, bitmap_set_t set2
= NULL
)
2029 vec
<pre_expr
> exprs
= sorted_array_from_bitmap_set (set1
);
2033 FOR_EACH_VEC_ELT (exprs
, i
, expr
)
2035 if (!valid_in_sets (set1
, set2
, expr
))
2037 unsigned int val
= get_expr_value_id (expr
);
2038 bitmap_clear_bit (&set1
->expressions
, get_expression_id (expr
));
2039 /* We are entered with possibly multiple expressions for a value
2040 so before removing a value from the set see if there's an
2041 expression for it left. */
2042 if (! bitmap_find_leader (set1
, val
))
2043 bitmap_clear_bit (&set1
->values
, val
);
2052 FOR_EACH_EXPR_ID_IN_SET (set1
, j
, bi
)
2053 gcc_assert (valid_in_sets (set1
, set2
, expression_for_id (j
)));
2057 /* Clean the set of expressions that are no longer valid in SET because
2058 they are clobbered in BLOCK or because they trap and may not be executed. */
2061 prune_clobbered_mems (bitmap_set_t set
, basic_block block
)
2065 unsigned to_remove
= -1U;
2066 bool any_removed
= false;
2068 FOR_EACH_EXPR_ID_IN_SET (set
, i
, bi
)
2070 /* Remove queued expr. */
2071 if (to_remove
!= -1U)
2073 bitmap_clear_bit (&set
->expressions
, to_remove
);
2078 pre_expr expr
= expression_for_id (i
);
2079 if (expr
->kind
== REFERENCE
)
2081 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
2084 gimple
*def_stmt
= SSA_NAME_DEF_STMT (ref
->vuse
);
2085 if (!gimple_nop_p (def_stmt
)
2086 && ((gimple_bb (def_stmt
) != block
2087 && !dominated_by_p (CDI_DOMINATORS
,
2088 block
, gimple_bb (def_stmt
)))
2089 || (gimple_bb (def_stmt
) == block
2090 && value_dies_in_block_x (expr
, block
))))
2094 else if (expr
->kind
== NARY
)
2096 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
2097 /* If the NARY may trap make sure the block does not contain
2098 a possible exit point.
2099 ??? This is overly conservative if we translate AVAIL_OUT
2100 as the available expression might be after the exit point. */
2101 if (BB_MAY_NOTRETURN (block
)
2102 && vn_nary_may_trap (nary
))
2107 /* Remove queued expr. */
2108 if (to_remove
!= -1U)
2110 bitmap_clear_bit (&set
->expressions
, to_remove
);
2114 /* Above we only removed expressions, now clean the set of values
2115 which no longer have any corresponding expression. We cannot
2116 clear the value at the time we remove an expression since there
2117 may be multiple expressions per value.
2118 If we'd queue possibly to be removed values we could use
2119 the bitmap_find_leader way to see if there's still an expression
2120 for it. For some ratio of to be removed values and number of
2121 values/expressions in the set this might be faster than rebuilding
2125 bitmap_clear (&set
->values
);
2126 FOR_EACH_EXPR_ID_IN_SET (set
, i
, bi
)
2128 pre_expr expr
= expression_for_id (i
);
2129 unsigned int value_id
= get_expr_value_id (expr
);
2130 bitmap_set_bit (&set
->values
, value_id
);
2135 /* Compute the ANTIC set for BLOCK.
2137 If succs(BLOCK) > 1 then
2138 ANTIC_OUT[BLOCK] = intersection of ANTIC_IN[b] for all succ(BLOCK)
2139 else if succs(BLOCK) == 1 then
2140 ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])
2142 ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])
2144 Note that clean() is deferred until after the iteration. */
2147 compute_antic_aux (basic_block block
, bool block_has_abnormal_pred_edge
)
2149 bitmap_set_t S
, old
, ANTIC_OUT
;
2153 bool was_visited
= BB_VISITED (block
);
2154 bool changed
= ! BB_VISITED (block
);
2155 BB_VISITED (block
) = 1;
2156 old
= ANTIC_OUT
= S
= NULL
;
2158 /* If any edges from predecessors are abnormal, antic_in is empty,
2160 if (block_has_abnormal_pred_edge
)
2161 goto maybe_dump_sets
;
2163 old
= ANTIC_IN (block
);
2164 ANTIC_OUT
= bitmap_set_new ();
2166 /* If the block has no successors, ANTIC_OUT is empty. */
2167 if (EDGE_COUNT (block
->succs
) == 0)
2169 /* If we have one successor, we could have some phi nodes to
2170 translate through. */
2171 else if (single_succ_p (block
))
2173 e
= single_succ_edge (block
);
2174 gcc_assert (BB_VISITED (e
->dest
));
2175 phi_translate_set (ANTIC_OUT
, ANTIC_IN (e
->dest
), e
);
2177 /* If we have multiple successors, we take the intersection of all of
2178 them. Note that in the case of loop exit phi nodes, we may have
2179 phis to translate through. */
2185 auto_vec
<edge
> worklist (EDGE_COUNT (block
->succs
));
2186 FOR_EACH_EDGE (e
, ei
, block
->succs
)
2189 && BB_VISITED (e
->dest
))
2191 else if (BB_VISITED (e
->dest
))
2192 worklist
.quick_push (e
);
2195 /* Unvisited successors get their ANTIC_IN replaced by the
2196 maximal set to arrive at a maximum ANTIC_IN solution.
2197 We can ignore them in the intersection operation and thus
2198 need not explicitely represent that maximum solution. */
2199 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2200 fprintf (dump_file
, "ANTIC_IN is MAX on %d->%d\n",
2201 e
->src
->index
, e
->dest
->index
);
2205 /* Of multiple successors we have to have visited one already
2206 which is guaranteed by iteration order. */
2207 gcc_assert (first
!= NULL
);
2209 phi_translate_set (ANTIC_OUT
, ANTIC_IN (first
->dest
), first
);
2211 /* If we have multiple successors we need to intersect the ANTIC_OUT
2212 sets. For values that's a simple intersection but for
2213 expressions it is a union. Given we want to have a single
2214 expression per value in our sets we have to canonicalize.
2215 Avoid randomness and running into cycles like for PR82129 and
2216 canonicalize the expression we choose to the one with the
2217 lowest id. This requires we actually compute the union first. */
2218 FOR_EACH_VEC_ELT (worklist
, i
, e
)
2220 if (!gimple_seq_empty_p (phi_nodes (e
->dest
)))
2222 bitmap_set_t tmp
= bitmap_set_new ();
2223 phi_translate_set (tmp
, ANTIC_IN (e
->dest
), e
);
2224 bitmap_and_into (&ANTIC_OUT
->values
, &tmp
->values
);
2225 bitmap_ior_into (&ANTIC_OUT
->expressions
, &tmp
->expressions
);
2226 bitmap_set_free (tmp
);
2230 bitmap_and_into (&ANTIC_OUT
->values
, &ANTIC_IN (e
->dest
)->values
);
2231 bitmap_ior_into (&ANTIC_OUT
->expressions
,
2232 &ANTIC_IN (e
->dest
)->expressions
);
2235 if (! worklist
.is_empty ())
2237 /* Prune expressions not in the value set. */
2240 unsigned int to_clear
= -1U;
2241 FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT
, i
, bi
)
2243 if (to_clear
!= -1U)
2245 bitmap_clear_bit (&ANTIC_OUT
->expressions
, to_clear
);
2248 pre_expr expr
= expression_for_id (i
);
2249 unsigned int value_id
= get_expr_value_id (expr
);
2250 if (!bitmap_bit_p (&ANTIC_OUT
->values
, value_id
))
2253 if (to_clear
!= -1U)
2254 bitmap_clear_bit (&ANTIC_OUT
->expressions
, to_clear
);
2258 /* Prune expressions that are clobbered in block and thus become
2259 invalid if translated from ANTIC_OUT to ANTIC_IN. */
2260 prune_clobbered_mems (ANTIC_OUT
, block
);
2262 /* Generate ANTIC_OUT - TMP_GEN. */
2263 S
= bitmap_set_subtract_expressions (ANTIC_OUT
, TMP_GEN (block
));
2265 /* Start ANTIC_IN with EXP_GEN - TMP_GEN. */
2266 ANTIC_IN (block
) = bitmap_set_subtract_expressions (EXP_GEN (block
),
2269 /* Then union in the ANTIC_OUT - TMP_GEN values,
2270 to get ANTIC_OUT U EXP_GEN - TMP_GEN */
2271 bitmap_ior_into (&ANTIC_IN (block
)->values
, &S
->values
);
2272 bitmap_ior_into (&ANTIC_IN (block
)->expressions
, &S
->expressions
);
2274 /* clean (ANTIC_IN (block)) is defered to after the iteration converged
2275 because it can cause non-convergence, see for example PR81181. */
2277 /* Intersect ANTIC_IN with the old ANTIC_IN. This is required until
2278 we properly represent the maximum expression set, thus not prune
2279 values without expressions during the iteration. */
2281 && bitmap_and_into (&ANTIC_IN (block
)->values
, &old
->values
))
2283 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2284 fprintf (dump_file
, "warning: intersecting with old ANTIC_IN "
2285 "shrinks the set\n");
2286 /* Prune expressions not in the value set. */
2289 unsigned int to_clear
= -1U;
2290 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (block
), i
, bi
)
2292 if (to_clear
!= -1U)
2294 bitmap_clear_bit (&ANTIC_IN (block
)->expressions
, to_clear
);
2297 pre_expr expr
= expression_for_id (i
);
2298 unsigned int value_id
= get_expr_value_id (expr
);
2299 if (!bitmap_bit_p (&ANTIC_IN (block
)->values
, value_id
))
2302 if (to_clear
!= -1U)
2303 bitmap_clear_bit (&ANTIC_IN (block
)->expressions
, to_clear
);
2306 if (!bitmap_set_equal (old
, ANTIC_IN (block
)))
2310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2313 print_bitmap_set (dump_file
, ANTIC_OUT
, "ANTIC_OUT", block
->index
);
2316 fprintf (dump_file
, "[changed] ");
2317 print_bitmap_set (dump_file
, ANTIC_IN (block
), "ANTIC_IN",
2321 print_bitmap_set (dump_file
, S
, "S", block
->index
);
2324 bitmap_set_free (old
);
2326 bitmap_set_free (S
);
2328 bitmap_set_free (ANTIC_OUT
);
2332 /* Compute PARTIAL_ANTIC for BLOCK.
2334 If succs(BLOCK) > 1 then
2335 PA_OUT[BLOCK] = value wise union of PA_IN[b] + all ANTIC_IN not
2336 in ANTIC_OUT for all succ(BLOCK)
2337 else if succs(BLOCK) == 1 then
2338 PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)])
2340 PA_IN[BLOCK] = clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] - ANTIC_IN[BLOCK])
2344 compute_partial_antic_aux (basic_block block
,
2345 bool block_has_abnormal_pred_edge
)
2347 bitmap_set_t old_PA_IN
;
2348 bitmap_set_t PA_OUT
;
2351 unsigned long max_pa
= param_max_partial_antic_length
;
2353 old_PA_IN
= PA_OUT
= NULL
;
2355 /* If any edges from predecessors are abnormal, antic_in is empty,
2357 if (block_has_abnormal_pred_edge
)
2358 goto maybe_dump_sets
;
2360 /* If there are too many partially anticipatable values in the
2361 block, phi_translate_set can take an exponential time: stop
2362 before the translation starts. */
2364 && single_succ_p (block
)
2365 && bitmap_count_bits (&PA_IN (single_succ (block
))->values
) > max_pa
)
2366 goto maybe_dump_sets
;
2368 old_PA_IN
= PA_IN (block
);
2369 PA_OUT
= bitmap_set_new ();
2371 /* If the block has no successors, ANTIC_OUT is empty. */
2372 if (EDGE_COUNT (block
->succs
) == 0)
2374 /* If we have one successor, we could have some phi nodes to
2375 translate through. Note that we can't phi translate across DFS
2376 back edges in partial antic, because it uses a union operation on
2377 the successors. For recurrences like IV's, we will end up
2378 generating a new value in the set on each go around (i + 3 (VH.1)
2379 VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever. */
2380 else if (single_succ_p (block
))
2382 e
= single_succ_edge (block
);
2383 if (!(e
->flags
& EDGE_DFS_BACK
))
2384 phi_translate_set (PA_OUT
, PA_IN (e
->dest
), e
);
2386 /* If we have multiple successors, we take the union of all of
2392 auto_vec
<edge
> worklist (EDGE_COUNT (block
->succs
));
2393 FOR_EACH_EDGE (e
, ei
, block
->succs
)
2395 if (e
->flags
& EDGE_DFS_BACK
)
2397 worklist
.quick_push (e
);
2399 if (worklist
.length () > 0)
2401 FOR_EACH_VEC_ELT (worklist
, i
, e
)
2406 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e
->dest
), i
, bi
)
2407 bitmap_value_insert_into_set (PA_OUT
,
2408 expression_for_id (i
));
2409 if (!gimple_seq_empty_p (phi_nodes (e
->dest
)))
2411 bitmap_set_t pa_in
= bitmap_set_new ();
2412 phi_translate_set (pa_in
, PA_IN (e
->dest
), e
);
2413 FOR_EACH_EXPR_ID_IN_SET (pa_in
, i
, bi
)
2414 bitmap_value_insert_into_set (PA_OUT
,
2415 expression_for_id (i
));
2416 bitmap_set_free (pa_in
);
2419 FOR_EACH_EXPR_ID_IN_SET (PA_IN (e
->dest
), i
, bi
)
2420 bitmap_value_insert_into_set (PA_OUT
,
2421 expression_for_id (i
));
2426 /* Prune expressions that are clobbered in block and thus become
2427 invalid if translated from PA_OUT to PA_IN. */
2428 prune_clobbered_mems (PA_OUT
, block
);
2430 /* PA_IN starts with PA_OUT - TMP_GEN.
2431 Then we subtract things from ANTIC_IN. */
2432 PA_IN (block
) = bitmap_set_subtract_expressions (PA_OUT
, TMP_GEN (block
));
2434 /* For partial antic, we want to put back in the phi results, since
2435 we will properly avoid making them partially antic over backedges. */
2436 bitmap_ior_into (&PA_IN (block
)->values
, &PHI_GEN (block
)->values
);
2437 bitmap_ior_into (&PA_IN (block
)->expressions
, &PHI_GEN (block
)->expressions
);
2439 /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */
2440 bitmap_set_subtract_values (PA_IN (block
), ANTIC_IN (block
));
2442 clean (PA_IN (block
), ANTIC_IN (block
));
2445 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2448 print_bitmap_set (dump_file
, PA_OUT
, "PA_OUT", block
->index
);
2450 print_bitmap_set (dump_file
, PA_IN (block
), "PA_IN", block
->index
);
2453 bitmap_set_free (old_PA_IN
);
2455 bitmap_set_free (PA_OUT
);
/* NOTE(review): this region is a corrupted extraction of the GVN-PRE pass
   (tree-ssa-pre.c) -- statements are split across lines and many original
   lines, including all brace-only lines and the `static void` return-type
   line, were dropped (see the jumps in the embedded line-number gutter,
   e.g. 2522 -> 2525).  The code text below is preserved verbatim; only
   comments were added.  Restore from the upstream file before editing.  */
2458 /* Compute ANTIC and partial ANTIC sets. */
2461 compute_antic (void)
2463 bool changed
= true;
2464 int num_iterations
= 0;
2470 /* If any predecessor edges are abnormal, we punt, so antic_in is empty.
2471 We pre-build the map of blocks with incoming abnormal edges here. */
2472 auto_sbitmap
has_abnormal_preds (last_basic_block_for_fn (cfun
));
2473 bitmap_clear (has_abnormal_preds
);
2475 FOR_ALL_BB_FN (block
, cfun
)
2477 BB_VISITED (block
) = 0;
2479 FOR_EACH_EDGE (e
, ei
, block
->preds
)
2480 if (e
->flags
& EDGE_ABNORMAL
)
2482 bitmap_set_bit (has_abnormal_preds
, block
->index
);
2486 /* While we are here, give empty ANTIC_IN sets to each block. */
2487 ANTIC_IN (block
) = bitmap_set_new ();
2488 if (do_partial_partial
)
2489 PA_IN (block
) = bitmap_set_new ();
2492 /* At the exit block we anticipate nothing. */
2493 BB_VISITED (EXIT_BLOCK_PTR_FOR_FN (cfun
)) = 1;
2495 /* For ANTIC computation we need a postorder that also guarantees that
2496 a block with a single successor is visited after its successor.
2497 RPO on the inverted CFG has this property. */
2498 auto_vec
<int, 20> postorder
;
2499 inverted_post_order_compute (&postorder
);
/* Seed the worklist with the predecessors of the exit block; the loop
   below re-queues a block's predecessors whenever compute_antic_aux
   reports a change, iterating to a fixed point.  */
2501 auto_sbitmap
worklist (last_basic_block_for_fn (cfun
) + 1);
2502 bitmap_clear (worklist
);
2503 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
2504 bitmap_set_bit (worklist
, e
->src
->index
);
2507 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2508 fprintf (dump_file
, "Starting iteration %d\n", num_iterations
);
2509 /* ??? We need to clear our PHI translation cache here as the
2510 ANTIC sets shrink and we restrict valid translations to
2511 those having operands with leaders in ANTIC. Same below
2512 for PA ANTIC computation. */
2515 for (i
= postorder
.length () - 1; i
>= 0; i
--)
2517 if (bitmap_bit_p (worklist
, postorder
[i
]))
2519 basic_block block
= BASIC_BLOCK_FOR_FN (cfun
, postorder
[i
]);
2520 bitmap_clear_bit (worklist
, block
->index
);
/* NOTE(review): the remainder of this `if` condition (original lines
   2523-2524) was dropped by the extraction.  */
2521 if (compute_antic_aux (block
,
2522 bitmap_bit_p (has_abnormal_preds
,
2525 FOR_EACH_EDGE (e
, ei
, block
->preds
)
2526 bitmap_set_bit (worklist
, e
->src
->index
);
2531 /* Theoretically possible, but *highly* unlikely. */
2532 gcc_checking_assert (num_iterations
< 500);
2535 /* We have to clean after the dataflow problem converged as cleaning
2536 can cause non-convergence because it is based on expressions
2537 rather than values. */
2538 FOR_EACH_BB_FN (block
, cfun
)
2539 clean (ANTIC_IN (block
));
2541 statistics_histogram_event (cfun
, "compute_antic iterations",
2544 if (do_partial_partial
)
2546 /* For partial antic we ignore backedges and thus we do not need
2547 to perform any iteration when we process blocks in postorder. */
2548 for (i
= postorder
.length () - 1; i
>= 0; i
--)
2550 basic_block block
= BASIC_BLOCK_FOR_FN (cfun
, postorder
[i
]);
2551 compute_partial_antic_aux (block
,
2552 bitmap_bit_p (has_abnormal_preds
,
2559 /* Inserted expressions are placed onto this worklist, which is used
2560 for performing quick dead code elimination of insertions we made
2561 that didn't turn out to be necessary. */
/* NOTE(review): indexed by SSA_NAME_VERSION -- see the bitmap_set_bit
   (inserted_exprs, SSA_NAME_VERSION (...)) calls in the insertion
   routines below.  */
2562 static bitmap inserted_exprs
;
/* NOTE(review): corrupted extraction -- the `static tree` return-type line,
   all braces, several case labels and the argument tails of many calls were
   dropped.  Code text preserved verbatim; only comments added.  The visible
   structure is a switch on currop->opcode re-building a reference tree
   (MEM_REF, TARGET_MEM_REF, VIEW_CONVERT_EXPR, WITH_SIZE_EXPR,
   BIT_FIELD_REF, ARRAY_RANGE_REF/ARRAY_REF, COMPONENT_REF, ...) piece by
   piece, recursing via *operand and generating leaders through
   find_or_generate_expression.  */
2564 /* The actual worker for create_component_ref_by_pieces. */
2567 create_component_ref_by_pieces_1 (basic_block block
, vn_reference_t ref
,
2568 unsigned int *operand
, gimple_seq
*stmts
)
2570 vn_reference_op_t currop
= &ref
->operands
[*operand
];
2573 switch (currop
->opcode
)
/* MEM_REF case (label line dropped by extraction): rebuild base and
   constant offset, folding an ADDR_EXPR base where possible.  */
2580 tree baseop
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2584 tree offset
= currop
->op0
;
2585 if (TREE_CODE (baseop
) == ADDR_EXPR
2586 && handled_component_p (TREE_OPERAND (baseop
, 0)))
2590 base
= get_addr_base_and_unit_offset (TREE_OPERAND (baseop
, 0),
2593 offset
= int_const_binop (PLUS_EXPR
, offset
,
2594 build_int_cst (TREE_TYPE (offset
),
2596 baseop
= build_fold_addr_expr (base
);
2598 genop
= build2 (MEM_REF
, currop
->type
, baseop
, offset
);
/* Carry over the dependence clique/base and storage order recorded by
   value numbering.  */
2599 MR_DEPENDENCE_CLIQUE (genop
) = currop
->clique
;
2600 MR_DEPENDENCE_BASE (genop
) = currop
->base
;
2601 REF_REVERSE_STORAGE_ORDER (genop
) = currop
->reverse
;
2605 case TARGET_MEM_REF
:
2607 tree genop0
= NULL_TREE
, genop1
= NULL_TREE
;
2608 vn_reference_op_t nextop
= &ref
->operands
[(*operand
)++];
2609 tree baseop
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2615 genop0
= find_or_generate_expression (block
, currop
->op0
, stmts
);
2621 genop1
= find_or_generate_expression (block
, nextop
->op0
, stmts
);
2625 genop
= build5 (TARGET_MEM_REF
, currop
->type
,
2626 baseop
, currop
->op2
, genop0
, currop
->op1
, genop1
);
2628 MR_DEPENDENCE_CLIQUE (genop
) = currop
->clique
;
2629 MR_DEPENDENCE_BASE (genop
) = currop
->base
;
2636 gcc_assert (is_gimple_min_invariant (currop
->op0
));
2642 case VIEW_CONVERT_EXPR
:
2644 tree genop0
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2648 return fold_build1 (currop
->opcode
, currop
->type
, genop0
);
2651 case WITH_SIZE_EXPR
:
2653 tree genop0
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2657 tree genop1
= find_or_generate_expression (block
, currop
->op0
, stmts
);
2660 return fold_build2 (currop
->opcode
, currop
->type
, genop0
, genop1
);
/* BIT_FIELD_REF case (label line dropped by extraction).  */
2665 tree genop0
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2669 tree op1
= currop
->op0
;
2670 tree op2
= currop
->op1
;
2671 tree t
= build3 (BIT_FIELD_REF
, currop
->type
, genop0
, op1
, op2
);
2672 REF_REVERSE_STORAGE_ORDER (t
) = currop
->reverse
;
2676 /* For array ref vn_reference_op's, operand 1 of the array ref
2677 is op0 of the reference op and operand 3 of the array ref is
2679 case ARRAY_RANGE_REF
:
2683 tree genop1
= currop
->op0
;
2684 tree genop2
= currop
->op1
;
2685 tree genop3
= currop
->op2
;
2686 genop0
= create_component_ref_by_pieces_1 (block
, ref
, operand
,
2690 genop1
= find_or_generate_expression (block
, genop1
, stmts
);
2695 tree domain_type
= TYPE_DOMAIN (TREE_TYPE (genop0
));
2696 /* Drop zero minimum index if redundant. */
2697 if (integer_zerop (genop2
)
2699 || integer_zerop (TYPE_MIN_VALUE (domain_type
))))
2703 genop2
= find_or_generate_expression (block
, genop2
, stmts
);
2710 tree elmt_type
= TREE_TYPE (TREE_TYPE (genop0
));
2711 /* We can't always put a size in units of the element alignment
2712 here as the element alignment may be not visible. See
2713 PR43783. Simply drop the element size for constant
2715 if (TREE_CODE (genop3
) == INTEGER_CST
2716 && TREE_CODE (TYPE_SIZE_UNIT (elmt_type
)) == INTEGER_CST
2717 && wi::eq_p (wi::to_offset (TYPE_SIZE_UNIT (elmt_type
)),
2718 (wi::to_offset (genop3
)
2719 * vn_ref_op_align_unit (currop
))))
2723 genop3
= find_or_generate_expression (block
, genop3
, stmts
);
2728 return build4 (currop
->opcode
, currop
->type
, genop0
, genop1
,
/* COMPONENT_REF case (label line dropped by extraction).  */
2735 tree genop2
= currop
->op1
;
2736 op0
= create_component_ref_by_pieces_1 (block
, ref
, operand
, stmts
);
2739 /* op1 should be a FIELD_DECL, which are represented by themselves. */
2743 genop2
= find_or_generate_expression (block
, genop2
, stmts
);
2747 return fold_build3 (COMPONENT_REF
, TREE_TYPE (op1
), op0
, op1
, genop2
);
/* Default/leaf case: generate a leader for the operand directly.  */
2752 genop
= find_or_generate_expression (block
, currop
->op0
, stmts
);
/* NOTE(review): corrupted extraction -- the `static tree` line and the
   third parameter line (gimple_seq *stmts, original line 2788) were
   dropped; `stmts` is nevertheless used in the body below.  Code text
   preserved verbatim; only comments added.  */
2774 /* For COMPONENT_REF's and ARRAY_REF's, we can't have any intermediates for the
2775 COMPONENT_REF or MEM_REF or ARRAY_REF portion, because we'd end up with
2776 trying to rename aggregates into ssa form directly, which is a no no.
2778 Thus, this routine doesn't create temporaries, it just builds a
2779 single access expression for the array, calling
2780 find_or_generate_expression to build the innermost pieces.
2782 This function is a subroutine of create_expression_by_pieces, and
2783 should not be called on it's own unless you really know what you
2787 create_component_ref_by_pieces (basic_block block
, vn_reference_t ref
,
2790 unsigned int op
= 0;
2791 return create_component_ref_by_pieces_1 (block
, ref
, &op
, stmts
);
/* NOTE(review): corrupted extraction -- braces and the function's trailing
   failure-return lines were dropped.  Code text preserved verbatim; only
   comments added.  */
2794 /* Find a simple leader for an expression, or generate one using
2795 create_expression_by_pieces from a NARY expression for the value.
2796 BLOCK is the basic_block we are looking for leaders in.
2797 OP is the tree expression to find a leader for or generate.
2798 Returns the leader or NULL_TREE on failure. */
2801 find_or_generate_expression (basic_block block
, tree op
, gimple_seq
*stmts
)
2803 pre_expr expr
= get_or_alloc_expr_for (op
);
2804 unsigned int lookfor
= get_expr_value_id (expr
);
2805 pre_expr leader
= bitmap_find_leader (AVAIL_OUT (block
), lookfor
);
/* Fast path: an existing SSA name or constant leader is returned as-is.  */
2808 if (leader
->kind
== NAME
)
2809 return PRE_EXPR_NAME (leader
);
2810 else if (leader
->kind
== CONSTANT
)
2811 return PRE_EXPR_CONSTANT (leader
);
2817 /* It must be a complex expression, so generate it recursively. Note
2818 that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
2819 where the insert algorithm fails to insert a required expression. */
2820 bitmap exprset
= value_expressions
[lookfor
];
2823 EXECUTE_IF_SET_IN_BITMAP (exprset
, 0, i
, bi
)
2825 pre_expr temp
= expression_for_id (i
);
2826 /* We cannot insert random REFERENCE expressions at arbitrary
2827 places. We can insert NARYs which eventually re-materializes
2828 its operand values. */
2829 if (temp
->kind
== NARY
)
2830 return create_expression_by_pieces (block
, temp
, stmts
,
2831 get_expr_type (expr
));
/* NOTE(review): corrupted extraction -- braces, several if/else heads,
   case labels and argument tails were dropped throughout this function.
   Code text preserved verbatim; only comments added.  Visible structure:
   materialize EXPR (NAME / CONSTANT / REFERENCE-call / REFERENCE /
   NARY) into `folded` plus a `forced_stmts` sequence, then value-number
   the inserted names and append to STMTS.  */
2838 /* Create an expression in pieces, so that we can handle very complex
2839 expressions that may be ANTIC, but not necessary GIMPLE.
2840 BLOCK is the basic block the expression will be inserted into,
2841 EXPR is the expression to insert (in value form)
2842 STMTS is a statement list to append the necessary insertions into.
2844 This function will die if we hit some value that shouldn't be
2845 ANTIC but is (IE there is no leader for it, or its components).
2846 The function returns NULL_TREE in case a different antic expression
2847 has to be inserted first.
2848 This function may also generate expressions that are themselves
2849 partially or fully redundant. Those that are will be either made
2850 fully redundant during the next iteration of insert (for partially
2851 redundant ones), or eliminated by eliminate (for fully redundant
2855 create_expression_by_pieces (basic_block block
, pre_expr expr
,
2856 gimple_seq
*stmts
, tree type
)
2860 gimple_seq forced_stmts
= NULL
;
2861 unsigned int value_id
;
2862 gimple_stmt_iterator gsi
;
2863 tree exprtype
= type
? type
: get_expr_type (expr
);
2869 /* We may hit the NAME/CONSTANT case if we have to convert types
2870 that value numbering saw through. */
2872 folded
= PRE_EXPR_NAME (expr
);
2873 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (folded
))
2875 if (useless_type_conversion_p (exprtype
, TREE_TYPE (folded
)))
2880 folded
= PRE_EXPR_CONSTANT (expr
);
2881 tree tem
= fold_convert (exprtype
, folded
);
2882 if (is_gimple_min_invariant (tem
))
/* REFERENCE whose first operand is a call: rebuild the call.  */
2887 if (PRE_EXPR_REFERENCE (expr
)->operands
[0].opcode
== CALL_EXPR
)
2889 vn_reference_t ref
= PRE_EXPR_REFERENCE (expr
);
2890 unsigned int operand
= 1;
2891 vn_reference_op_t currop
= &ref
->operands
[0];
2892 tree sc
= NULL_TREE
;
2893 tree fn
= find_or_generate_expression (block
, currop
->op0
, stmts
);
2898 sc
= find_or_generate_expression (block
, currop
->op1
, stmts
);
2902 auto_vec
<tree
> args (ref
->operands
.length () - 1);
2903 while (operand
< ref
->operands
.length ())
2905 tree arg
= create_component_ref_by_pieces_1 (block
, ref
,
2909 args
.quick_push (arg
);
2911 gcall
*call
= gimple_build_call_vec (fn
, args
);
2912 gimple_set_location (call
, expr
->loc
);
2913 gimple_call_set_fntype (call
, currop
->type
);
2915 gimple_call_set_chain (call
, sc
);
2916 tree forcedname
= make_ssa_name (TREE_TYPE (currop
->type
));
2917 gimple_call_set_lhs (call
, forcedname
);
2918 /* There's no CCP pass after PRE which would re-compute alignment
2919 information so make sure we re-materialize this here. */
2920 if (gimple_call_builtin_p (call
, BUILT_IN_ASSUME_ALIGNED
)
2921 && args
.length () - 2 <= 1
2922 && tree_fits_uhwi_p (args
[1])
2923 && (args
.length () != 3 || tree_fits_uhwi_p (args
[2])))
2925 unsigned HOST_WIDE_INT halign
= tree_to_uhwi (args
[1]);
2926 unsigned HOST_WIDE_INT hmisalign
2927 = args
.length () == 3 ? tree_to_uhwi (args
[2]) : 0;
/* Only record alignment when it is a non-zero power of two and the
   misalignment fits inside it.  */
2928 if ((halign
& (halign
- 1)) == 0
2929 && (hmisalign
& ~(halign
- 1)) == 0
2930 && (unsigned int)halign
!= 0)
2931 set_ptr_info_alignment (get_ptr_info (forcedname
),
2934 gimple_set_vuse (call
, BB_LIVE_VOP_ON_EXIT (block
));
2935 gimple_seq_add_stmt_without_update (&forced_stmts
, call
);
2936 folded
= forcedname
;
/* Plain REFERENCE: rebuild the access expression piecewise.  */
2940 folded
= create_component_ref_by_pieces (block
,
2941 PRE_EXPR_REFERENCE (expr
),
2945 name
= make_temp_ssa_name (exprtype
, NULL
, "pretmp");
2946 newstmt
= gimple_build_assign (name
, folded
);
2947 gimple_set_location (newstmt
, expr
->loc
);
2948 gimple_seq_add_stmt_without_update (&forced_stmts
, newstmt
);
2949 gimple_set_vuse (newstmt
, BB_LIVE_VOP_ON_EXIT (block
));
/* NARY: generate leaders for each operand, then build the operation.  */
2955 vn_nary_op_t nary
= PRE_EXPR_NARY (expr
);
2956 tree
*genop
= XALLOCAVEC (tree
, nary
->length
);
2958 for (i
= 0; i
< nary
->length
; ++i
)
2960 genop
[i
] = find_or_generate_expression (block
, nary
->op
[i
], stmts
);
2963 /* Ensure genop[] is properly typed for POINTER_PLUS_EXPR. It
2964 may have conversions stripped. */
2965 if (nary
->opcode
== POINTER_PLUS_EXPR
)
2968 genop
[i
] = gimple_convert (&forced_stmts
,
2969 nary
->type
, genop
[i
]);
2971 genop
[i
] = gimple_convert (&forced_stmts
,
2972 sizetype
, genop
[i
]);
2975 genop
[i
] = gimple_convert (&forced_stmts
,
2976 TREE_TYPE (nary
->op
[i
]), genop
[i
]);
2978 if (nary
->opcode
== CONSTRUCTOR
)
2980 vec
<constructor_elt
, va_gc
> *elts
= NULL
;
2981 for (i
= 0; i
< nary
->length
; ++i
)
2982 CONSTRUCTOR_APPEND_ELT (elts
, NULL_TREE
, genop
[i
]);
2983 folded
= build_constructor (nary
->type
, elts
);
2984 name
= make_temp_ssa_name (exprtype
, NULL
, "pretmp");
2985 newstmt
= gimple_build_assign (name
, folded
);
2986 gimple_set_location (newstmt
, expr
->loc
);
2987 gimple_seq_add_stmt_without_update (&forced_stmts
, newstmt
);
/* Non-CONSTRUCTOR NARYs dispatch on arity (case labels dropped by the
   extraction: these are the 1-, 2- and 3-operand cases).  */
2992 switch (nary
->length
)
2995 folded
= gimple_build (&forced_stmts
, expr
->loc
,
2996 nary
->opcode
, nary
->type
, genop
[0]);
2999 folded
= gimple_build (&forced_stmts
, expr
->loc
, nary
->opcode
,
3000 nary
->type
, genop
[0], genop
[1]);
3003 folded
= gimple_build (&forced_stmts
, expr
->loc
, nary
->opcode
,
3004 nary
->type
, genop
[0], genop
[1],
3017 folded
= gimple_convert (&forced_stmts
, exprtype
, folded
);
3019 /* If there is nothing to insert, return the simplified result. */
3020 if (gimple_seq_empty_p (forced_stmts
))
3022 /* If we simplified to a constant return it and discard eventually
3024 if (is_gimple_min_invariant (folded
))
3026 gimple_seq_discard (forced_stmts
);
3029 /* Likewise if we simplified to sth not queued for insertion. */
3031 gsi
= gsi_last (forced_stmts
);
3032 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
3034 gimple
*stmt
= gsi_stmt (gsi
);
3035 tree forcedname
= gimple_get_lhs (stmt
);
3036 if (forcedname
== folded
)
3044 gimple_seq_discard (forced_stmts
);
3047 gcc_assert (TREE_CODE (folded
) == SSA_NAME
);
3049 /* If we have any intermediate expressions to the value sets, add them
3050 to the value sets and chain them in the instruction stream. */
3053 gsi
= gsi_start (forced_stmts
);
3054 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3056 gimple
*stmt
= gsi_stmt (gsi
);
3057 tree forcedname
= gimple_get_lhs (stmt
);
3060 if (forcedname
!= folded
)
3062 vn_ssa_aux_t vn_info
= VN_INFO (forcedname
);
3063 vn_info
->valnum
= forcedname
;
3064 vn_info
->value_id
= get_next_value_id ();
3065 nameexpr
= get_or_alloc_expr_for_name (forcedname
);
3066 add_to_value (vn_info
->value_id
, nameexpr
);
3067 if (NEW_SETS (block
))
3068 bitmap_value_replace_in_set (NEW_SETS (block
), nameexpr
);
3069 bitmap_value_replace_in_set (AVAIL_OUT (block
), nameexpr
);
3072 bitmap_set_bit (inserted_exprs
, SSA_NAME_VERSION (forcedname
));
3074 gimple_seq_add_seq (stmts
, forced_stmts
);
3079 /* Fold the last statement. */
3080 gsi
= gsi_last (*stmts
);
3081 if (fold_stmt_inplace (&gsi
))
3082 update_stmt (gsi_stmt (gsi
));
3084 /* Add a value number to the temporary.
3085 The value may already exist in either NEW_SETS, or AVAIL_OUT, because
3086 we are creating the expression by pieces, and this particular piece of
3087 the expression may have been represented. There is no harm in replacing
3089 value_id
= get_expr_value_id (expr
);
3090 vn_ssa_aux_t vn_info
= VN_INFO (name
);
3091 vn_info
->value_id
= value_id
;
3092 vn_info
->valnum
= vn_valnum_from_value_id (value_id
);
3093 if (vn_info
->valnum
== NULL_TREE
)
3094 vn_info
->valnum
= name
;
3095 gcc_assert (vn_info
->valnum
!= NULL_TREE
);
3096 nameexpr
= get_or_alloc_expr_for_name (name
);
3097 add_to_value (value_id
, nameexpr
);
3098 if (NEW_SETS (block
))
3099 bitmap_value_replace_in_set (NEW_SETS (block
), nameexpr
);
3100 bitmap_value_replace_in_set (AVAIL_OUT (block
), nameexpr
);
3102 pre_stats
.insertions
++;
3103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3105 fprintf (dump_file
, "Inserted ");
3106 print_gimple_stmt (dump_file
, gsi_stmt (gsi_last (*stmts
)), 0);
3107 fprintf (dump_file
, " in predecessor %d (%04d)\n",
3108 block
->index
, value_id
);
/* NOTE(review): corrupted extraction -- braces, `static bool`, local
   declarations (nophi, temp, phi, bprime, min/max, ...) and several
   statement tails were dropped.  Code text preserved verbatim; only
   comments added.  */
3115 /* Insert the to-be-made-available values of expression EXPRNUM for each
3116 predecessor, stored in AVAIL, into the predecessors of BLOCK, and
3117 merge the result with a phi node, given the same value number as
3118 NODE. Return true if we have inserted new stuff. */
3121 insert_into_preds_of_block (basic_block block
, unsigned int exprnum
,
3122 vec
<pre_expr
> avail
)
3124 pre_expr expr
= expression_for_id (exprnum
);
3126 unsigned int val
= get_expr_value_id (expr
);
3128 bool insertions
= false;
3133 tree type
= get_expr_type (expr
);
3137 /* Make sure we aren't creating an induction variable. */
3138 if (bb_loop_depth (block
) > 0 && EDGE_COUNT (block
->preds
) == 2)
3140 bool firstinsideloop
= false;
3141 bool secondinsideloop
= false;
3142 firstinsideloop
= flow_bb_inside_loop_p (block
->loop_father
,
3143 EDGE_PRED (block
, 0)->src
);
3144 secondinsideloop
= flow_bb_inside_loop_p (block
->loop_father
,
3145 EDGE_PRED (block
, 1)->src
);
3146 /* Induction variables only have one edge inside the loop. */
3147 if ((firstinsideloop
^ secondinsideloop
)
3148 && expr
->kind
!= REFERENCE
)
3150 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3151 fprintf (dump_file
, "Skipping insertion of phi for partial redundancy: Looks like an induction variable\n");
3156 /* Make the necessary insertions. */
3157 FOR_EACH_EDGE (pred
, ei
, block
->preds
)
3159 gimple_seq stmts
= NULL
;
3162 eprime
= avail
[pred
->dest_idx
];
3163 builtexpr
= create_expression_by_pieces (bprime
, eprime
,
3165 gcc_assert (!(pred
->flags
& EDGE_ABNORMAL
));
3166 if (!gimple_seq_empty_p (stmts
))
3168 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pred
, stmts
);
3169 gcc_assert (! new_bb
);
3174 /* We cannot insert a PHI node if we failed to insert
3179 if (is_gimple_min_invariant (builtexpr
))
3180 avail
[pred
->dest_idx
] = get_or_alloc_expr_for_constant (builtexpr
);
3182 avail
[pred
->dest_idx
] = get_or_alloc_expr_for_name (builtexpr
);
3184 /* If we didn't want a phi node, and we made insertions, we still have
3185 inserted new stuff, and thus return true. If we didn't want a phi node,
3186 and didn't make insertions, we haven't added anything new, so return
3188 if (nophi
&& insertions
)
3190 else if (nophi
&& !insertions
)
3193 /* Now build a phi for the new variable. */
3194 temp
= make_temp_ssa_name (type
, NULL
, "prephitmp");
3195 phi
= create_phi_node (temp
, block
);
3197 vn_ssa_aux_t vn_info
= VN_INFO (temp
);
3198 vn_info
->value_id
= val
;
3199 vn_info
->valnum
= vn_valnum_from_value_id (val
);
3200 if (vn_info
->valnum
== NULL_TREE
)
3201 vn_info
->valnum
= temp
;
3202 bitmap_set_bit (inserted_exprs
, SSA_NAME_VERSION (temp
));
3203 FOR_EACH_EDGE (pred
, ei
, block
->preds
)
3205 pre_expr ae
= avail
[pred
->dest_idx
];
3206 gcc_assert (get_expr_type (ae
) == type
3207 || useless_type_conversion_p (type
, get_expr_type (ae
)));
/* Constants are unshared before becoming PHI arguments.  */
3208 if (ae
->kind
== CONSTANT
)
3209 add_phi_arg (phi
, unshare_expr (PRE_EXPR_CONSTANT (ae
)),
3210 pred
, UNKNOWN_LOCATION
);
3212 add_phi_arg (phi
, PRE_EXPR_NAME (ae
), pred
, UNKNOWN_LOCATION
);
3215 newphi
= get_or_alloc_expr_for_name (temp
);
3216 add_to_value (val
, newphi
);
3218 /* The value should *not* exist in PHI_GEN, or else we wouldn't be doing
3219 this insertion, since we test for the existence of this value in PHI_GEN
3220 before proceeding with the partial redundancy checks in insert_aux.
3222 The value may exist in AVAIL_OUT, in particular, it could be represented
3223 by the expression we are trying to eliminate, in which case we want the
3224 replacement to occur. If it's not existing in AVAIL_OUT, we want it
3227 Similarly, to the PHI_GEN case, the value should not exist in NEW_SETS of
3228 this block, because if it did, it would have existed in our dominator's
3229 AVAIL_OUT, and would have been skipped due to the full redundancy check.
3232 bitmap_insert_into_set (PHI_GEN (block
), newphi
);
3233 bitmap_value_replace_in_set (AVAIL_OUT (block
),
3235 if (NEW_SETS (block
))
3236 bitmap_insert_into_set (NEW_SETS (block
), newphi
);
3238 /* If we insert a PHI node for a conversion of another PHI node
3239 in the same basic-block try to preserve range information.
3240 This is important so that followup loop passes receive optimal
3241 number of iteration analysis results. See PR61743. */
3242 if (expr
->kind
== NARY
3243 && CONVERT_EXPR_CODE_P (expr
->u
.nary
->opcode
)
3244 && TREE_CODE (expr
->u
.nary
->op
[0]) == SSA_NAME
3245 && gimple_bb (SSA_NAME_DEF_STMT (expr
->u
.nary
->op
[0])) == block
3246 && INTEGRAL_TYPE_P (type
)
3247 && INTEGRAL_TYPE_P (TREE_TYPE (expr
->u
.nary
->op
[0]))
3248 && (TYPE_PRECISION (type
)
3249 >= TYPE_PRECISION (TREE_TYPE (expr
->u
.nary
->op
[0])))
3250 && SSA_NAME_RANGE_INFO (expr
->u
.nary
->op
[0]))
3253 if (get_range_info (expr
->u
.nary
->op
[0], &min
, &max
) == VR_RANGE
3254 && !wi::neg_p (min
, SIGNED
)
3255 && !wi::neg_p (max
, SIGNED
))
3256 /* Just handle extension and sign-changes of all-positive ranges. */
3257 set_range_info (temp
,
3258 SSA_NAME_RANGE_TYPE (expr
->u
.nary
->op
[0]),
3259 wide_int_storage::from (min
, TYPE_PRECISION (type
),
3261 wide_int_storage::from (max
, TYPE_PRECISION (type
),
3265 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3267 fprintf (dump_file
, "Created phi ");
3268 print_gimple_stmt (dump_file
, phi
, 0);
3269 fprintf (dump_file
, " in block %d (%04d)\n", block
->index
, val
);
/* NOTE(review): corrupted extraction -- braces, `static bool`, loop-local
   declarations (i, expr, val, pred, ei, bprime, assign, ...) and several
   statement tails were dropped.  Code text preserved verbatim; only
   comments added.  */
3277 /* Perform insertion of partially redundant or hoistable values.
3278 For BLOCK, do the following:
3279 1. Propagate the NEW_SETS of the dominator into the current block.
3280 If the block has multiple predecessors,
3281 2a. Iterate over the ANTIC expressions for the block to see if
3282 any of them are partially redundant.
3283 2b. If so, insert them into the necessary predecessors to make
3284 the expression fully redundant.
3285 2c. Insert a new PHI merging the values of the predecessors.
3286 2d. Insert the new PHI, and the new expressions, into the
3288 If the block has multiple successors,
3289 3a. Iterate over the ANTIC values for the block to see if
3290 any of them are good candidates for hoisting.
3291 3b. If so, insert expressions computing the values in BLOCK,
3292 and add the new expressions into the NEW_SETS set.
3293 4. Recursively call ourselves on the dominator children of BLOCK.
3295 Steps 1, 2a, and 4 are done by insert_aux. 2b, 2c and 2d are done by
3296 do_pre_regular_insertion and do_partial_insertion. 3a and 3b are
3297 done in do_hoist_insertion.
3301 do_pre_regular_insertion (basic_block block
, basic_block dom
)
3303 bool new_stuff
= false;
3304 vec
<pre_expr
> exprs
;
3306 auto_vec
<pre_expr
, 2> avail
;
3309 exprs
= sorted_array_from_bitmap_set (ANTIC_IN (block
));
3310 avail
.safe_grow (EDGE_COUNT (block
->preds
), true);
3312 FOR_EACH_VEC_ELT (exprs
, i
, expr
)
3314 if (expr
->kind
== NARY
3315 || expr
->kind
== REFERENCE
)
3318 bool by_some
= false;
3319 bool cant_insert
= false;
3320 bool all_same
= true;
3321 pre_expr first_s
= NULL
;
3324 pre_expr eprime
= NULL
;
3326 pre_expr edoubleprime
= NULL
;
3327 bool do_insertion
= false;
/* Skip values already merged by a PHI in this block, and report values
   that are fully redundant (available out of the dominator).  */
3329 val
= get_expr_value_id (expr
);
3330 if (bitmap_set_contains_value (PHI_GEN (block
), val
))
3332 if (bitmap_set_contains_value (AVAIL_OUT (dom
), val
))
3334 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3336 fprintf (dump_file
, "Found fully redundant value: ");
3337 print_pre_expr (dump_file
, expr
);
3338 fprintf (dump_file
, "\n");
3343 FOR_EACH_EDGE (pred
, ei
, block
->preds
)
3345 unsigned int vprime
;
3347 /* We should never run insertion for the exit block
3348 and so not come across fake pred edges. */
3349 gcc_assert (!(pred
->flags
& EDGE_FAKE
));
3351 /* We are looking at ANTIC_OUT of bprime. */
3352 eprime
= phi_translate (NULL
, expr
, ANTIC_IN (block
), NULL
, pred
);
3354 /* eprime will generally only be NULL if the
3355 value of the expression, translated
3356 through the PHI for this predecessor, is
3357 undefined. If that is the case, we can't
3358 make the expression fully redundant,
3359 because its value is undefined along a
3360 predecessor path. We can thus break out
3361 early because it doesn't matter what the
3362 rest of the results are. */
3365 avail
[pred
->dest_idx
] = NULL
;
3370 vprime
= get_expr_value_id (eprime
);
3371 edoubleprime
= bitmap_find_leader (AVAIL_OUT (bprime
),
3373 if (edoubleprime
== NULL
)
3375 avail
[pred
->dest_idx
] = eprime
;
3380 avail
[pred
->dest_idx
] = edoubleprime
;
3382 /* We want to perform insertions to remove a redundancy on
3383 a path in the CFG we want to optimize for speed. */
3384 if (optimize_edge_for_speed_p (pred
))
3385 do_insertion
= true;
3386 if (first_s
== NULL
)
3387 first_s
= edoubleprime
;
3388 else if (!pre_expr_d::equal (first_s
, edoubleprime
))
3392 /* If we can insert it, it's not the same value
3393 already existing along every predecessor, and
3394 it's defined by some predecessor, it is
3395 partially redundant. */
3396 if (!cant_insert
&& !all_same
&& by_some
)
3400 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3402 fprintf (dump_file
, "Skipping partial redundancy for "
3404 print_pre_expr (dump_file
, expr
);
3405 fprintf (dump_file
, " (%04d), no redundancy on to be "
3406 "optimized for speed edge\n", val
);
3409 else if (dbg_cnt (treepre_insert
))
3411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3413 fprintf (dump_file
, "Found partial redundancy for "
3415 print_pre_expr (dump_file
, expr
);
3416 fprintf (dump_file
, " (%04d)\n",
3417 get_expr_value_id (expr
));
3419 if (insert_into_preds_of_block (block
,
3420 get_expression_id (expr
),
3425 /* If all edges produce the same value and that value is
3426 an invariant, then the PHI has the same value on all
3427 edges. Note this. */
3428 else if (!cant_insert
&& all_same
)
3430 gcc_assert (edoubleprime
->kind
== CONSTANT
3431 || edoubleprime
->kind
== NAME
);
3433 tree temp
= make_temp_ssa_name (get_expr_type (expr
),
3436 = gimple_build_assign (temp
,
3437 edoubleprime
->kind
== CONSTANT
?
3438 PRE_EXPR_CONSTANT (edoubleprime
) :
3439 PRE_EXPR_NAME (edoubleprime
));
3440 gimple_stmt_iterator gsi
= gsi_after_labels (block
);
3441 gsi_insert_before (&gsi
, assign
, GSI_NEW_STMT
);
3443 vn_ssa_aux_t vn_info
= VN_INFO (temp
);
3444 vn_info
->value_id
= val
;
3445 vn_info
->valnum
= vn_valnum_from_value_id (val
);
3446 if (vn_info
->valnum
== NULL_TREE
)
3447 vn_info
->valnum
= temp
;
3448 bitmap_set_bit (inserted_exprs
, SSA_NAME_VERSION (temp
));
3449 pre_expr newe
= get_or_alloc_expr_for_name (temp
);
3450 add_to_value (val
, newe
);
3451 bitmap_value_replace_in_set (AVAIL_OUT (block
), newe
);
3452 bitmap_insert_into_set (NEW_SETS (block
), newe
);
/* NOTE(review): corrupted extraction -- braces, `static bool`, loop-local
   declarations (i, expr, val, pred, succ, ei, bprime, by_all, ...) and the
   function's tail (past original line 3584) were dropped.  Code text
   preserved verbatim; only comments added.  */
3462 /* Perform insertion for partially anticipatable expressions. There
3463 is only one case we will perform insertion for these. This case is
3464 if the expression is partially anticipatable, and fully available.
3465 In this case, we know that putting it earlier will enable us to
3466 remove the later computation. */
3469 do_pre_partial_partial_insertion (basic_block block
, basic_block dom
)
3471 bool new_stuff
= false;
3472 vec
<pre_expr
> exprs
;
3474 auto_vec
<pre_expr
, 2> avail
;
3477 exprs
= sorted_array_from_bitmap_set (PA_IN (block
));
3478 avail
.safe_grow (EDGE_COUNT (block
->preds
), true);
3480 FOR_EACH_VEC_ELT (exprs
, i
, expr
)
3482 if (expr
->kind
== NARY
3483 || expr
->kind
== REFERENCE
)
3487 bool cant_insert
= false;
3490 pre_expr eprime
= NULL
;
3493 val
= get_expr_value_id (expr
);
3494 if (bitmap_set_contains_value (PHI_GEN (block
), val
))
3496 if (bitmap_set_contains_value (AVAIL_OUT (dom
), val
))
3499 FOR_EACH_EDGE (pred
, ei
, block
->preds
)
3501 unsigned int vprime
;
3502 pre_expr edoubleprime
;
3504 /* We should never run insertion for the exit block
3505 and so not come across fake pred edges. */
3506 gcc_assert (!(pred
->flags
& EDGE_FAKE
));
3508 eprime
= phi_translate (NULL
, expr
, ANTIC_IN (block
),
3509 PA_IN (block
), pred
);
3511 /* eprime will generally only be NULL if the
3512 value of the expression, translated
3513 through the PHI for this predecessor, is
3514 undefined. If that is the case, we can't
3515 make the expression fully redundant,
3516 because its value is undefined along a
3517 predecessor path. We can thus break out
3518 early because it doesn't matter what the
3519 rest of the results are. */
3522 avail
[pred
->dest_idx
] = NULL
;
3527 vprime
= get_expr_value_id (eprime
);
3528 edoubleprime
= bitmap_find_leader (AVAIL_OUT (bprime
), vprime
);
3529 avail
[pred
->dest_idx
] = edoubleprime
;
3530 if (edoubleprime
== NULL
)
3537 /* If we can insert it, it's not the same value
3538 already existing along every predecessor, and
3539 it's defined by some predecessor, it is
3540 partially redundant. */
3541 if (!cant_insert
&& by_all
)
3544 bool do_insertion
= false;
3546 /* Insert only if we can remove a later expression on a path
3547 that we want to optimize for speed.
3548 The phi node that we will be inserting in BLOCK is not free,
3549 and inserting it for the sake of !optimize_for_speed successor
3550 may cause regressions on the speed path. */
3551 FOR_EACH_EDGE (succ
, ei
, block
->succs
)
3553 if (bitmap_set_contains_value (PA_IN (succ
->dest
), val
)
3554 || bitmap_set_contains_value (ANTIC_IN (succ
->dest
), val
))
3556 if (optimize_edge_for_speed_p (succ
))
3557 do_insertion
= true;
3563 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3565 fprintf (dump_file
, "Skipping partial partial redundancy "
3567 print_pre_expr (dump_file
, expr
);
3568 fprintf (dump_file
, " (%04d), not (partially) anticipated "
3569 "on any to be optimized for speed edges\n", val
);
3572 else if (dbg_cnt (treepre_insert
))
3574 pre_stats
.pa_insert
++;
3575 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3577 fprintf (dump_file
, "Found partial partial redundancy "
3579 print_pre_expr (dump_file
, expr
);
3580 fprintf (dump_file
, " (%04d)\n",
3581 get_expr_value_id (expr
));
3583 if (insert_into_preds_of_block (block
,
3584 get_expression_id (expr
),
/* NOTE(review): fragment of GCC's PRE/code-hoisting pass.  The embedded
   original line numbers (3596, 3601, ...) have gaps, so the extraction has
   dropped lines here (the `static bool` signature line, braces, `continue`
   and `return` statements).  The code below is byte-identical to what is
   visible; it is not compilable on its own.  */
3596 /* Insert expressions in BLOCK to compute hoistable values up.
3597 Return TRUE if something was inserted, otherwise return FALSE.
3598 The caller has to make sure that BLOCK has at least two successors. */
3601 do_hoist_insertion (basic_block block
)
3605 bool new_stuff
= false;
3607 gimple_stmt_iterator last
;
3609 /* At least two successors, or else... */
3610 gcc_assert (EDGE_COUNT (block
->succs
) >= 2);
3612 /* Check that all successors of BLOCK are dominated by block.
3613 We could use dominated_by_p() for this, but actually there is a much
3614 quicker check: any successor that is dominated by BLOCK can't have
3615 more than one predecessor edge. */
3616 FOR_EACH_EDGE (e
, ei
, block
->succs
)
3617 if (! single_pred_p (e
->dest
))
3620 /* Determine the insertion point. If we cannot safely insert before
3621 the last stmt if we'd have to, bail out. */
3622 last
= gsi_last_bb (block
);
3623 if (!gsi_end_p (last
)
3624 && !is_ctrl_stmt (gsi_stmt (last
))
3625 && stmt_ends_bb_p (gsi_stmt (last
)))
3628 /* Compute the set of hoistable expressions from ANTIC_IN. First compute
3629 hoistable values. */
3630 bitmap_set hoistable_set
;
3632 /* A hoistable value must be in ANTIC_IN(block)
3633 but not in AVAIL_OUT(BLOCK). */
3634 bitmap_initialize (&hoistable_set
.values
, &grand_bitmap_obstack
);
3635 bitmap_and_compl (&hoistable_set
.values
,
3636 &ANTIC_IN (block
)->values
, &AVAIL_OUT (block
)->values
);
3638 /* Short-cut for a common case: hoistable_set is empty. */
3639 if (bitmap_empty_p (&hoistable_set
.values
))
3642 /* Compute which of the hoistable values is in AVAIL_OUT of
3643 at least one of the successors of BLOCK. */
3644 bitmap_head availout_in_some
;
3645 bitmap_initialize (&availout_in_some
, &grand_bitmap_obstack
);
3646 FOR_EACH_EDGE (e
, ei
, block
->succs
)
3647 /* Do not consider expressions solely because of their availability
3648 on loop exits. They'd be ANTIC-IN throughout the whole loop
3649 and thus effectively hoisted across loops by combination of
3650 PRE and hoisting. */
3651 if (! loop_exit_edge_p (block
->loop_father
, e
))
3652 bitmap_ior_and_into (&availout_in_some
, &hoistable_set
.values
,
3653 &AVAIL_OUT (e
->dest
)->values
);
3654 bitmap_clear (&hoistable_set
.values
);
3656 /* Short-cut for a common case: availout_in_some is empty. */
3657 if (bitmap_empty_p (&availout_in_some
))
3660 /* Hack hoistable_set in-place so we can use sorted_array_from_bitmap_set. */
3661 bitmap_move (&hoistable_set
.values
, &availout_in_some
);
3662 hoistable_set
.expressions
= ANTIC_IN (block
)->expressions
;
3664 /* Now finally construct the topological-ordered expression set. */
3665 vec
<pre_expr
> exprs
= sorted_array_from_bitmap_set (&hoistable_set
);
3667 bitmap_clear (&hoistable_set
.values
);
3669 /* If there are candidate values for hoisting, insert expressions
3670 strategically to make the hoistable expressions fully redundant. */
3672 FOR_EACH_VEC_ELT (exprs
, i
, expr
)
3674 /* While we try to sort expressions topologically above the
3675 sorting doesn't work out perfectly. Catch expressions we
3676 already inserted. */
3677 unsigned int value_id
= get_expr_value_id (expr
);
3678 if (bitmap_set_contains_value (AVAIL_OUT (block
), value_id
))
3680 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3683 "Already inserted expression for ");
3684 print_pre_expr (dump_file
, expr
);
3685 fprintf (dump_file
, " (%04d)\n", value_id
);
3690 /* If we end up with a punned expression representation and this
3691 happens to be a float typed one give up - we can't know for
3692 sure whether all paths perform the floating-point load we are
3693 about to insert and on some targets this can cause correctness
3694 issues. See PR88240. */
3695 if (expr
->kind
== REFERENCE
3696 && PRE_EXPR_REFERENCE (expr
)->punned
3697 && FLOAT_TYPE_P (get_expr_type (expr
)))
3700 /* OK, we should hoist this value. Perform the transformation. */
3701 pre_stats
.hoist_insert
++;
3702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3705 "Inserting expression in block %d for code hoisting: ",
3707 print_pre_expr (dump_file
, expr
);
3708 fprintf (dump_file
, " (%04d)\n", value_id
);
3711 gimple_seq stmts
= NULL
;
3712 tree res
= create_expression_by_pieces (block
, expr
, &stmts
,
3713 get_expr_type (expr
));
3715 /* Do not return true if expression creation ultimately
3716 did not insert any statements. */
3717 if (gimple_seq_empty_p (stmts
))
/* NOTE(review): insert before a control stmt, otherwise after the last
   non-control stmt, per the two calls below.  The branch glue between
   them (`else`, braces) was dropped by the extraction.  */
3721 if (gsi_end_p (last
) || is_ctrl_stmt (gsi_stmt (last
)))
3722 gsi_insert_seq_before (&last
, stmts
, GSI_SAME_STMT
);
3724 gsi_insert_seq_after (&last
, stmts
, GSI_NEW_STMT
);
3727 /* Make sure to not return true if expression creation ultimately
3728 failed but also make sure to insert any stmts produced as they
3729 are tracked in inserted_exprs. */
/* NOTE(review): driver for PRE/hoist insertion.  The extraction dropped
   the function signature (original lines 3742-3747), the iteration
   `while (changed)` scaffolding, braces, and the final XDELETEVEC/return
   lines — only the visible statements below are authoritative.  */
3741 /* Perform insertion of partially redundant and hoistable values. */
3748 FOR_ALL_BB_FN (bb
, cfun
)
3749 NEW_SETS (bb
) = bitmap_set_new ();
3751 int *rpo
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
3752 int rpo_num
= pre_and_rev_post_order_compute (NULL
, rpo
, false);
3754 int num_iterations
= 0;
3759 if (dump_file
&& dump_flags
& TDF_DETAILS
)
3760 fprintf (dump_file
, "Starting insert iteration %d\n", num_iterations
);
/* Walk blocks in reverse post-order so dominators are visited before
   the blocks they dominate.  */
3763 for (int idx
= 0; idx
< rpo_num
; ++idx
)
3765 basic_block block
= BASIC_BLOCK_FOR_FN (cfun
, rpo
[idx
]);
3766 basic_block dom
= get_immediate_dominator (CDI_DOMINATORS
, block
);
3771 bitmap_set_t newset
;
3773 /* First, update the AVAIL_OUT set with anything we may have
3774 inserted higher up in the dominator tree. */
3775 newset
= NEW_SETS (dom
);
3778 /* Note that we need to value_replace both NEW_SETS, and
3779 AVAIL_OUT. For both the case of NEW_SETS, the value may be
3780 represented by some non-simple expression here that we want
3781 to replace it with. */
3782 FOR_EACH_EXPR_ID_IN_SET (newset
, i
, bi
)
3784 pre_expr expr
= expression_for_id (i
);
3785 bitmap_value_replace_in_set (NEW_SETS (block
), expr
);
3786 bitmap_value_replace_in_set (AVAIL_OUT (block
), expr
);
3790 /* Insert expressions for partial redundancies. */
3791 if (flag_tree_pre
&& !single_pred_p (block
))
3793 changed
|= do_pre_regular_insertion (block
, dom
);
3794 if (do_partial_partial
)
3795 changed
|= do_pre_partial_partial_insertion (block
, dom
);
3800 /* Clear the NEW sets before the next iteration. We have already
3801 fully propagated its contents. */
3803 FOR_ALL_BB_FN (bb
, cfun
)
3804 bitmap_set_free (NEW_SETS (bb
));
3808 statistics_histogram_event (cfun
, "insert iterations", num_iterations
);
3810 /* AVAIL_OUT is not needed after insertion so we don't have to
3811 propagate NEW_SETS from hoist insertion. */
3812 FOR_ALL_BB_FN (bb
, cfun
)
3814 bitmap_set_pool
.remove (NEW_SETS (bb
));
3815 NEW_SETS (bb
) = NULL
;
3818 /* Insert expressions for hoisting. Do a backward walk here since
3819 inserting into BLOCK exposes new opportunities in its predecessors.
3820 Since PRE and hoist insertions can cause back-to-back iteration
3821 and we are interested in PRE insertion exposed hoisting opportunities
3822 but not in hoisting exposed PRE ones do hoist insertion only after
3823 PRE insertion iteration finished and do not iterate it. */
3824 if (flag_code_hoisting
)
3825 for (int idx
= rpo_num
- 1; idx
>= 0; --idx
)
3827 basic_block block
= BASIC_BLOCK_FOR_FN (cfun
, rpo
[idx
]);
3828 if (EDGE_COUNT (block
->succs
) >= 2)
3829 changed
|= do_hoist_insertion (block
);
/* NOTE(review): AVAIL_OUT computation via a dominator-tree walk.  The
   extraction dropped many structural lines (braces, `continue`/`break`,
   `case` labels of the statement switch, several assignment left-hand
   sides around original lines 4104/4138/4151).  The visible statements
   are reproduced byte-identically; do not treat gaps as empty bodies.  */
3836 /* Compute the AVAIL set for all basic blocks.
3838 This function performs value numbering of the statements in each basic
3839 block. The AVAIL sets are built from information we glean while doing
3840 this value numbering, since the AVAIL sets contain only one entry per
3843 AVAIL_IN[BLOCK] = AVAIL_OUT[dom(BLOCK)].
3844 AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK]. */
3847 compute_avail (void)
3850 basic_block block
, son
;
3851 basic_block
*worklist
;
3856 /* We pretend that default definitions are defined in the entry block.
3857 This includes function arguments and the static chain decl. */
3858 FOR_EACH_SSA_NAME (i
, name
, cfun
)
3861 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
3862 || has_zero_uses (name
)
3863 || virtual_operand_p (name
))
3866 e
= get_or_alloc_expr_for_name (name
);
3867 add_to_value (get_expr_value_id (e
), e
);
3868 bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), e
);
3869 bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
3873 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3875 print_bitmap_set (dump_file
, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
3876 "tmp_gen", ENTRY_BLOCK
);
3877 print_bitmap_set (dump_file
, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
3878 "avail_out", ENTRY_BLOCK
);
3881 /* Allocate the worklist. */
3882 worklist
= XNEWVEC (basic_block
, n_basic_blocks_for_fn (cfun
));
3884 /* Seed the algorithm by putting the dominator children of the entry
3885 block on the worklist. */
3886 for (son
= first_dom_son (CDI_DOMINATORS
, ENTRY_BLOCK_PTR_FOR_FN (cfun
));
3888 son
= next_dom_son (CDI_DOMINATORS
, son
))
3889 worklist
[sp
++] = son
;
3891 BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun
))
3892 = ssa_default_def (cfun
, gimple_vop (cfun
));
3894 /* Loop until the worklist is empty. */
3900 /* Pick a block from the worklist. */
3901 block
= worklist
[--sp
];
3902 vn_context_bb
= block
;
3904 /* Initially, the set of available values in BLOCK is that of
3905 its immediate dominator. */
3906 dom
= get_immediate_dominator (CDI_DOMINATORS
, block
);
3909 bitmap_set_copy (AVAIL_OUT (block
), AVAIL_OUT (dom
));
3910 BB_LIVE_VOP_ON_EXIT (block
) = BB_LIVE_VOP_ON_EXIT (dom
);
3913 /* Generate values for PHI nodes. */
3914 for (gphi_iterator gsi
= gsi_start_phis (block
); !gsi_end_p (gsi
);
3917 tree result
= gimple_phi_result (gsi
.phi ());
3919 /* We have no need for virtual phis, as they don't represent
3920 actual computations. */
3921 if (virtual_operand_p (result
))
3923 BB_LIVE_VOP_ON_EXIT (block
) = result
;
3927 pre_expr e
= get_or_alloc_expr_for_name (result
);
3928 add_to_value (get_expr_value_id (e
), e
);
3929 bitmap_value_insert_into_set (AVAIL_OUT (block
), e
);
3930 bitmap_insert_into_set (PHI_GEN (block
), e
);
3933 BB_MAY_NOTRETURN (block
) = 0;
3935 /* Now compute value numbers and populate value sets with all
3936 the expressions computed in BLOCK. */
3937 for (gimple_stmt_iterator gsi
= gsi_start_bb (block
); !gsi_end_p (gsi
);
3943 stmt
= gsi_stmt (gsi
);
3945 /* Cache whether the basic-block has any non-visible side-effect
3947 If this isn't a call or it is the last stmt in the
3948 basic-block then the CFG represents things correctly. */
3949 if (is_gimple_call (stmt
) && !stmt_ends_bb_p (stmt
))
3951 /* Non-looping const functions always return normally.
3952 Otherwise the call might not return or have side-effects
3953 that forbids hoisting possibly trapping expressions
3955 int flags
= gimple_call_flags (stmt
);
3956 if (!(flags
& ECF_CONST
)
3957 || (flags
& ECF_LOOPING_CONST_OR_PURE
))
3958 BB_MAY_NOTRETURN (block
) = 1;
3961 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_DEF
)
3963 pre_expr e
= get_or_alloc_expr_for_name (op
);
3965 add_to_value (get_expr_value_id (e
), e
);
3966 bitmap_insert_into_set (TMP_GEN (block
), e
);
3967 bitmap_value_insert_into_set (AVAIL_OUT (block
), e
);
3970 if (gimple_vdef (stmt
))
3971 BB_LIVE_VOP_ON_EXIT (block
) = gimple_vdef (stmt
);
3973 if (gimple_has_side_effects (stmt
)
3974 || stmt_could_throw_p (cfun
, stmt
)
3975 || is_gimple_debug (stmt
))
3978 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
3980 if (ssa_undefined_value_p (op
))
3982 pre_expr e
= get_or_alloc_expr_for_name (op
);
3983 bitmap_value_insert_into_set (EXP_GEN (block
), e
);
/* NOTE(review): dispatch on statement kind — the `case GIMPLE_CALL:` /
   `case GIMPLE_ASSIGN:` labels were dropped by the extraction.  */
3986 switch (gimple_code (stmt
))
3994 vn_reference_s ref1
;
3995 pre_expr result
= NULL
;
3997 /* We can value number only calls to real functions. */
3998 if (gimple_call_internal_p (stmt
))
4001 vn_reference_lookup_call (as_a
<gcall
*> (stmt
), &ref
, &ref1
);
4005 /* If the value of the call is not invalidated in
4006 this block until it is computed, add the expression
4008 if (!gimple_vuse (stmt
)
4010 (SSA_NAME_DEF_STMT (gimple_vuse (stmt
))) == GIMPLE_PHI
4011 || gimple_bb (SSA_NAME_DEF_STMT
4012 (gimple_vuse (stmt
))) != block
)
4014 result
= get_or_alloc_expr_for_reference
4015 (ref
, gimple_location (stmt
));
4016 add_to_value (get_expr_value_id (result
), result
);
4017 bitmap_value_insert_into_set (EXP_GEN (block
), result
);
4024 pre_expr result
= NULL
;
4025 switch (vn_get_stmt_kind (stmt
))
4029 enum tree_code code
= gimple_assign_rhs_code (stmt
);
4032 /* COND_EXPR and VEC_COND_EXPR are awkward in
4033 that they contain an embedded complex expression.
4034 Don't even try to shove those through PRE. */
4035 if (code
== COND_EXPR
4036 || code
== VEC_COND_EXPR
)
4039 vn_nary_op_lookup_stmt (stmt
, &nary
);
4040 if (!nary
|| nary
->predicated_values
)
4043 /* If the NARY traps and there was a preceding
4044 point in the block that might not return avoid
4045 adding the nary to EXP_GEN. */
4046 if (BB_MAY_NOTRETURN (block
)
4047 && vn_nary_may_trap (nary
))
4050 result
= get_or_alloc_expr_for_nary
4051 (nary
, gimple_location (stmt
));
4057 tree rhs1
= gimple_assign_rhs1 (stmt
);
4059 ao_ref_init (&rhs1_ref
, rhs1
);
4060 alias_set_type set
= ao_ref_alias_set (&rhs1_ref
);
4061 alias_set_type base_set
4062 = ao_ref_base_alias_set (&rhs1_ref
);
4063 vec
<vn_reference_op_s
> operands
4064 = vn_reference_operands_for_lookup (rhs1
);
4066 vn_reference_lookup_pieces (gimple_vuse (stmt
), set
,
4067 base_set
, TREE_TYPE (rhs1
),
4068 operands
, &ref
, VN_WALK
);
4071 operands
.release ();
4075 /* If the REFERENCE traps and there was a preceding
4076 point in the block that might not return avoid
4077 adding the reference to EXP_GEN. */
4078 if (BB_MAY_NOTRETURN (block
)
4079 && vn_reference_may_trap (ref
))
4081 operands
.release ();
4085 /* If the value of the reference is not invalidated in
4086 this block until it is computed, add the expression
4088 if (gimple_vuse (stmt
))
4092 def_stmt
= SSA_NAME_DEF_STMT (gimple_vuse (stmt
));
4093 while (!gimple_nop_p (def_stmt
)
4094 && gimple_code (def_stmt
) != GIMPLE_PHI
4095 && gimple_bb (def_stmt
) == block
)
4097 if (stmt_may_clobber_ref_p
4098 (def_stmt
, gimple_assign_rhs1 (stmt
)))
4104 = SSA_NAME_DEF_STMT (gimple_vuse (def_stmt
));
4108 operands
.release ();
4113 /* If the load was value-numbered to another
4114 load make sure we do not use its expression
4115 for insertion if it wouldn't be a valid
4117 /* At the moment we have a testcase
4118 for hoist insertion of aligned vs. misaligned
4119 variants in gcc.dg/torture/pr65270-1.c thus
4120 with just alignment to be considered we can
4121 simply replace the expression in the hashtable
4122 with the most conservative one. */
4123 vn_reference_op_t ref1
= &ref
->operands
.last ();
4124 while (ref1
->opcode
!= TARGET_MEM_REF
4125 && ref1
->opcode
!= MEM_REF
4126 && ref1
!= &ref
->operands
[0])
4128 vn_reference_op_t ref2
= &operands
.last ();
4129 while (ref2
->opcode
!= TARGET_MEM_REF
4130 && ref2
->opcode
!= MEM_REF
4131 && ref2
!= &operands
[0])
4133 if ((ref1
->opcode
== TARGET_MEM_REF
4134 || ref1
->opcode
== MEM_REF
)
4135 && (TYPE_ALIGN (ref1
->type
)
4136 > TYPE_ALIGN (ref2
->type
)))
4138 = build_aligned_type (ref1
->type
,
4139 TYPE_ALIGN (ref2
->type
));
4140 /* TBAA behavior is an obvious part so make sure
4141 that the hashtable one covers this as well
4142 by adjusting the ref alias set and its base. */
4144 || alias_set_subset_of (set
, ref
->set
))
4146 else if (alias_set_subset_of (ref
->set
, set
))
4149 if (ref1
->opcode
== MEM_REF
)
4151 = wide_int_to_tree (TREE_TYPE (ref2
->op0
),
4152 wi::to_wide (ref1
->op0
));
4155 = wide_int_to_tree (TREE_TYPE (ref2
->op2
),
4156 wi::to_wide (ref1
->op2
));
4161 if (ref1
->opcode
== MEM_REF
)
4163 = wide_int_to_tree (ptr_type_node
,
4164 wi::to_wide (ref1
->op0
));
4167 = wide_int_to_tree (ptr_type_node
,
4168 wi::to_wide (ref1
->op2
));
4170 operands
.release ();
4172 result
= get_or_alloc_expr_for_reference
4173 (ref
, gimple_location (stmt
));
4181 add_to_value (get_expr_value_id (result
), result
);
4182 bitmap_value_insert_into_set (EXP_GEN (block
), result
);
4190 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4192 print_bitmap_set (dump_file
, EXP_GEN (block
),
4193 "exp_gen", block
->index
);
4194 print_bitmap_set (dump_file
, PHI_GEN (block
),
4195 "phi_gen", block
->index
);
4196 print_bitmap_set (dump_file
, TMP_GEN (block
),
4197 "tmp_gen", block
->index
);
4198 print_bitmap_set (dump_file
, AVAIL_OUT (block
),
4199 "avail_out", block
->index
);
4202 /* Put the dominator children of BLOCK on the worklist of blocks
4203 to compute available sets for. */
4204 for (son
= first_dom_son (CDI_DOMINATORS
, block
);
4206 son
= next_dom_son (CDI_DOMINATORS
, son
))
4207 worklist
[sp
++] = son
;
4209 vn_context_bb
= NULL
;
/* NOTE(review): pass setup — allocates the expression tables, per-block
   bitmap sets and the grand bitmap obstack that fini_pre releases.  The
   `static void init_pre (void)` signature, braces and the dump-file lines
   (original 4216-4221, 4230 etc.) were dropped by the extraction.  */
4215 /* Initialize data structures used by PRE. */
4222 next_expression_id
= 1;
4223 expressions
.create (0);
4224 expressions
.safe_push (NULL
);
4225 value_expressions
.create (get_max_value_id () + 1);
4226 value_expressions
.quick_grow_cleared (get_max_value_id () + 1);
4227 constant_value_expressions
.create (get_max_constant_value_id () + 1);
4228 constant_value_expressions
.quick_grow_cleared (get_max_constant_value_id () + 1);
4229 name_to_id
.create (0);
4231 inserted_exprs
= BITMAP_ALLOC (NULL
);
4233 connect_infinite_loops_to_exit ();
4234 memset (&pre_stats
, 0, sizeof (pre_stats
));
4236 alloc_aux_for_blocks (sizeof (struct bb_bitmap_sets
));
4238 calculate_dominance_info (CDI_DOMINATORS
);
4240 bitmap_obstack_initialize (&grand_bitmap_obstack
);
4241 expression_to_id
= new hash_table
<pre_expr_d
> (num_ssa_names
* 3);
4242 FOR_ALL_BB_FN (bb
, cfun
)
4244 EXP_GEN (bb
) = bitmap_set_new ();
4245 PHI_GEN (bb
) = bitmap_set_new ();
4246 TMP_GEN (bb
) = bitmap_set_new ();
4247 AVAIL_OUT (bb
) = bitmap_set_new ();
4248 PHI_TRANS_TABLE (bb
) = NULL
;
/* NOTE(review): teardown mirror of init_pre — releases every structure
   init_pre created.  Signature/brace lines were dropped by the
   extraction; the release order below is byte-identical to the source.  */
4253 /* Deallocate data structures used by PRE. */
4258 value_expressions
.release ();
4259 constant_value_expressions
.release ();
4260 expressions
.release ();
4261 BITMAP_FREE (inserted_exprs
);
4262 bitmap_obstack_release (&grand_bitmap_obstack
);
4263 bitmap_set_pool
.release ();
4264 pre_expr_pool
.release ();
4265 delete expression_to_id
;
4266 expression_to_id
= NULL
;
4267 name_to_id
.release ();
/* Per-block PHI translation caches live in bb->aux; guard on aux since
   free_aux_for_blocks below frees the aux storage itself.  */
4270 FOR_ALL_BB_FN (bb
, cfun
)
4271 if (bb
->aux
&& PHI_TRANS_TABLE (bb
))
4272 delete PHI_TRANS_TABLE (bb
);
4273 free_aux_for_blocks ();
/* NOTE(review): pass descriptor for the PRE pass.  The opening `{`, the
   pass-name string field and the closing `};` were dropped by the
   extraction; field comments below are from the original source.  */
4278 const pass_data pass_data_pre
=
4280 GIMPLE_PASS
, /* type */
4282 OPTGROUP_NONE
, /* optinfo_flags */
4283 TV_TREE_PRE
, /* tv_id */
4284 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4285 0, /* properties_provided */
4286 0, /* properties_destroyed */
4287 TODO_rebuild_alias
, /* todo_flags_start */
4288 0, /* todo_flags_finish */
/* NOTE(review): pass wrapper class; the gate enables the pass when either
   PRE (-ftree-pre) or code hoisting (-fcode-hoisting) is requested.  The
   `{`, access-specifier and closing lines were dropped by the extraction. */
4291 class pass_pre
: public gimple_opt_pass
4294 pass_pre (gcc::context
*ctxt
)
4295 : gimple_opt_pass (pass_data_pre
, ctxt
)
4298 /* opt_pass methods: */
4299 virtual bool gate (function
*)
4300 { return flag_tree_pre
!= 0 || flag_code_hoisting
!= 0; }
4301 virtual unsigned int execute (function
*);
4303 }; // class pass_pre
/* NOTE(review): the `static tree` signature, braces and the `return`
   statements (original 4312, 4315, 4318, 4325, 4328-4331) were dropped
   by the extraction — the visible conditions have no visible results.  */
4305 /* Valueization hook for RPO VN when we are calling back to it
4306 at ANTIC compute time. */
4309 pre_valueize (tree name
)
4311 if (TREE_CODE (name
) == SSA_NAME
)
4313 tree tem
= VN_INFO (name
)->valnum
;
4314 if (tem
!= VN_TOP
&& tem
!= name
)
4316 if (TREE_CODE (tem
) != SSA_NAME
4317 || SSA_NAME_IS_DEFAULT_DEF (tem
))
4319 /* We create temporary SSA names for representatives that
4320 do not have a definition (yet) but are not default defs either
4321 assume they are fine to use. */
4322 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (tem
));
4324 || dominated_by_p (CDI_DOMINATORS
, vn_context_bb
, def_bb
))
4326 /* ??? Now we could look for a leader. Ideally we'd somehow
4327 expose RPO VN leaders and get rid of AVAIL_OUT as well... */
/* NOTE(review): main driver — VN, (conditional) ANTIC/insert, eliminate,
   DCE of unused insertions, tail merging, SSA repair.  The `unsigned int`
   return-type line, braces, the compute_avail/compute_antic/insert calls
   inside the size-gated region (original 4361-4365), fini_pre and the
   final `return todo;` were dropped by the extraction.  */
4334 pass_pre::execute (function
*fun
)
4336 unsigned int todo
= 0;
4338 do_partial_partial
=
4339 flag_tree_partial_pre
&& optimize_function_for_speed_p (fun
);
4341 /* This has to happen before VN runs because
4342 loop_optimizer_init may create new phis, etc. */
4343 loop_optimizer_init (LOOPS_NORMAL
);
4344 split_edges_for_insertion ();
4346 calculate_dominance_info (CDI_DOMINATORS
);
4348 run_rpo_vn (VN_WALK
);
4352 vn_valueize
= pre_valueize
;
4354 /* Insert can get quite slow on an incredibly large number of basic
4355 blocks due to some quadratic behavior. Until this behavior is
4356 fixed, don't run it when we have an incredibly large number of
4357 bb's. If we aren't going to run insert, there is no point in
4358 computing ANTIC, either, even though it's plenty fast nor do
4359 we require AVAIL. */
4360 if (n_basic_blocks_for_fn (fun
) < 4000)
4367 /* Make sure to remove fake edges before committing our inserts.
4368 This makes sure we don't end up with extra critical edges that
4369 we would need to split. */
4370 remove_fake_exit_edges ();
4371 gsi_commit_edge_inserts ();
4373 /* Eliminate folds statements which might (should not...) end up
4374 not keeping virtual operands up-to-date. */
4375 gcc_assert (!need_ssa_update_p (fun
));
4377 statistics_counter_event (fun
, "Insertions", pre_stats
.insertions
);
4378 statistics_counter_event (fun
, "PA inserted", pre_stats
.pa_insert
);
4379 statistics_counter_event (fun
, "HOIST inserted", pre_stats
.hoist_insert
);
4380 statistics_counter_event (fun
, "New PHIs", pre_stats
.phis
);
4382 todo
|= eliminate_with_rpo_vn (inserted_exprs
);
4386 /* Because we don't follow exactly the standard PRE algorithm, and decide not
4387 to insert PHI nodes sometimes, and because value numbering of casts isn't
4388 perfect, we sometimes end up inserting dead code. This simple DCE-like
4389 pass removes any insertions we made that weren't actually used. */
4390 simple_dce_from_worklist (inserted_exprs
);
4395 loop_optimizer_finalize ();
4397 /* TODO: tail_merge_optimize may merge all predecessors of a block, in which
4398 case we can merge the block with the remaining predecessor of the block.
4400 - call merge_blocks after each tail merge iteration
4401 - call merge_blocks after all tail merge iterations
4402 - mark TODO_cleanup_cfg when necessary
4403 - share the cfg cleanup with fini_pre. */
4404 todo
|= tail_merge_optimize (todo
);
4408 /* Tail merging invalidates the virtual SSA web, together with
4409 cfg-cleanup opportunities exposed by PRE this will wreck the
4410 SSA updating machinery. So make sure to run update-ssa
4411 manually, before eventually scheduling cfg-cleanup as part of
4413 update_ssa (TODO_update_ssa_only_virtuals
);
4421 make_pass_pre (gcc::context
*ctxt
)
4423 return new pass_pre (ctxt
);