1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
38 #include "tree-pass.h"
43 /* TODO: Support for predicated code motion. I.e.
54 Where COND and INV are is invariants, but evaluating INV may trap or be
55 invalid from some other reason if !COND. This may be transformed to
65 /* A type for the list of statements that have to be moved in order to be able
66 to hoist an invariant computation. */
74 /* The auxiliary data kept for each statement. */
78 struct loop
*max_loop
; /* The outermost loop in that the statement
81 struct loop
*tgt_loop
; /* The loop out of that we want to move the
84 struct loop
*always_executed_in
;
85 /* The outermost loop for that we are sure
86 the statement is executed if the loop
89 bool sm_done
; /* True iff the store motion for a memory
90 reference in the statement has already
93 unsigned cost
; /* Cost of the computation performed by the
96 struct depend
*depends
; /* List of statements that must be also hoisted
97 out of the loop when this statement is
98 hoisted; i.e. those that define the operands
99 of the statement and are inside of the
103 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
105 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
107 /* Description of a memory reference location for store motion. */
111 tree
*ref
; /* The reference itself. */
112 tree stmt
; /* The statement in that it occurs. */
113 struct mem_ref_loc
*next
; /* Next use in the chain. */
116 /* Description of a memory reference for store motion. */
120 tree mem
; /* The memory itself. */
121 hashval_t hash
; /* Its hash value. */
122 bool is_stored
; /* True if there is a store to the location
124 struct mem_ref_loc
*locs
; /* The locations where it is found. */
125 bitmap vops
; /* Vops corresponding to this memory
127 struct mem_ref
*next
; /* Next memory reference in the list.
128 Memory references are stored in a hash
129 table, but the hash function depends
130 on values of pointers. Thus we cannot use
131 htab_traverse, since then we would get
132 miscompares during bootstrap (although the
133 produced code would be correct). */
136 /* Minimum cost of an expensive expression. */
137 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
139 /* The outermost loop for that execution of the header guarantees that the
140 block will be executed. */
141 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
143 /* Calls CBCK for each index in memory reference ADDR_P. There are two
144 kinds situations handled; in each of these cases, the memory reference
145 and DATA are passed to the callback:
147 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
148 pass the pointer to the index to the callback.
150 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
151 pointer to addr to the callback.
153 If the callback returns false, the whole search stops and false is returned.
154 Otherwise the function returns true after traversing through the whole
155 reference *ADDR_P. */
158 for_each_index (tree
*addr_p
, bool (*cbck
) (tree
, tree
*, void *), void *data
)
162 for (; ; addr_p
= nxt
)
164 switch (TREE_CODE (*addr_p
))
167 return cbck (*addr_p
, addr_p
, data
);
169 case MISALIGNED_INDIRECT_REF
:
170 case ALIGN_INDIRECT_REF
:
172 nxt
= &TREE_OPERAND (*addr_p
, 0);
173 return cbck (*addr_p
, nxt
, data
);
176 case VIEW_CONVERT_EXPR
:
177 case ARRAY_RANGE_REF
:
180 nxt
= &TREE_OPERAND (*addr_p
, 0);
184 /* If the component has varying offset, it behaves like index
186 idx
= &TREE_OPERAND (*addr_p
, 2);
188 && !cbck (*addr_p
, idx
, data
))
191 nxt
= &TREE_OPERAND (*addr_p
, 0);
195 nxt
= &TREE_OPERAND (*addr_p
, 0);
196 if (!cbck (*addr_p
, &TREE_OPERAND (*addr_p
, 1), data
))
209 idx
= &TMR_BASE (*addr_p
);
211 && !cbck (*addr_p
, idx
, data
))
213 idx
= &TMR_INDEX (*addr_p
);
215 && !cbck (*addr_p
, idx
, data
))
225 /* If it is possible to hoist the statement STMT unconditionally,
226 returns MOVE_POSSIBLE.
227 If it is possible to hoist the statement STMT, but we must avoid making
228 it executed if it would not be executed in the original program (e.g.
229 because it may trap), return MOVE_PRESERVE_EXECUTION.
230 Otherwise return MOVE_IMPOSSIBLE. */
233 movement_possibility (tree stmt
)
237 if (flag_unswitch_loops
238 && TREE_CODE (stmt
) == COND_EXPR
)
240 /* If we perform unswitching, force the operands of the invariant
241 condition to be moved out of the loop. */
242 return MOVE_POSSIBLE
;
245 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
246 return MOVE_IMPOSSIBLE
;
248 if (stmt_ends_bb_p (stmt
))
249 return MOVE_IMPOSSIBLE
;
251 if (stmt_ann (stmt
)->has_volatile_ops
)
252 return MOVE_IMPOSSIBLE
;
254 lhs
= TREE_OPERAND (stmt
, 0);
255 if (TREE_CODE (lhs
) == SSA_NAME
256 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
257 return MOVE_IMPOSSIBLE
;
259 rhs
= TREE_OPERAND (stmt
, 1);
261 if (TREE_SIDE_EFFECTS (rhs
))
262 return MOVE_IMPOSSIBLE
;
264 if (TREE_CODE (lhs
) != SSA_NAME
265 || tree_could_trap_p (rhs
))
266 return MOVE_PRESERVE_EXECUTION
;
268 if (get_call_expr_in (stmt
))
270 /* While pure or const call is guaranteed to have no side effects, we
271 cannot move it arbitrarily. Consider code like
273 char *s = something ();
283 Here the strlen call cannot be moved out of the loop, even though
284 s is invariant. In addition to possibly creating a call with
285 invalid arguments, moving out a function call that is not executed
286 may cause performance regressions in case the call is costly and
287 not executed at all. */
288 return MOVE_PRESERVE_EXECUTION
;
290 return MOVE_POSSIBLE
;
293 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
294 loop to that we could move the expression using DEF if it did not have
295 other operands, i.e. the outermost loop enclosing LOOP in that the value
296 of DEF is invariant. */
299 outermost_invariant_loop (tree def
, struct loop
*loop
)
303 struct loop
*max_loop
;
305 if (TREE_CODE (def
) != SSA_NAME
)
306 return superloop_at_depth (loop
, 1);
308 def_stmt
= SSA_NAME_DEF_STMT (def
);
309 def_bb
= bb_for_stmt (def_stmt
);
311 return superloop_at_depth (loop
, 1);
313 max_loop
= find_common_loop (loop
, def_bb
->loop_father
);
315 if (LIM_DATA (def_stmt
) && LIM_DATA (def_stmt
)->max_loop
)
316 max_loop
= find_common_loop (max_loop
,
317 LIM_DATA (def_stmt
)->max_loop
->outer
);
318 if (max_loop
== loop
)
320 max_loop
= superloop_at_depth (loop
, max_loop
->depth
+ 1);
325 /* Returns the outermost superloop of LOOP in that the expression EXPR is
329 outermost_invariant_loop_expr (tree expr
, struct loop
*loop
)
331 enum tree_code_class
class = TREE_CODE_CLASS (TREE_CODE (expr
));
333 struct loop
*max_loop
= superloop_at_depth (loop
, 1), *aloop
;
335 if (TREE_CODE (expr
) == SSA_NAME
336 || TREE_CODE (expr
) == INTEGER_CST
337 || is_gimple_min_invariant (expr
))
338 return outermost_invariant_loop (expr
, loop
);
340 if (class != tcc_unary
341 && class != tcc_binary
342 && class != tcc_expression
343 && class != tcc_comparison
)
346 nops
= TREE_CODE_LENGTH (TREE_CODE (expr
));
347 for (i
= 0; i
< nops
; i
++)
349 aloop
= outermost_invariant_loop_expr (TREE_OPERAND (expr
, i
), loop
);
353 if (flow_loop_nested_p (max_loop
, aloop
))
360 /* DATA is a structure containing information associated with a statement
361 inside LOOP. DEF is one of the operands of this statement.
363 Find the outermost loop enclosing LOOP in that value of DEF is invariant
364 and record this in DATA->max_loop field. If DEF itself is defined inside
365 this loop as well (i.e. we need to hoist it out of the loop if we want
366 to hoist the statement represented by DATA), record the statement in that
367 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
368 add the cost of the computation of DEF to the DATA->cost.
370 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
373 add_dependency (tree def
, struct lim_aux_data
*data
, struct loop
*loop
,
376 tree def_stmt
= SSA_NAME_DEF_STMT (def
);
377 basic_block def_bb
= bb_for_stmt (def_stmt
);
378 struct loop
*max_loop
;
384 max_loop
= outermost_invariant_loop (def
, loop
);
388 if (flow_loop_nested_p (data
->max_loop
, max_loop
))
389 data
->max_loop
= max_loop
;
391 if (!LIM_DATA (def_stmt
))
395 /* Only add the cost if the statement defining DEF is inside LOOP,
396 i.e. if it is likely that by moving the invariants dependent
397 on it, we will be able to avoid creating a new register for
398 it (since it will be only used in these dependent invariants). */
399 && def_bb
->loop_father
== loop
)
400 data
->cost
+= LIM_DATA (def_stmt
)->cost
;
402 dep
= xmalloc (sizeof (struct depend
));
403 dep
->stmt
= def_stmt
;
404 dep
->next
= data
->depends
;
410 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
411 are just ad-hoc constants. The estimates should be based on target-specific
415 stmt_cost (tree stmt
)
420 /* Always try to create possibilities for unswitching. */
421 if (TREE_CODE (stmt
) == COND_EXPR
)
422 return LIM_EXPENSIVE
;
424 rhs
= TREE_OPERAND (stmt
, 1);
426 /* Hoisting memory references out should almost surely be a win. */
427 if (stmt_references_memory_p (stmt
))
430 switch (TREE_CODE (rhs
))
433 /* We should be hoisting calls if possible. */
435 /* Unless the call is a builtin_constant_p; this always folds to a
436 constant, so moving it is useless. */
437 rhs
= get_callee_fndecl (rhs
);
438 if (DECL_BUILT_IN_CLASS (rhs
) == BUILT_IN_NORMAL
439 && DECL_FUNCTION_CODE (rhs
) == BUILT_IN_CONSTANT_P
)
456 /* Division and multiplication are usually expensive. */
467 /* Determine the outermost loop to that it is possible to hoist a statement
468 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
469 the outermost loop in that the value computed by STMT is invariant.
470 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
471 we preserve the fact whether STMT is executed. It also fills other related
472 information to LIM_DATA (STMT).
474 The function returns false if STMT cannot be hoisted outside of the loop it
475 is defined in, and true otherwise. */
478 determine_max_movement (tree stmt
, bool must_preserve_exec
)
480 basic_block bb
= bb_for_stmt (stmt
);
481 struct loop
*loop
= bb
->loop_father
;
483 struct lim_aux_data
*lim_data
= LIM_DATA (stmt
);
487 if (must_preserve_exec
)
488 level
= ALWAYS_EXECUTED_IN (bb
);
490 level
= superloop_at_depth (loop
, 1);
491 lim_data
->max_loop
= level
;
493 FOR_EACH_SSA_TREE_OPERAND (val
, stmt
, iter
, SSA_OP_USE
)
494 if (!add_dependency (val
, lim_data
, loop
, true))
497 FOR_EACH_SSA_TREE_OPERAND (val
, stmt
, iter
, SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
498 if (!add_dependency (val
, lim_data
, loop
, false))
501 lim_data
->cost
+= stmt_cost (stmt
);
506 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
507 and that one of the operands of this statement is computed by STMT.
508 Ensure that STMT (together with all the statements that define its
509 operands) is hoisted at least out of the loop LEVEL. */
512 set_level (tree stmt
, struct loop
*orig_loop
, struct loop
*level
)
514 struct loop
*stmt_loop
= bb_for_stmt (stmt
)->loop_father
;
517 stmt_loop
= find_common_loop (orig_loop
, stmt_loop
);
518 if (LIM_DATA (stmt
) && LIM_DATA (stmt
)->tgt_loop
)
519 stmt_loop
= find_common_loop (stmt_loop
,
520 LIM_DATA (stmt
)->tgt_loop
->outer
);
521 if (flow_loop_nested_p (stmt_loop
, level
))
524 gcc_assert (LIM_DATA (stmt
));
525 gcc_assert (level
== LIM_DATA (stmt
)->max_loop
526 || flow_loop_nested_p (LIM_DATA (stmt
)->max_loop
, level
));
528 LIM_DATA (stmt
)->tgt_loop
= level
;
529 for (dep
= LIM_DATA (stmt
)->depends
; dep
; dep
= dep
->next
)
530 set_level (dep
->stmt
, orig_loop
, level
);
533 /* Determines an outermost loop from that we want to hoist the statement STMT.
534 For now we chose the outermost possible loop. TODO -- use profiling
535 information to set it more sanely. */
538 set_profitable_level (tree stmt
)
540 set_level (stmt
, bb_for_stmt (stmt
)->loop_father
, LIM_DATA (stmt
)->max_loop
);
543 /* Returns true if STMT is not a pure call. */
546 nonpure_call_p (tree stmt
)
548 tree call
= get_call_expr_in (stmt
);
553 return TREE_SIDE_EFFECTS (call
) != 0;
556 /* Releases the memory occupied by DATA. */
559 free_lim_aux_data (struct lim_aux_data
*data
)
561 struct depend
*dep
, *next
;
563 for (dep
= data
->depends
; dep
; dep
= next
)
571 /* Determine the outermost loops in that statements in basic block BB are
572 invariant, and record them to the LIM_DATA associated with the statements.
573 Callback for walk_dominator_tree. */
576 determine_invariantness_stmt (struct dom_walk_data
*dw_data ATTRIBUTE_UNUSED
,
580 block_stmt_iterator bsi
;
582 bool maybe_never
= ALWAYS_EXECUTED_IN (bb
) == NULL
;
583 struct loop
*outermost
= ALWAYS_EXECUTED_IN (bb
);
585 if (!bb
->loop_father
->outer
)
588 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
589 fprintf (dump_file
, "Basic block %d (loop %d -- depth %d):\n\n",
590 bb
->index
, bb
->loop_father
->num
, bb
->loop_father
->depth
);
592 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
594 stmt
= bsi_stmt (bsi
);
596 pos
= movement_possibility (stmt
);
597 if (pos
== MOVE_IMPOSSIBLE
)
599 if (nonpure_call_p (stmt
))
607 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
608 to be hoisted out of loop, saving expensive divide. */
609 if (pos
== MOVE_POSSIBLE
610 && (rhs
= TREE_OPERAND (stmt
, 1)) != NULL
611 && TREE_CODE (rhs
) == RDIV_EXPR
612 && flag_unsafe_math_optimizations
613 && outermost_invariant_loop_expr (TREE_OPERAND (rhs
, 1),
614 loop_containing_stmt (stmt
)) != NULL
615 && outermost_invariant_loop_expr (rhs
,
616 loop_containing_stmt (stmt
)) == NULL
)
618 tree lhs
, stmt1
, stmt2
, var
, name
;
620 lhs
= TREE_OPERAND (stmt
, 0);
622 /* stmt must be MODIFY_EXPR. */
623 var
= create_tmp_var (TREE_TYPE (rhs
), "reciptmp");
624 add_referenced_tmp_var (var
);
626 stmt1
= build2 (MODIFY_EXPR
, void_type_node
, var
,
627 build2 (RDIV_EXPR
, TREE_TYPE (rhs
),
628 build_real (TREE_TYPE (rhs
), dconst1
),
629 TREE_OPERAND (rhs
, 1)));
630 name
= make_ssa_name (var
, stmt1
);
631 TREE_OPERAND (stmt1
, 0) = name
;
632 stmt2
= build2 (MODIFY_EXPR
, void_type_node
, lhs
,
633 build2 (MULT_EXPR
, TREE_TYPE (rhs
),
634 name
, TREE_OPERAND (rhs
, 0)));
636 /* Replace division stmt with reciprocal and multiply stmts.
637 The multiply stmt is not invariant, so update iterator
638 and avoid rescanning. */
639 bsi_replace (&bsi
, stmt1
, true);
640 bsi_insert_after (&bsi
, stmt2
, BSI_NEW_STMT
);
641 SSA_NAME_DEF_STMT (lhs
) = stmt2
;
643 /* Continue processing with invariant reciprocal statement. */
647 stmt_ann (stmt
)->common
.aux
= xcalloc (1, sizeof (struct lim_aux_data
));
648 LIM_DATA (stmt
)->always_executed_in
= outermost
;
650 if (maybe_never
&& pos
== MOVE_PRESERVE_EXECUTION
)
653 if (!determine_max_movement (stmt
, pos
== MOVE_PRESERVE_EXECUTION
))
655 LIM_DATA (stmt
)->max_loop
= NULL
;
659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
661 print_generic_stmt_indented (dump_file
, stmt
, 0, 2);
662 fprintf (dump_file
, " invariant up to level %d, cost %d.\n\n",
663 LIM_DATA (stmt
)->max_loop
->depth
,
664 LIM_DATA (stmt
)->cost
);
667 if (LIM_DATA (stmt
)->cost
>= LIM_EXPENSIVE
)
668 set_profitable_level (stmt
);
672 /* For each statement determines the outermost loop in that it is invariant,
673 statements on whose motion it depends and the cost of the computation.
674 This information is stored to the LIM_DATA structure associated with
678 determine_invariantness (void)
680 struct dom_walk_data walk_data
;
682 memset (&walk_data
, 0, sizeof (struct dom_walk_data
));
683 walk_data
.before_dom_children_before_stmts
= determine_invariantness_stmt
;
685 init_walk_dominator_tree (&walk_data
);
686 walk_dominator_tree (&walk_data
, ENTRY_BLOCK_PTR
);
687 fini_walk_dominator_tree (&walk_data
);
690 /* Commits edge insertions and updates loop structures. */
693 loop_commit_inserts (void)
695 unsigned old_last_basic_block
, i
;
698 old_last_basic_block
= last_basic_block
;
699 bsi_commit_edge_inserts ();
700 for (i
= old_last_basic_block
; i
< (unsigned) last_basic_block
; i
++)
702 bb
= BASIC_BLOCK (i
);
704 find_common_loop (single_pred (bb
)->loop_father
,
705 single_succ (bb
)->loop_father
));
709 /* Hoist the statements in basic block BB out of the loops prescribed by
710 data stored in LIM_DATA structures associated with each statement. Callback
711 for walk_dominator_tree. */
714 move_computations_stmt (struct dom_walk_data
*dw_data ATTRIBUTE_UNUSED
,
718 block_stmt_iterator bsi
;
722 if (!bb
->loop_father
->outer
)
725 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); )
727 stmt
= bsi_stmt (bsi
);
729 if (!LIM_DATA (stmt
))
735 cost
= LIM_DATA (stmt
)->cost
;
736 level
= LIM_DATA (stmt
)->tgt_loop
;
737 free_lim_aux_data (LIM_DATA (stmt
));
738 stmt_ann (stmt
)->common
.aux
= NULL
;
746 /* We do not really want to move conditionals out of the loop; we just
747 placed it here to force its operands to be moved if necessary. */
748 if (TREE_CODE (stmt
) == COND_EXPR
)
751 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
753 fprintf (dump_file
, "Moving statement\n");
754 print_generic_stmt (dump_file
, stmt
, 0);
755 fprintf (dump_file
, "(cost %u) out of loop %d.\n\n",
758 bsi_insert_on_edge (loop_preheader_edge (level
), stmt
);
763 /* Hoist the statements out of the loops prescribed by data stored in
764 LIM_DATA structures associated with each statement.*/
767 move_computations (void)
769 struct dom_walk_data walk_data
;
771 memset (&walk_data
, 0, sizeof (struct dom_walk_data
));
772 walk_data
.before_dom_children_before_stmts
= move_computations_stmt
;
774 init_walk_dominator_tree (&walk_data
);
775 walk_dominator_tree (&walk_data
, ENTRY_BLOCK_PTR
);
776 fini_walk_dominator_tree (&walk_data
);
778 loop_commit_inserts ();
779 if (need_ssa_update_p ())
780 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
783 /* Checks whether the statement defining variable *INDEX can be hoisted
784 out of the loop passed in DATA. Callback for for_each_index. */
787 may_move_till (tree ref
, tree
*index
, void *data
)
789 struct loop
*loop
= data
, *max_loop
;
791 /* If REF is an array reference, check also that the step and the lower
792 bound is invariant in LOOP. */
793 if (TREE_CODE (ref
) == ARRAY_REF
)
795 tree step
= array_ref_element_size (ref
);
796 tree lbound
= array_ref_low_bound (ref
);
798 max_loop
= outermost_invariant_loop_expr (step
, loop
);
802 max_loop
= outermost_invariant_loop_expr (lbound
, loop
);
807 max_loop
= outermost_invariant_loop (*index
, loop
);
814 /* Forces statements defining (invariant) SSA names in expression EXPR to be
815 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
818 force_move_till_expr (tree expr
, struct loop
*orig_loop
, struct loop
*loop
)
820 enum tree_code_class
class = TREE_CODE_CLASS (TREE_CODE (expr
));
823 if (TREE_CODE (expr
) == SSA_NAME
)
825 tree stmt
= SSA_NAME_DEF_STMT (expr
);
826 if (IS_EMPTY_STMT (stmt
))
829 set_level (stmt
, orig_loop
, loop
);
833 if (class != tcc_unary
834 && class != tcc_binary
835 && class != tcc_expression
836 && class != tcc_comparison
)
839 nops
= TREE_CODE_LENGTH (TREE_CODE (expr
));
840 for (i
= 0; i
< nops
; i
++)
841 force_move_till_expr (TREE_OPERAND (expr
, i
), orig_loop
, loop
);
844 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
845 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
851 struct loop
*orig_loop
;
855 force_move_till (tree ref
, tree
*index
, void *data
)
858 struct fmt_data
*fmt_data
= data
;
860 if (TREE_CODE (ref
) == ARRAY_REF
)
862 tree step
= array_ref_element_size (ref
);
863 tree lbound
= array_ref_low_bound (ref
);
865 force_move_till_expr (step
, fmt_data
->orig_loop
, fmt_data
->loop
);
866 force_move_till_expr (lbound
, fmt_data
->orig_loop
, fmt_data
->loop
);
869 if (TREE_CODE (*index
) != SSA_NAME
)
872 stmt
= SSA_NAME_DEF_STMT (*index
);
873 if (IS_EMPTY_STMT (stmt
))
876 set_level (stmt
, fmt_data
->orig_loop
, fmt_data
->loop
);
881 /* Records memory reference location *REF to the list MEM_REFS. The reference
882 occurs in statement STMT. */
885 record_mem_ref_loc (struct mem_ref_loc
**mem_refs
, tree stmt
, tree
*ref
)
887 struct mem_ref_loc
*aref
= xmalloc (sizeof (struct mem_ref_loc
));
892 aref
->next
= *mem_refs
;
896 /* Releases list of memory reference locations MEM_REFS. */
899 free_mem_ref_locs (struct mem_ref_loc
*mem_refs
)
901 struct mem_ref_loc
*act
;
906 mem_refs
= mem_refs
->next
;
911 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
914 rewrite_mem_refs (tree tmp_var
, struct mem_ref_loc
*mem_refs
)
919 for (; mem_refs
; mem_refs
= mem_refs
->next
)
921 FOR_EACH_SSA_TREE_OPERAND (var
, mem_refs
->stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
922 mark_sym_for_renaming (SSA_NAME_VAR (var
));
924 *mem_refs
->ref
= tmp_var
;
925 update_stmt (mem_refs
->stmt
);
929 /* Records request for store motion of memory reference REF from LOOP.
930 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
931 these references are rewritten by a new temporary variable.
932 Exits from the LOOP are stored in EXITS, there are N_EXITS of them.
933 The initialization of the temporary variable is put to the preheader
934 of the loop, and assignments to the reference from the temporary variable
935 are emitted to exits. */
938 schedule_sm (struct loop
*loop
, edge
*exits
, unsigned n_exits
, tree ref
,
939 struct mem_ref_loc
*mem_refs
)
941 struct mem_ref_loc
*aref
;
945 struct fmt_data fmt_data
;
947 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
949 fprintf (dump_file
, "Executing store motion of ");
950 print_generic_expr (dump_file
, ref
, 0);
951 fprintf (dump_file
, " from loop %d\n", loop
->num
);
954 tmp_var
= make_rename_temp (TREE_TYPE (ref
), "lsm_tmp");
956 fmt_data
.loop
= loop
;
957 fmt_data
.orig_loop
= loop
;
958 for_each_index (&ref
, force_move_till
, &fmt_data
);
960 rewrite_mem_refs (tmp_var
, mem_refs
);
961 for (aref
= mem_refs
; aref
; aref
= aref
->next
)
962 if (LIM_DATA (aref
->stmt
))
963 LIM_DATA (aref
->stmt
)->sm_done
= true;
965 /* Emit the load & stores. */
966 load
= build (MODIFY_EXPR
, void_type_node
, tmp_var
, ref
);
967 get_stmt_ann (load
)->common
.aux
= xcalloc (1, sizeof (struct lim_aux_data
));
968 LIM_DATA (load
)->max_loop
= loop
;
969 LIM_DATA (load
)->tgt_loop
= loop
;
971 /* Put this into the latch, so that we are sure it will be processed after
973 bsi_insert_on_edge (loop_latch_edge (loop
), load
);
975 for (i
= 0; i
< n_exits
; i
++)
977 store
= build (MODIFY_EXPR
, void_type_node
,
978 unshare_expr (ref
), tmp_var
);
979 bsi_insert_on_edge (exits
[i
], store
);
983 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
984 is true, prepare the statements that load the value of the memory reference
985 to a temporary variable in the loop preheader, store it back on the loop
986 exits, and replace all the references inside LOOP by this temporary variable.
987 LOOP has N_EXITS stored in EXITS. CLOBBERED_VOPS is the bitmap of virtual
988 operands that are clobbered by a call or accessed through multiple references
992 determine_lsm_ref (struct loop
*loop
, edge
*exits
, unsigned n_exits
,
993 bitmap clobbered_vops
, struct mem_ref
*ref
)
995 struct mem_ref_loc
*aref
;
996 struct loop
*must_exec
;
998 /* In case the memory is not stored to, there is nothing for SM to do. */
1002 /* If the reference is aliased with any different ref, or killed by call
1003 in function, then fail. */
1004 if (bitmap_intersect_p (ref
->vops
, clobbered_vops
))
1007 if (tree_could_trap_p (ref
->mem
))
1009 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1010 of the statements in that it occurs is always executed when the loop
1011 is entered. This way we know that by moving the load from the
1012 reference out of the loop we will not cause the error that would not
1015 TODO -- in fact we would like to check for anticipability of the
1016 reference, i.e. that on each path from loop entry to loop exit at
1017 least one of the statements containing the memory reference is
1020 for (aref
= ref
->locs
; aref
; aref
= aref
->next
)
1022 if (!LIM_DATA (aref
->stmt
))
1025 must_exec
= LIM_DATA (aref
->stmt
)->always_executed_in
;
1029 if (must_exec
== loop
1030 || flow_loop_nested_p (must_exec
, loop
))
1038 schedule_sm (loop
, exits
, n_exits
, ref
->mem
, ref
->locs
);
1041 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1042 of vops clobbered by call in loop or accessed by multiple memory references.
1043 EXITS is the list of N_EXITS exit edges of the LOOP. */
1046 hoist_memory_references (struct loop
*loop
, struct mem_ref
*mem_refs
,
1047 bitmap clobbered_vops
, edge
*exits
, unsigned n_exits
)
1049 struct mem_ref
*ref
;
1051 for (ref
= mem_refs
; ref
; ref
= ref
->next
)
1052 determine_lsm_ref (loop
, exits
, n_exits
, clobbered_vops
, ref
);
1055 /* Checks whether LOOP (with N_EXITS exits stored in EXITS array) is suitable
1056 for a store motion optimization (i.e. whether we can insert statement
1060 loop_suitable_for_sm (struct loop
*loop ATTRIBUTE_UNUSED
, edge
*exits
,
1065 for (i
= 0; i
< n_exits
; i
++)
1066 if (exits
[i
]->flags
& EDGE_ABNORMAL
)
1072 /* A hash function for struct mem_ref object OBJ. */
1075 memref_hash (const void *obj
)
1077 const struct mem_ref
*mem
= obj
;
1082 /* An equality function for struct mem_ref object OBJ1 with
1083 memory reference OBJ2. */
1086 memref_eq (const void *obj1
, const void *obj2
)
1088 const struct mem_ref
*mem1
= obj1
;
1090 return operand_equal_p (mem1
->mem
, (tree
) obj2
, 0);
1093 /* Gathers memory references in statement STMT in LOOP, storing the
1094 information about them in MEM_REFS hash table. Note vops accessed through
1095 unrecognized statements in CLOBBERED_VOPS. The newly created references
1096 are also stored to MEM_REF_LIST. */
1099 gather_mem_refs_stmt (struct loop
*loop
, htab_t mem_refs
,
1100 bitmap clobbered_vops
, tree stmt
,
1101 struct mem_ref
**mem_ref_list
)
1103 tree
*lhs
, *rhs
, *mem
= NULL
;
1106 struct mem_ref
*ref
= NULL
;
1111 if (ZERO_SSA_OPERANDS (stmt
, SSA_OP_ALL_VIRTUALS
))
1114 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1115 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1118 lhs
= &TREE_OPERAND (stmt
, 0);
1119 rhs
= &TREE_OPERAND (stmt
, 1);
1121 if (TREE_CODE (*lhs
) == SSA_NAME
)
1123 if (!is_gimple_addressable (*rhs
))
1129 else if (TREE_CODE (*rhs
) == SSA_NAME
1130 || is_gimple_min_invariant (*rhs
))
1138 /* If we cannot create an SSA name for the result, give up. */
1139 if (!is_gimple_reg_type (TREE_TYPE (*mem
))
1140 || TREE_THIS_VOLATILE (*mem
))
1143 /* If we cannot move the reference out of the loop, fail. */
1144 if (!for_each_index (mem
, may_move_till
, loop
))
1147 hash
= iterative_hash_expr (*mem
, 0);
1148 slot
= htab_find_slot_with_hash (mem_refs
, *mem
, hash
, INSERT
);
1154 ref
= xmalloc (sizeof (struct mem_ref
));
1158 ref
->is_stored
= false;
1159 ref
->vops
= BITMAP_ALLOC (NULL
);
1160 ref
->next
= *mem_ref_list
;
1161 *mem_ref_list
= ref
;
1164 ref
->is_stored
|= is_stored
;
1166 FOR_EACH_SSA_TREE_OPERAND (vname
, stmt
, oi
,
1167 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
1169 bitmap_set_bit (ref
->vops
,
1170 var_ann (SSA_NAME_VAR (vname
))->uid
);
1172 record_mem_ref_loc (&ref
->locs
, stmt
, mem
);
1176 FOR_EACH_SSA_TREE_OPERAND (vname
, stmt
, oi
,
1177 SSA_OP_VIRTUAL_USES
| SSA_OP_VIRTUAL_KILLS
)
1179 bitmap_set_bit (clobbered_vops
,
1180 var_ann (SSA_NAME_VAR (vname
))->uid
);
1184 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1185 statements in CLOBBERED_VOPS. The list of the references found by
1186 the function is returned. */
1188 static struct mem_ref
*
1189 gather_mem_refs (struct loop
*loop
, bitmap clobbered_vops
)
1191 basic_block
*body
= get_loop_body (loop
);
1192 block_stmt_iterator bsi
;
1194 struct mem_ref
*mem_ref_list
= NULL
;
1195 htab_t mem_refs
= htab_create (100, memref_hash
, memref_eq
, NULL
);
1197 for (i
= 0; i
< loop
->num_nodes
; i
++)
1199 for (bsi
= bsi_start (body
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
1200 gather_mem_refs_stmt (loop
, mem_refs
, clobbered_vops
, bsi_stmt (bsi
),
1206 htab_delete (mem_refs
);
1207 return mem_ref_list
;
1210 /* Finds the vops accessed by more than one of the memory references described
1211 in MEM_REFS and marks them in CLOBBERED_VOPS. */
1214 find_more_ref_vops (struct mem_ref
*mem_refs
, bitmap clobbered_vops
)
1216 bitmap_head tmp
, all_vops
;
1217 struct mem_ref
*ref
;
1219 bitmap_initialize (&tmp
, &bitmap_default_obstack
);
1220 bitmap_initialize (&all_vops
, &bitmap_default_obstack
);
1222 for (ref
= mem_refs
; ref
; ref
= ref
->next
)
1224 /* The vops that are already in all_vops are accessed by more than
1225 one memory reference. */
1226 bitmap_and (&tmp
, &all_vops
, ref
->vops
);
1227 bitmap_ior_into (clobbered_vops
, &tmp
);
1228 bitmap_clear (&tmp
);
1230 bitmap_ior_into (&all_vops
, ref
->vops
);
1233 bitmap_clear (&all_vops
);
1236 /* Releases the memory occupied by REF. */
1239 free_mem_ref (struct mem_ref
*ref
)
1241 free_mem_ref_locs (ref
->locs
);
1242 BITMAP_FREE (ref
->vops
);
1246 /* Releases the memory occupied by REFS. */
1249 free_mem_refs (struct mem_ref
*refs
)
1251 struct mem_ref
*ref
, *next
;
1253 for (ref
= refs
; ref
; ref
= next
)
1260 /* Try to perform store motion for all memory references modified inside
1264 determine_lsm_loop (struct loop
*loop
)
1267 edge
*exits
= get_loop_exit_edges (loop
, &n_exits
);
1268 bitmap clobbered_vops
;
1269 struct mem_ref
*mem_refs
;
1271 if (!loop_suitable_for_sm (loop
, exits
, n_exits
))
1277 /* Find the memory references in LOOP. */
1278 clobbered_vops
= BITMAP_ALLOC (NULL
);
1279 mem_refs
= gather_mem_refs (loop
, clobbered_vops
);
1281 /* Find the vops that are used for more than one reference. */
1282 find_more_ref_vops (mem_refs
, clobbered_vops
);
1284 /* Hoist all suitable memory references. */
1285 hoist_memory_references (loop
, mem_refs
, clobbered_vops
, exits
, n_exits
);
1287 free_mem_refs (mem_refs
);
1289 BITMAP_FREE (clobbered_vops
);
1292 /* Try to perform store motion for all memory references modified inside
1296 determine_lsm (struct loops
*loops
)
1300 if (!loops
->tree_root
->inner
)
1303 /* Pass the loops from the outermost and perform the store motion as
1306 loop
= loops
->tree_root
->inner
;
1309 determine_lsm_loop (loop
);
1319 if (loop
== loops
->tree_root
)
1321 loop_commit_inserts ();
1329 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1330 for each such basic block bb records the outermost loop for that execution
1331 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1332 blocks that contain a nonpure call. */
1335 fill_always_executed_in (struct loop
*loop
, sbitmap contains_call
)
1337 basic_block bb
= NULL
, *bbs
, last
= NULL
;
1340 struct loop
*inn_loop
= loop
;
1342 if (!loop
->header
->aux
)
1344 bbs
= get_loop_body_in_dom_order (loop
);
1346 for (i
= 0; i
< loop
->num_nodes
; i
++)
1351 if (dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1354 if (TEST_BIT (contains_call
, bb
->index
))
1357 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1358 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
1363 /* A loop might be infinite (TODO use simple loop analysis
1364 to disprove this if possible). */
1365 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
1368 if (!flow_bb_inside_loop_p (inn_loop
, bb
))
1371 if (bb
->loop_father
->header
== bb
)
1373 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
1376 /* In a loop that is always entered we may proceed anyway.
1377 But record that we entered it and stop once we leave it. */
1378 inn_loop
= bb
->loop_father
;
1385 if (last
== loop
->header
)
1387 last
= get_immediate_dominator (CDI_DOMINATORS
, last
);
1393 for (loop
= loop
->inner
; loop
; loop
= loop
->next
)
1394 fill_always_executed_in (loop
, contains_call
);
1397 /* Compute the global information needed by the loop invariant motion pass.
1398 LOOPS is the loop tree. */
1401 tree_ssa_lim_initialize (struct loops
*loops
)
1403 sbitmap contains_call
= sbitmap_alloc (last_basic_block
);
1404 block_stmt_iterator bsi
;
1408 sbitmap_zero (contains_call
);
1411 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1413 if (nonpure_call_p (bsi_stmt (bsi
)))
1417 if (!bsi_end_p (bsi
))
1418 SET_BIT (contains_call
, bb
->index
);
1421 for (loop
= loops
->tree_root
->inner
; loop
; loop
= loop
->next
)
1422 fill_always_executed_in (loop
, contains_call
);
1424 sbitmap_free (contains_call
);
1427 /* Cleans up after the invariant motion pass. */
1430 tree_ssa_lim_finalize (void)
1440 /* Moves invariants from LOOPS. Only "expensive" invariants are moved out --
1441 i.e. those that are likely to be win regardless of the register pressure. */
1444 tree_ssa_lim (struct loops
*loops
)
1446 tree_ssa_lim_initialize (loops
);
1448 /* For each statement determine the outermost loop in that it is
1449 invariant and cost for computing the invariant. */
1450 determine_invariantness ();
1452 /* For each memory reference determine whether it is possible to hoist it
1453 out of the loop. Force the necessary invariants to be moved out of the
1455 determine_lsm (loops
);
1457 /* Move the expressions that are expensive enough. */
1458 move_computations ();
1460 tree_ssa_lim_finalize ();