/* Loop invariant motion.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"

/* TODO: Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
         {
           a = inv;
           something;
         }
     }

   where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
         something;
     }  */

/* A type for the list of statements that have to be moved in order to be
   able to hoist an invariant computation.  */

struct depend
{
  tree stmt;
  struct depend *next;
};

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;        /* The outermost loop in which the statement
                                   is invariant.  */

  struct loop *tgt_loop;        /* The loop out of which we want to move the
                                   invariant.  */

  struct loop *always_executed_in;
                                /* The outermost loop for which we are sure
                                   the statement is executed if the loop
                                   is entered.  */

  bool sm_done;                 /* True iff the store motion for a memory
                                   reference in the statement has already
                                   been executed.  */

  unsigned cost;                /* Cost of the computation performed by the
                                   statement.  */

  struct depend *depends;       /* List of statements that must also be
                                   hoisted out of the loop when this statement
                                   is hoisted; i.e. those that define the
                                   operands of the statement and are inside of
                                   the MAX_LOOP loop.  */
};

#define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
                        ? NULL \
                        : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))

/* Description of a memory reference for store motion.  */

struct mem_ref
{
  tree *ref;                    /* The reference itself.  */
  tree stmt;                    /* The statement in which it occurs.  */
  struct mem_ref *next;         /* Next use in the chain.  */
};

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)

static unsigned max_stmt_uid;   /* Maximal uid of a statement.  Uids of phi
                                   nodes are assigned using the versions of
                                   the ssa names they define.  */

/* Returns the uid of statement STMT.  */

static unsigned
get_stmt_uid (tree stmt)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    return SSA_NAME_VERSION (PHI_RESULT (stmt)) + max_stmt_uid;

  return stmt_ann (stmt)->uid;
}

/* Calls CBCK for each index in memory reference ADDR_P.  Two kinds of
   situations are handled; in each of these cases, the memory reference
   and DATA are passed to the callback:

   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
   pass the pointer to the index to the callback.

   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
   pointer to addr to the callback.

   If the callback returns false, the whole search stops and false is
   returned.  Otherwise the function returns true after traversing through
   the whole reference *ADDR_P.  */

bool
for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
{
  tree *nxt, *idx;

  for (; ; addr_p = nxt)
    {
      switch (TREE_CODE (*addr_p))
        {
        case SSA_NAME:
          return cbck (*addr_p, addr_p, data);

        case MISALIGNED_INDIRECT_REF:
        case ALIGN_INDIRECT_REF:
        case INDIRECT_REF:
          nxt = &TREE_OPERAND (*addr_p, 0);
          return cbck (*addr_p, nxt, data);

        case BIT_FIELD_REF:
        case VIEW_CONVERT_EXPR:
        case ARRAY_RANGE_REF:
        case REALPART_EXPR:
        case IMAGPART_EXPR:
          nxt = &TREE_OPERAND (*addr_p, 0);
          break;

        case COMPONENT_REF:
          /* If the component has a varying offset, it behaves like an index
             as well.  */
          idx = &TREE_OPERAND (*addr_p, 2);
          if (*idx
              && !cbck (*addr_p, idx, data))
            return false;

          nxt = &TREE_OPERAND (*addr_p, 0);
          break;

        case ARRAY_REF:
          nxt = &TREE_OPERAND (*addr_p, 0);
          if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
            return false;
          break;

        case VAR_DECL:
        case PARM_DECL:
        case STRING_CST:
        case RESULT_DECL:
          return true;

        default:
          gcc_unreachable ();
        }
    }
}
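
/* For example, for the reference a[i].f (a COMPONENT_REF wrapping an
   ARRAY_REF), the traversal above invokes CBCK for the array index I
   (and for the variable component offset, if the COMPONENT_REF has one),
   then reaches the base VAR_DECL A and returns true.  */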

/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid executing
   it when it would not be executed in the original program (e.g. because it
   may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (tree stmt)
{
  tree lhs, rhs;

  if (flag_unswitch_loops
      && TREE_CODE (stmt) == COND_EXPR)
    {
      /* If we perform unswitching, force the operands of the invariant
         condition to be moved out of the loop.  */
      get_stmt_operands (stmt);

      return MOVE_POSSIBLE;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt))
    return MOVE_IMPOSSIBLE;

  get_stmt_operands (stmt);

  if (stmt_ann (stmt)->has_volatile_ops)
    return MOVE_IMPOSSIBLE;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  rhs = TREE_OPERAND (stmt, 1);

  if (TREE_SIDE_EFFECTS (rhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || tree_could_trap_p (rhs))
    return MOVE_PRESERVE_EXECUTION;

  if (get_call_expr_in (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
         cannot move it arbitrarily.  Consider code like

         char *s = something ();

         while (1)
           {
             if (s)
               t = strlen (s);
             else
               t = 0;
           }

         Here the strlen call cannot be moved out of the loop, even though
         s is invariant.  In addition to possibly creating a call with
         invalid arguments, moving out a function call that is not executed
         may cause performance regressions in case the call is costly and
         not executed at all.  */
      return MOVE_PRESERVE_EXECUTION;
    }
  return MOVE_POSSIBLE;
}
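
/* As an example of the MOVE_PRESERVE_EXECUTION case above: in

     while (cond)
       if (d != 0)
         x = a / d;

   the division is invariant but may trap when d == 0, so it is only hoisted
   out of loops in which it is always executed anyway (see
   ALWAYS_EXECUTED_IN).  */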

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  tree def_stmt;
  basic_block def_bb;
  struct loop *max_loop;

  if (TREE_CODE (def) != SSA_NAME)
    return superloop_at_depth (loop, 1);

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = bb_for_stmt (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
    max_loop = find_common_loop (max_loop,
                                 LIM_DATA (def_stmt)->max_loop->outer);
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, max_loop->depth + 1);

  return max_loop;
}
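
/* For instance, if LOOP is an inner loop L2 nested in L1 and DEF is defined
   before L1, the function returns L1 (the outermost superloop of L2 in which
   DEF is invariant); if DEF is defined inside L2 itself and is not known to
   be invariant, it returns NULL.  */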

/* Returns the outermost superloop of LOOP in which the expression EXPR is
   invariant.  */

static struct loop *
outermost_invariant_loop_expr (tree expr, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;
  struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;

  if (TREE_CODE (expr) == SSA_NAME
      || TREE_CODE (expr) == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return outermost_invariant_loop (expr, loop);

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return NULL;

  nops = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    {
      aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
      if (!aloop)
        return NULL;

      if (flow_loop_nested_p (max_loop, aloop))
        max_loop = aloop;
    }

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally,
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
                bool add_cost)
{
  tree def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = bb_for_stmt (def_stmt);
  struct loop *max_loop;
  struct depend *dep;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  if (!LIM_DATA (def_stmt))
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
         i.e. if it is likely that by moving the invariants dependent
         on it, we will be able to avoid creating a new register for
         it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += LIM_DATA (def_stmt)->cost;

  dep = xmalloc (sizeof (struct depend));
  dep->stmt = def_stmt;
  dep->next = data->depends;
  data->depends = dep;

  return true;
}

/* Returns an estimate for the cost of statement STMT.  TODO -- the values
   here are just ad-hoc constants.  The estimates should be based on
   target-specific values.  */

static unsigned
stmt_cost (tree stmt)
{
  tree lhs, rhs;
  unsigned cost = 1;

  /* Always try to create possibilities for unswitching.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    return LIM_EXPENSIVE;

  lhs = TREE_OPERAND (stmt, 0);
  rhs = TREE_OPERAND (stmt, 1);

  /* Hoisting memory references out should almost surely be a win.  */
  if (stmt_references_memory_p (stmt))
    cost += 20;

  switch (TREE_CODE (rhs))
    {
    case CALL_EXPR:
      /* We should be hoisting calls if possible.  */

      /* Unless the call is a builtin_constant_p; this always folds to a
         constant, so moving it is useless.  */
      rhs = get_callee_fndecl (rhs);
      if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
          && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
        return 0;

      cost += 20;
      break;

    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
      /* Division and multiplication are usually expensive.  */
      cost += 20;
      break;

    default:
      break;
    }

  return cost;
}

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  The function also fills in
   other related information in LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop
   it is defined in, and true otherwise.  */

static bool
determine_max_movement (tree stmt, bool must_preserve_exec)
{
  basic_block bb = bb_for_stmt (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = LIM_DATA (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (val, lim_data, loop, true))
      return false;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter,
                             SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
    if (!add_dependency (val, lim_data, loop, false))
      return false;

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (tree stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
  struct depend *dep;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
    stmt_loop = find_common_loop (stmt_loop,
                                  LIM_DATA (stmt)->tgt_loop->outer);
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (LIM_DATA (stmt));
  gcc_assert (level == LIM_DATA (stmt)->max_loop
              || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));

  LIM_DATA (stmt)->tgt_loop = level;
  for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
    set_level (dep->stmt, orig_loop, level);
}
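
/* E.g. if a = b + c is scheduled to be hoisted out of loop LEVEL and b and c
   are themselves computed inside LEVEL, the recursion above through the
   DEPENDS lists forces the statements defining b and c (and, transitively,
   their operands) out of LEVEL as well.  */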

/* Determines an outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */

static void
set_profitable_level (tree stmt)
{
  set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
}

/* Returns true if STMT is not a pure call.  */

static bool
nonpure_call_p (tree stmt)
{
  tree call = get_call_expr_in (stmt);

  if (!call)
    return false;

  return TREE_SIDE_EFFECTS (call) != 0;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  struct depend *dep, *next;

  for (dep = data->depends; dep; dep = next)
    {
      next = dep->next;
      free (dep);
    }
  free (data);
}

/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for walk_dominator_tree.  */

static void
determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
                              basic_block bb)
{
  enum move_pos pos;
  block_stmt_iterator bsi;
  tree stmt;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);

  if (!bb->loop_father->outer)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
             bb->index, bb->loop_father->num, bb->loop_father->depth);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
        {
          if (nonpure_call_p (stmt))
            {
              maybe_never = true;
              outermost = NULL;
            }
          continue;
        }

      stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
      LIM_DATA (stmt)->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
        continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
        {
          LIM_DATA (stmt)->max_loop = NULL;
          continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          print_generic_stmt_indented (dump_file, stmt, 0, 2);
          fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
                   LIM_DATA (stmt)->max_loop->depth,
                   LIM_DATA (stmt)->cost);
        }

      if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
        set_profitable_level (stmt);
    }
}

/* For each statement, determines the outermost loop in which it is invariant,
   the statements on whose motion it depends, and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */

static void
determine_invariantness (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);
}

/* Commits edge insertions and updates loop structures.  */

void
loop_commit_inserts (void)
{
  unsigned old_last_basic_block, i;
  basic_block bb;

  old_last_basic_block = last_basic_block;
  bsi_commit_edge_inserts ();
  for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      add_bb_to_loop (bb,
                      find_common_loop (EDGE_SUCC (bb, 0)->dest->loop_father,
                                        EDGE_PRED (bb, 0)->src->loop_father));
    }
}

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.
   Callback for walk_dominator_tree.  */

static void
move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
                        basic_block bb)
{
  struct loop *level;
  block_stmt_iterator bsi;
  tree stmt;
  unsigned cost = 0;

  if (!bb->loop_father->outer)
    return;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
    {
      stmt = bsi_stmt (bsi);

      if (!LIM_DATA (stmt))
        {
          bsi_next (&bsi);
          continue;
        }

      cost = LIM_DATA (stmt)->cost;
      level = LIM_DATA (stmt)->tgt_loop;
      free_lim_aux_data (LIM_DATA (stmt));
      stmt_ann (stmt)->common.aux = NULL;

      if (!level)
        {
          bsi_next (&bsi);
          continue;
        }

      /* We do not really want to move conditionals out of the loop; we just
         placed this one here to force its operands to be moved if
         necessary.  */
      if (TREE_CODE (stmt) == COND_EXPR)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Moving statement\n");
          print_generic_stmt (dump_file, stmt, 0);
          fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
                   cost, level->num);
        }
      bsi_insert_on_edge (loop_preheader_edge (level), stmt);
      bsi_remove (&bsi);
    }
}

/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static void
move_computations (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = move_computations_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);

  loop_commit_inserts ();
  rewrite_into_ssa (false);
  if (!bitmap_empty_p (vars_to_rename))
    {
      /* The rewrite of ssa names may cause violation of the loop closed ssa
         form invariants.  TODO -- avoid these rewrites completely.
         Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa ();
    }
  bitmap_clear (vars_to_rename);
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      max_loop = outermost_invariant_loop_expr (step, loop);
      if (!max_loop)
        return false;

      max_loop = outermost_invariant_loop_expr (lbound, loop);
      if (!max_loop)
        return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}

/* Forces statements defining (invariant) SSA names in expression EXPR to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which EXPR is used.  */

static void
force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      tree stmt = SSA_NAME_DEF_STMT (expr);
      if (IS_EMPTY_STMT (stmt))
        return;

      set_level (stmt, orig_loop, loop);
      return;
    }

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return;

  nops = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
}

/* Forces statements defining invariants in REF (and *INDEX) to be moved out
   of the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback
   for for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  tree stmt;
  struct fmt_data *fmt_data = data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  if (TREE_CODE (*index) != SSA_NAME)
    return true;

  stmt = SSA_NAME_DEF_STMT (*index);
  if (IS_EMPTY_STMT (stmt))
    return true;

  set_level (stmt, fmt_data->orig_loop, fmt_data->loop);

  return true;
}

/* Records memory reference *REF (that occurs in statement STMT)
   to the list MEM_REFS.  */

static void
record_mem_ref (struct mem_ref **mem_refs, tree stmt, tree *ref)
{
  struct mem_ref *aref = xmalloc (sizeof (struct mem_ref));

  aref->stmt = stmt;
  aref->ref = ref;

  aref->next = *mem_refs;
  *mem_refs = aref;
}

/* Releases the list of memory references MEM_REFS.  */

static void
free_mem_refs (struct mem_ref *mem_refs)
{
  struct mem_ref *act;

  while (mem_refs)
    {
      act = mem_refs;
      mem_refs = mem_refs->next;
      free (act);
    }
}

/* If VAR is defined in LOOP and the statement it is defined in does not
   belong to the set SEEN, add the statement to QUEUE of length IN_QUEUE and
   to the set SEEN.  */

static void
maybe_queue_var (tree var, struct loop *loop,
                 sbitmap seen, tree *queue, unsigned *in_queue)
{
  tree stmt = SSA_NAME_DEF_STMT (var);
  basic_block def_bb = bb_for_stmt (stmt);

  if (!def_bb
      || !flow_bb_inside_loop_p (loop, def_bb)
      || TEST_BIT (seen, get_stmt_uid (stmt)))
    return;

  SET_BIT (seen, get_stmt_uid (stmt));
  queue[(*in_queue)++] = stmt;
}

/* If COMMON_REF is NULL, set COMMON_REF to *OP and return true.
   Otherwise return true if the memory reference *OP is equal to COMMON_REF.
   Record the reference OP to the list MEM_REFS.  STMT is the statement in
   which the reference occurs.  */

struct sra_data
{
  struct mem_ref **mem_refs;
  tree common_ref;
  tree stmt;
};

static bool
fem_single_reachable_address (tree *op, void *data)
{
  struct sra_data *sra_data = data;

  if (sra_data->common_ref
      && !operand_equal_p (*op, sra_data->common_ref, 0))
    return false;
  sra_data->common_ref = *op;

  record_mem_ref (sra_data->mem_refs, sra_data->stmt, op);
  return true;
}

/* Runs CALLBACK for each operand of STMT that is a memory reference.  DATA
   is passed to the CALLBACK as well.  The traversal stops if CALLBACK
   returns false, and for_each_memref then returns false as well.  Otherwise
   for_each_memref returns true.  */

static bool
for_each_memref (tree stmt, bool (*callback)(tree *, void *), void *data)
{
  tree *op;

  if (TREE_CODE (stmt) == RETURN_EXPR)
    stmt = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      op = &TREE_OPERAND (stmt, 0);
      if (TREE_CODE (*op) != SSA_NAME
          && !callback (op, data))
        return false;

      op = &TREE_OPERAND (stmt, 1);
      if (TREE_CODE (*op) != SSA_NAME
          && is_gimple_lvalue (*op)
          && !callback (op, data))
        return false;

      stmt = TREE_OPERAND (stmt, 1);
    }

  if (TREE_CODE (stmt) == WITH_SIZE_EXPR)
    stmt = TREE_OPERAND (stmt, 0);

  if (TREE_CODE (stmt) == CALL_EXPR)
    {
      tree args;

      for (args = TREE_OPERAND (stmt, 1); args; args = TREE_CHAIN (args))
        {
          op = &TREE_VALUE (args);

          if (TREE_CODE (*op) != SSA_NAME
              && is_gimple_lvalue (*op)
              && !callback (op, data))
            return false;
        }
    }

  return true;
}

/* Determine whether all memory references inside the LOOP that correspond
   to virtual ssa names defined in statement STMT are equal.
   If so, store the list of the references to MEM_REFS, and return one
   of them.  Otherwise store NULL to MEM_REFS and return NULL_TREE.
   *SEEN_CALL_STMT is set to true if the virtual operands suggest
   that the reference might be clobbered by a call inside the LOOP.  */

static tree
single_reachable_address (struct loop *loop, tree stmt,
                          struct mem_ref **mem_refs,
                          bool *seen_call_stmt)
{
  unsigned max_uid = max_stmt_uid + num_ssa_names;
  tree *queue = xmalloc (sizeof (tree) * max_uid);
  sbitmap seen = sbitmap_alloc (max_uid);
  unsigned in_queue = 1;
  dataflow_t df;
  unsigned i, n;
  struct sra_data sra_data;
  tree call;
  tree val;
  ssa_op_iter iter;

  sbitmap_zero (seen);

  *mem_refs = NULL;
  sra_data.mem_refs = mem_refs;
  sra_data.common_ref = NULL_TREE;

  queue[0] = stmt;
  SET_BIT (seen, get_stmt_uid (stmt));
  *seen_call_stmt = false;

  while (in_queue)
    {
      stmt = queue[--in_queue];
      sra_data.stmt = stmt;

      if (LIM_DATA (stmt)
          && LIM_DATA (stmt)->sm_done)
        goto fail;

      switch (TREE_CODE (stmt))
        {
        case MODIFY_EXPR:
        case CALL_EXPR:
        case RETURN_EXPR:
          if (!for_each_memref (stmt, fem_single_reachable_address,
                                &sra_data))
            goto fail;

          /* If this is a function that may depend on the memory location,
             record the fact.  We cannot directly refuse call clobbered
             operands here, since sra_data.common_ref did not have
             to be set yet.  */
          call = get_call_expr_in (stmt);
          if (call
              && !(call_expr_flags (call) & ECF_CONST))
            *seen_call_stmt = true;

          /* Traverse also definitions of the VUSES (there may be others,
             distinct from the one we used to get to this statement).  */
          FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
            maybe_queue_var (val, loop, seen, queue, &in_queue);

          break;

        case PHI_NODE:
          for (i = 0; i < (unsigned) PHI_NUM_ARGS (stmt); i++)
            if (TREE_CODE (PHI_ARG_DEF (stmt, i)) == SSA_NAME)
              maybe_queue_var (PHI_ARG_DEF (stmt, i), loop,
                               seen, queue, &in_queue);
          break;

        default:
          goto fail;
        }

      /* Find uses of virtual names.  */
      df = get_immediate_uses (stmt);
      n = num_immediate_uses (df);

      for (i = 0; i < n; i++)
        {
          stmt = immediate_use (df, i);

          if (!flow_bb_inside_loop_p (loop, bb_for_stmt (stmt)))
            continue;

          if (TEST_BIT (seen, get_stmt_uid (stmt)))
            continue;
          SET_BIT (seen, get_stmt_uid (stmt));

          queue[in_queue++] = stmt;
        }
    }

  free (queue);
  sbitmap_free (seen);

  return sra_data.common_ref;

fail:
  free_mem_refs (*mem_refs);
  *mem_refs = NULL;
  free (queue);
  sbitmap_free (seen);

  return NULL;
}
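
/* In effect, if every load and store in LOOP reached through the virtual
   operand chains accesses the same location, say *p, the walk above collects
   all those occurrences in MEM_REFS and returns *p; any mismatching
   reference, unhandled statement kind, or reference already handled by store
   motion makes it give up.  */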

/* Rewrites memory references in list MEM_REFS by variable TMP_VAR.  */

static void
rewrite_mem_refs (tree tmp_var, struct mem_ref *mem_refs)
{
  tree var;
  ssa_op_iter iter;

  for (; mem_refs; mem_refs = mem_refs->next)
    {
      FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter,
                                 SSA_OP_ALL_VIRTUALS)
        {
          var = SSA_NAME_VAR (var);
          bitmap_set_bit (vars_to_rename, var_ann (var)->uid);
        }

      *mem_refs->ref = tmp_var;
      modify_stmt (mem_refs->stmt);
    }
}

/* Records a request for store motion of memory reference REF from LOOP.
   MEM_REFS is the list of occurrences of the reference REF inside LOOP;
   these references are rewritten by a new temporary variable.
   Exits from the LOOP are stored in EXITS; there are N_EXITS of them.
   The initialization of the temporary variable is put to the preheader
   of the loop, and assignments to the reference from the temporary variable
   are emitted to exits.  */

static void
schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
             struct mem_ref *mem_refs)
{
  struct mem_ref *aref;
  tree tmp_var;
  unsigned i;
  tree load, store;
  struct fmt_data fmt_data;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref, 0);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref, force_move_till, &fmt_data);

  rewrite_mem_refs (tmp_var, mem_refs);
  for (aref = mem_refs; aref; aref = aref->next)
    if (LIM_DATA (aref->stmt))
      LIM_DATA (aref->stmt)->sm_done = true;

  /* Emit the load & stores.  */
  load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
  get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
  LIM_DATA (load)->max_loop = loop;
  LIM_DATA (load)->tgt_loop = loop;

  /* Put this into the latch, so that we are sure it will be processed after
     all dependencies.  */
  bsi_insert_on_edge (loop_latch_edge (loop), load);

  for (i = 0; i < n_exits; i++)
    {
      store = build (MODIFY_EXPR, void_type_node,
                     unshare_expr (ref), tmp_var);
      bsi_insert_on_edge (exits[i], store);
    }
}
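
/* As a sketch of the overall effect, for a single-exit loop

     while (cond)
       MEM = f (MEM);

   store motion produces

     lsm_tmp = MEM;
     while (cond)
       lsm_tmp = f (lsm_tmp);
     MEM = lsm_tmp;

   (the load is emitted on the latch edge and later hoisted to the preheader
   by move_computations, since its LIM_DATA marks it as invariant in
   LOOP).  */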

/* Returns true if REF may be clobbered by calls.  */

static bool
is_call_clobbered_ref (tree ref)
{
  tree base;

  base = get_base_address (ref);
  if (!base)
    return true;

  if (DECL_P (base))
    return is_call_clobbered (base);

  if (INDIRECT_REF_P (base))
    {
      /* Check whether the alias tags associated with the pointer
         are call clobbered.  */
      tree ptr = TREE_OPERAND (base, 0);
      struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr);
      tree nmt = (pi) ? pi->name_mem_tag : NULL_TREE;
      tree tmt = var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;

      if ((nmt && is_call_clobbered (nmt))
          || (tmt && is_call_clobbered (tmt)))
        return true;

      return false;
    }

  gcc_unreachable ();
}

/* Determine whether all memory references inside LOOP corresponding to the
   virtual ssa name REG are equal to each other, and whether the address of
   this common reference can be hoisted outside of the loop.  If this is the
   case, prepare the statements that load the value of the memory reference
   to a temporary variable in the loop preheader, store it back on the loop
   exits, and replace all the references inside LOOP by this temporary
   variable.  LOOP has N_EXITS stored in EXITS.  */

static void
determine_lsm_reg (struct loop *loop, edge *exits, unsigned n_exits, tree reg)
{
  tree ref;
  struct mem_ref *mem_refs, *aref;
  struct loop *must_exec;
  bool sees_call;

  if (is_gimple_reg (reg))
    return;

  ref = single_reachable_address (loop, SSA_NAME_DEF_STMT (reg), &mem_refs,
                                  &sees_call);
  if (!ref)
    return;

  /* If we cannot create an ssa name for the result, give up.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref))
      || TREE_THIS_VOLATILE (ref))
    goto fail;

  /* If there is a call that may use the location, give up as well.  */
  if (sees_call
      && is_call_clobbered_ref (ref))
    goto fail;

  if (!for_each_index (&ref, may_move_till, loop))
    goto fail;

  if (tree_could_trap_p (ref))
    {
      /* If the memory access is unsafe (i.e. it might trap), ensure that
         one of the statements in which it occurs is always executed when
         the loop is entered.  This way we know that by moving the load from
         the reference out of the loop we will not cause an error that would
         not occur otherwise.

         TODO -- in fact we would like to check for anticipability of the
         reference, i.e. that on each path from loop entry to loop exit at
         least one of the statements containing the memory reference is
         executed.  */

      for (aref = mem_refs; aref; aref = aref->next)
        {
          if (!LIM_DATA (aref->stmt))
            continue;

          must_exec = LIM_DATA (aref->stmt)->always_executed_in;
          if (!must_exec)
            continue;

          if (must_exec == loop
              || flow_loop_nested_p (must_exec, loop))
            break;
        }

      if (!aref)
        goto fail;
    }

  schedule_sm (loop, exits, n_exits, ref, mem_refs);

fail: ;
  free_mem_refs (mem_refs);
}

/* Checks whether LOOP (with N_EXITS exits stored in the EXITS array) is
   suitable for a store motion optimization (i.e. whether we can insert
   statements on its exits).  */

static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
                      unsigned n_exits)
{
  unsigned i;

  for (i = 0; i < n_exits; i++)
    if (exits[i]->flags & EDGE_ABNORMAL)
      return false;

  return true;
}

/* Try to perform store motion for all memory references modified inside
   LOOP.  */

static void
determine_lsm_loop (struct loop *loop)
{
  tree phi;
  unsigned n_exits;
  edge *exits = get_loop_exit_edges (loop, &n_exits);

  if (!loop_suitable_for_sm (loop, exits, n_exits))
    {
      free (exits);
      return;
    }

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    determine_lsm_reg (loop, exits, n_exits, PHI_RESULT (phi));

  free (exits);
}

/* Try to perform store motion for all memory references modified inside
   any of LOOPS.  */

static void
determine_lsm (struct loops *loops)
{
  struct loop *loop;
  basic_block bb;

  if (!loops->tree_root->inner)
    return;

  /* Create a UID for each statement in the function.  Ordering of the
     UIDs is not important for this pass.  */
  max_stmt_uid = 0;
  FOR_EACH_BB (bb)
    {
      block_stmt_iterator bsi;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        stmt_ann (bsi_stmt (bsi))->uid = max_stmt_uid++;
    }

  compute_immediate_uses (TDFA_USE_VOPS, NULL);

  /* Walk the loops starting from the outermost ones.  For each virtual
     operand loop phi node, check whether all the references inside the loop
     correspond to a single address and, if so, move them.  */

  loop = loops->tree_root->inner;
  while (1)
    {
      determine_lsm_loop (loop);

      if (loop->inner)
        {
          loop = loop->inner;
          continue;
        }
      while (!loop->next)
        {
          loop = loop->outer;
          if (loop == loops->tree_root)
            {
              free_df ();
              loop_commit_inserts ();
              return;
            }
        }
      loop = loop->next;
    }
}

/* Fills the ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which
   execution of its header implies execution of bb.  CONTAINS_CALL is the
   bitmap of blocks that contain a nonpure call.  */

static void
fill_always_executed_in (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (!loop->header->aux)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
        {
          edge_iterator ei;
          bb = bbs[i];

          if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
            last = bb;

          if (TEST_BIT (contains_call, bb->index))
            break;

          FOR_EACH_EDGE (e, ei, bb->succs)
            if (!flow_bb_inside_loop_p (loop, e->dest))
              break;
          if (e)
            break;

          /* A loop might be infinite (TODO use simple loop analysis
             to disprove this if possible).  */
          if (bb->flags & BB_IRREDUCIBLE_LOOP)
            break;

          if (!flow_bb_inside_loop_p (inn_loop, bb))
            break;

          if (bb->loop_father->header == bb)
            {
              if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
                break;

              /* In a loop that is always entered we may proceed anyway.
                 But record that we entered it and stop once we leave it.  */
              inn_loop = bb->loop_father;
            }
        }

      while (1)
        {
          last->aux = loop;
          if (last == loop->header)
            break;
          last = get_immediate_dominator (CDI_DOMINATORS, last);
        }

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);
}
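
/* For instance, in

     while (1)
       {
         a;          -- always executed
         if (cond)
           b;        -- not always executed
         c;          -- always executed
       }

   the blocks containing a and c dominate the latch, so ALWAYS_EXECUTED_IN is
   set to the loop for them, while the block containing b gets no such
   guarantee.  */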

/* Compute the global information needed by the loop invariant motion pass.
   LOOPS is the loop tree.  */

static void
tree_ssa_lim_initialize (struct loops *loops)
{
  sbitmap contains_call = sbitmap_alloc (last_basic_block);
  block_stmt_iterator bsi;
  struct loop *loop;
  basic_block bb;

  sbitmap_zero (contains_call);
  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        {
          if (nonpure_call_p (bsi_stmt (bsi)))
            break;
        }

      if (!bsi_end_p (bsi))
        SET_BIT (contains_call, bb->index);
    }

  for (loop = loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);

  sbitmap_free (contains_call);
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      bb->aux = NULL;
    }
}

/* Moves invariants from LOOPS.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

void
tree_ssa_lim (struct loops *loops)
{
  tree_ssa_lim_initialize (loops);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  determine_invariantness ();

  /* For each memory reference determine whether it is possible to hoist it
     out of the loop.  Force the necessary invariants to be moved out of the
     loops as well.  */
  determine_lsm (loops);

  /* Move the expressions that are expensive enough.  */
  move_computations ();

  tree_ssa_lim_finalize ();
}